mobile-debug-mcp 0.21.0 → 0.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { WaitForElementResponse, TapResponse, SwipeResponse, TypeTextResponse, PressBackResponse } from "../types.js"
1
+ import { TapResponse, SwipeResponse, TypeTextResponse, PressBackResponse } from "../types.js"
2
2
  import { execAdb, getAndroidDeviceMetadata, getDeviceInfo } from "../utils/android/utils.js"
3
3
  import { AndroidObserve } from "../observe/index.js"
4
4
  import { scrollToElementShared } from "../utils/ui/index.js"
@@ -7,37 +7,6 @@ import { scrollToElementShared } from "../utils/ui/index.js"
7
7
  export class AndroidInteract {
8
8
  private observe = new AndroidObserve();
9
9
 
10
- async waitForElement(text: string, timeout: number, deviceId?: string): Promise<WaitForElementResponse> {
11
- const metadata = await getAndroidDeviceMetadata("", deviceId)
12
- const deviceInfo = getDeviceInfo(deviceId || 'default', metadata)
13
- const startTime = Date.now();
14
-
15
- while (Date.now() - startTime < timeout) {
16
- try {
17
- const tree = await this.observe.getUITree(deviceId);
18
-
19
- if (tree.error) {
20
- return { device: deviceInfo, found: false, error: tree.error };
21
- }
22
-
23
- const element = tree.elements.find(e => e.text === text);
24
- if (element) {
25
- return { device: deviceInfo, found: true, element };
26
- }
27
- } catch (e) {
28
- // Ignore errors during polling and retry
29
- console.error("Error polling UI tree:", e);
30
- }
31
-
32
- const elapsed = Date.now() - startTime;
33
- const remaining = timeout - elapsed;
34
- if (remaining <= 0) break;
35
-
36
- await new Promise(resolve => setTimeout(resolve, Math.min(500, remaining)));
37
- }
38
- return { device: deviceInfo, found: false };
39
- }
40
-
41
10
  async tap(x: number, y: number, deviceId?: string): Promise<TapResponse> {
42
11
  const metadata = await getAndroidDeviceMetadata("", deviceId)
43
12
  const deviceInfo = getDeviceInfo(deviceId || 'default', metadata)
@@ -39,12 +39,6 @@ export class ToolsInteract {
39
39
  return { interact: interact as any, resolved, platform: effectivePlatform }
40
40
  }
41
41
 
42
- static async waitForElementHandler({ platform, text, timeout, deviceId }: { platform: 'android' | 'ios', text: string, timeout?: number, deviceId?: string }) {
43
- const effectiveTimeout = timeout ?? 10000
44
- const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
45
- return await interact.waitForElement(text, effectiveTimeout, resolved.id)
46
- }
47
-
48
42
  static async tapHandler({ platform, x, y, deviceId }: { platform?: 'android' | 'ios', x: number, y: number, deviceId?: string }) {
49
43
  const { interact, resolved } = await ToolsInteract.getInteractionService(platform, deviceId)
50
44
  return await interact.tap(x, y, resolved.id)
@@ -224,6 +218,11 @@ export class ToolsInteract {
224
218
  return { found: true, element: outEl, score: scoreVal, confidence: scoreVal }
225
219
  }
226
220
 
221
+ static async waitForUIHandler({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
222
+ // Backwards-compatible wrapper that delegates to the core waitForUICore implementation
223
+ return await ToolsInteract.waitForUICore({ type, query, timeoutMs, pollIntervalMs, includeSnapshotOnFailure, match, stability_ms, observationDelayMs, platform, deviceId })
224
+ }
225
+
227
226
  static async waitForScreenChangeHandler({ platform, previousFingerprint, timeoutMs = 5000, pollIntervalMs = 300, deviceId }: { platform?: 'android' | 'ios', previousFingerprint: string, timeoutMs?: number, pollIntervalMs?: number, deviceId?: string }) {
228
227
  const start = Date.now()
229
228
  let lastFingerprint: string | null = null
@@ -261,7 +260,7 @@ export class ToolsInteract {
261
260
  return { success: false, reason: 'timeout', lastFingerprint, elapsedMs: Date.now() - start }
262
261
  }
263
262
 
264
- static async observeUntilHandler({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
263
+ static async waitForUICore({ type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId }: { type?: 'ui' | 'log' | 'screen' | 'idle', query?: string, timeoutMs?: number, pollIntervalMs?: number, includeSnapshotOnFailure?: boolean, match?: 'present'|'absent', stability_ms?: number, observationDelayMs?: number, platform?: 'android' | 'ios', deviceId?: string }) {
265
264
  const start = Date.now()
266
265
  const deadline = start + (timeoutMs || 0)
267
266
  const q = (query === null || query === undefined) ? '' : String(query)
@@ -283,7 +282,7 @@ export class ToolsInteract {
283
282
  baselineLastLine = logsArr.length ? logsArr[logsArr.length - 1] : null
284
283
  }
285
284
  } catch (err) {
286
- try { console.warn('observeUntil: failed to get baseline data (non-fatal):', err instanceof Error ? err.message : String(err)) } catch { }
285
+ try { console.warn('waitForUI: failed to get baseline data (non-fatal):', err instanceof Error ? err.message : String(err)) } catch { }
287
286
  }
288
287
 
289
288
  // Network-based waiting removed. Rely on UI and screen fingerprints for determinism.
@@ -294,7 +293,7 @@ export class ToolsInteract {
294
293
 
295
294
  // Optional initial observation delay requested by caller
296
295
  if (typeof observationDelayMs === 'number' && observationDelayMs > 0) {
297
- try { console.log(`observeUntil: delaying observation for ${observationDelayMs}ms`) } catch { }
296
+ try { console.log(`waitForUI: delaying observation for ${observationDelayMs}ms`) } catch { }
298
297
  await sleep(observationDelayMs)
299
298
  }
300
299
 
@@ -311,40 +310,29 @@ export class ToolsInteract {
311
310
  // Evaluate condition per type
312
311
  if (type === 'ui') {
313
312
  try {
314
- // Lightweight UI check: fetch UI tree and perform a normalized substring match to reduce overhead
315
- try {
316
- // Bound the UI tree fetch to avoid long blocking calls; prefer quick failure over hanging a poll
317
- const withTimeout = (p: Promise<any>, ms: number) => Promise.race([p, new Promise(resolve => setTimeout(()=>resolve(null), ms))])
318
- const tree = await withTimeout(ToolsObserve.getUITreeHandler({ platform, deviceId }), Math.min(pollInterval, 500)) as any
319
- const elems = Array.isArray(tree && tree.elements) ? tree.elements : []
320
- const qnorm = q.toLowerCase()
321
- let matched: any = null
322
- for (const el of elems) {
323
- try {
324
- const txt = ((el && (el.text || el.label || el.value || el.contentDescription || el.accessibilityLabel)) || '')
325
- if (!txt) continue
326
- if (String(txt).toLowerCase().includes(qnorm)) { matched = el; break }
327
- } catch { continue }
328
- }
329
- const isPresent = !!matched
330
- const conditionTrue = (match === 'present') ? isPresent : !isPresent
331
- if (conditionTrue) {
332
- if (matchedAt === null) matchedAt = Date.now()
333
- stableDuration = Date.now() - (matchedAt as number)
334
- lastObservedState = true
335
- if (stableDuration >= stability_ms) {
336
- matchSource = 'ui-tree-' + (match === 'present' ? 'present' : 'absent')
337
- const element = isPresent ? matched : null
338
- const now2 = Date.now()
339
- return { success: true, condition: match, query: q, poll_count: pollCount, duration_ms: now2 - start, stable_duration_ms: stableDuration, matchedElement: element, matchSource, timestamp: now2, type: 'ui', observed_state: lastObservedState ?? null }
313
+ // Prefer using the public findElementHandler which tests can override. This avoids relying
314
+ // on resolveObserve/getUITree for unit tests which may not have devices available.
315
+ try {
316
+ const findRes = await (ToolsInteract as any).findElementHandler({ query: q, exact: false, timeoutMs: Math.min(500, pollInterval), platform, deviceId })
317
+ const isPresent = !!(findRes && (findRes as any).found)
318
+ const conditionTrue = (match === 'present') ? isPresent : !isPresent
319
+ if (conditionTrue) {
320
+ if (matchedAt === null) matchedAt = Date.now()
321
+ stableDuration = Date.now() - (matchedAt as number)
322
+ lastObservedState = true
323
+ if (stableDuration >= stability_ms) {
324
+ matchSource = 'ui-find'
325
+ const element = isPresent ? (findRes as any).element : null
326
+ const now2 = Date.now()
327
+ return { success: true, condition: match, query: q, poll_count: pollCount, duration_ms: now2 - start, stable_duration_ms: stableDuration, matchedElement: element, matchSource, timestamp: now2, type: 'ui', observed_state: lastObservedState ?? null }
328
+ }
329
+ } else {
330
+ matchedAt = null
331
+ stableDuration = 0
332
+ lastObservedState = false
340
333
  }
341
- } else {
342
- matchedAt = null
343
- stableDuration = 0
344
- lastObservedState = false
345
- }
346
- } catch (err) { console.error('observeUntil(ui) tree error:', err) }
347
- } catch (err) { console.error('observeUntil(ui) find error:', err) }
334
+ } catch (err) { console.error('waitForUI(ui) find error:', err) }
335
+ } catch (err) { console.error('waitForUI(ui) outer error:', err) }
348
336
  } else if (type === 'log') {
349
337
  try {
350
338
  // Logs: presence semantics only (match 'present'). Stability not applicable (immediate)
@@ -372,7 +360,7 @@ export class ToolsInteract {
372
360
  return { success: true, condition: 'present', query: q, poll_count: pollCount, duration_ms: now2 - start, stable_duration_ms: 0, matchedLog: { message: line }, matchSource: 'log-snapshot', timestamp: now2, type: 'log', observed_state: true }
373
361
  }
374
362
  }
375
- } catch (err) { console.error('observeUntil(log) error:', err) }
363
+ } catch (err) { console.error('waitForUI(log) error:', err) }
376
364
  } else if (type === 'screen') {
377
365
  try {
378
366
  const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
@@ -394,7 +382,7 @@ export class ToolsInteract {
394
382
  lastObservedState = false
395
383
  }
396
384
  }
397
- } catch (err) { console.error('observeUntil(screen) error:', err) }
385
+ } catch (err) { console.error('waitForUI(screen) error:', err) }
398
386
  } else if (type === 'idle') {
399
387
  try {
400
388
  const fpRes = await ToolsObserve.getScreenFingerprintHandler({ platform, deviceId }) as ScreenFingerprintResponse | null
@@ -413,7 +401,7 @@ export class ToolsInteract {
413
401
  return { success: true, condition: 'present', query: q, poll_count: pollCount, duration_ms: now2 - start, stable_duration_ms: idleMs, matchSource: 'idle-stable', timestamp: now2, type: 'idle', observed_state: lastObservedState ?? null }
414
402
  }
415
403
  }
416
- } catch (err) { console.error('observeUntil(idle) error:', err) }
404
+ } catch (err) { console.error('waitForUI(idle) error:', err) }
417
405
  }
418
406
 
419
407
  // Respect poll interval and avoid tight loop
@@ -424,7 +412,11 @@ export class ToolsInteract {
424
412
  let snapshot: any = null
425
413
  if (includeSnapshotOnFailure) {
426
414
  try {
427
- snapshot = await ToolsObserve.captureDebugSnapshotHandler({ reason: `observe_until timeout for ${type}`, includeLogs: true, platform, deviceId })
415
+ // Use dynamic import to avoid circular-initialization issues where the ToolsObserve
416
+ // binding captured earlier may not reflect test-time overrides. Importing at call
417
+ // time ensures the latest exported ToolsObserve object is used.
418
+ const Obs = await import('../observe/index.js')
419
+ snapshot = await (Obs as any).ToolsObserve.captureDebugSnapshotHandler({ reason: `wait_for_ui timeout for ${type}`, includeLogs: true, platform, deviceId })
428
420
  } catch (err) {
429
421
  snapshot = { error: err instanceof Error ? err.message : String(err) }
430
422
  }
@@ -1,5 +1,5 @@
1
1
  import { spawn } from "child_process"
2
- import { WaitForElementResponse, TapResponse, SwipeResponse } from "../types.js"
2
+ import { TapResponse, SwipeResponse } from "../types.js"
3
3
  import { getIOSDeviceMetadata, getIdbCmd, isIDBInstalled } from "../utils/ios/utils.js"
4
4
  import { iOSObserve } from "../observe/index.js"
5
5
  import { scrollToElementShared } from "../utils/ui/index.js"
@@ -7,36 +7,6 @@ import { scrollToElementShared } from "../utils/ui/index.js"
7
7
  export class iOSInteract {
8
8
  private observe = new iOSObserve();
9
9
 
10
- async waitForElement(text: string, timeout: number, deviceId: string = "booted"): Promise<WaitForElementResponse> {
11
- const device = await getIOSDeviceMetadata(deviceId);
12
- const startTime = Date.now();
13
-
14
- while (Date.now() - startTime < timeout) {
15
- try {
16
- const tree = await this.observe.getUITree(deviceId);
17
-
18
- if (tree.error) {
19
- return { device, found: false, error: tree.error };
20
- }
21
-
22
- const element = tree.elements.find(e => e.text === text);
23
- if (element) {
24
- return { device, found: true, element };
25
- }
26
- } catch (e) {
27
- // Ignore errors during polling and retry
28
- console.error("Error polling UI tree:", e);
29
- }
30
-
31
- const elapsed = Date.now() - startTime;
32
- const remaining = timeout - elapsed;
33
- if (remaining <= 0) break;
34
-
35
- await new Promise(resolve => setTimeout(resolve, Math.min(500, remaining)));
36
- }
37
- return { device, found: false };
38
- }
39
-
40
10
  async tap(x: number, y: number, deviceId: string = "booted"): Promise<TapResponse> {
41
11
  const device = await getIOSDeviceMetadata(deviceId)
42
12
 
@@ -6,6 +6,7 @@ import { createWriteStream } from "fs"
6
6
  import { promises as fsPromises } from "fs"
7
7
  import path from "path"
8
8
  import { computeScreenFingerprint } from "../utils/ui/index.js"
9
+ import { parsePngSize } from "../utils/image.js"
9
10
 
10
11
  const activeLogStreams: Map<string, { proc: any, file: string }> = new Map()
11
12
 
@@ -142,7 +143,7 @@ export class AndroidObserve {
142
143
  reject(new Error(`ADB screencap timed out after 10s`))
143
144
  }, 10000)
144
145
 
145
- child.on('close', (code) => {
146
+ child.on('close', async (code) => {
146
147
  clearTimeout(timeout)
147
148
  if (code !== 0) {
148
149
  reject(new Error(stderr.trim() || `Screencap failed with code ${code}`))
@@ -152,28 +153,91 @@ export class AndroidObserve {
152
153
  const screenshotBuffer = Buffer.concat(chunks)
153
154
  const screenshotBase64 = screenshotBuffer.toString('base64')
154
155
 
155
- execAdb(['shell', 'wm', 'size'], deviceId)
156
- .then(sizeStdout => {
157
- let width = 0
158
- let height = 0
159
- const match = sizeStdout.match(/Physical size: (\d+)x(\d+)/)
160
- if (match) {
161
- width = parseInt(match[1], 10)
162
- height = parseInt(match[2], 10)
156
+ const parsed = parsePngSize(screenshotBuffer)
157
+ if (parsed.width > 0 && parsed.height > 0) {
158
+ // Attempt to convert to WebP (preferred) and provide JPEG fallback (awaited to avoid race)
159
+ try {
160
+ const sharpModule = await import('sharp'); const sharp = sharpModule && (sharpModule as any).default ? (sharpModule as any).default : sharpModule;
161
+ const buf = screenshotBuffer;
162
+ const img = sharp(buf);
163
+ const meta = await img.metadata().catch((err: any) => { console.error('sharp.metadata failed (Android):', err); return {} as any });
164
+ const hasAlpha = !!meta.hasAlpha || (meta.channels && meta.channels > 3);
165
+
166
+ let webpBuf: Buffer | null = null;
167
+ let jpegBuf: Buffer | null = null;
168
+ try {
169
+ webpBuf = await img.webp({ quality: 80 }).toBuffer();
170
+ } catch (err) {
171
+ console.error('WebP conversion failed (Android):', err instanceof Error ? err.message : String(err));
172
+ webpBuf = null;
163
173
  }
164
- resolve({
165
- device: deviceInfo,
166
- screenshot: screenshotBase64,
167
- resolution: { width, height }
174
+ try {
175
+ jpegBuf = await img.jpeg({ quality: 80 }).toBuffer();
176
+ } catch (err) {
177
+ console.error('JPEG conversion failed (Android):', err instanceof Error ? err.message : String(err));
178
+ jpegBuf = null;
179
+ }
180
+
181
+ if (hasAlpha) {
182
+ if (webpBuf) {
183
+ const webpB64 = webpBuf.toString('base64')
184
+ const jpegB64 = jpegBuf ? jpegBuf.toString('base64') : null
185
+ resolve({ device: deviceInfo, screenshot: webpB64, screenshot_mime: 'image/webp', screenshot_fallback: jpegB64, screenshot_fallback_mime: jpegB64 ? 'image/jpeg' : undefined, resolution: { width: parsed.width, height: parsed.height } } as any)
186
+ return
187
+ }
188
+ const pngB64 = buf.toString('base64')
189
+ resolve({ device: deviceInfo, screenshot: pngB64, screenshot_mime: 'image/png', resolution: { width: parsed.width, height: parsed.height } })
190
+ return
191
+ }
192
+
193
+ if (webpBuf) {
194
+ const webpB64 = webpBuf.toString('base64')
195
+ const jpegB64 = jpegBuf ? jpegBuf.toString('base64') : null
196
+ resolve({ device: deviceInfo, screenshot: webpB64, screenshot_mime: 'image/webp', screenshot_fallback: jpegB64, screenshot_fallback_mime: jpegB64 ? 'image/jpeg' : undefined, resolution: { width: parsed.width, height: parsed.height } } as any)
197
+ return
198
+ }
199
+
200
+ if (jpegBuf) {
201
+ resolve({ device: deviceInfo, screenshot: jpegBuf.toString('base64'), screenshot_mime: 'image/jpeg', resolution: { width: parsed.width, height: parsed.height } })
202
+ return
203
+ }
204
+
205
+ // No conversions succeeded; return original PNG
206
+ resolve({ device: deviceInfo, screenshot: screenshotBase64, screenshot_mime: 'image/png', resolution: { width: parsed.width, height: parsed.height } })
207
+ return
208
+ } catch (err) {
209
+ console.error('Screenshot conversion pipeline failed (Android):', err instanceof Error ? err.message : String(err));
210
+ // Conversion failed - fall back to original PNG with parsed resolution
211
+ resolve({ device: deviceInfo, screenshot: screenshotBase64, screenshot_mime: 'image/png', resolution: { width: parsed.width, height: parsed.height } })
212
+ return
213
+ }
214
+ } else {
215
+ // Fallback to querying wm size if parsing failed
216
+ execAdb(['shell', 'wm', 'size'], deviceId)
217
+ .then(sizeStdout => {
218
+ let width = 0
219
+ let height = 0
220
+ const match = sizeStdout.match(/Physical size: (\d+)x(\d+)/)
221
+ if (match) {
222
+ width = parseInt(match[1], 10)
223
+ height = parseInt(match[2], 10)
224
+ }
225
+ resolve({
226
+ device: deviceInfo,
227
+ screenshot: screenshotBase64,
228
+ screenshot_mime: 'image/png',
229
+ resolution: { width, height }
230
+ })
168
231
  })
169
- })
170
- .catch(() => {
171
- resolve({
172
- device: deviceInfo,
173
- screenshot: screenshotBase64,
174
- resolution: { width: 0, height: 0 }
232
+ .catch(() => {
233
+ resolve({
234
+ device: deviceInfo,
235
+ screenshot: screenshotBase64,
236
+ screenshot_mime: 'image/png',
237
+ resolution: { width: 0, height: 0 }
238
+ })
175
239
  })
176
- })
240
+ }
177
241
  })
178
242
 
179
243
  child.on('error', (err) => {
@@ -6,6 +6,7 @@ import { createWriteStream, promises as fsPromises } from 'fs'
6
6
  import path from 'path'
7
7
  import { parseLogLine } from '../utils/android/utils.js'
8
8
  import { computeScreenFingerprint } from '../utils/ui/index.js'
9
+ import { parsePngSize } from '../utils/image.js'
9
10
 
10
11
  const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
11
12
 
@@ -146,13 +147,63 @@ export class iOSObserve {
146
147
 
147
148
  const buffer = await fs.readFile(tmpFile)
148
149
  const base64 = buffer.toString('base64')
149
-
150
- await fs.rm(tmpFile).catch(() => {})
151
150
 
151
+ const dims = parsePngSize(buffer)
152
+
153
+ // Try to generate WebP (preferred) and JPEG fallback using sharp (in-process, cross-platform)
154
+ try {
155
+ const sharpModule = await import('sharp'); const sharp = sharpModule && (sharpModule as any).default ? (sharpModule as any).default : sharpModule;
156
+ const img = sharp(buffer);
157
+ const meta = await img.metadata().catch((err: any) => { console.error('sharp.metadata failed:', err); return {} as any });
158
+
159
+ // If image has alpha channel, prefer lossless PNG to preserve transparency
160
+ const hasAlpha = !!meta.hasAlpha || (meta.channels && meta.channels > 3);
161
+
162
+ // Generate WebP and JPEG buffers; log failures
163
+ let webpBuf: Buffer | null = null;
164
+ let jpegBuf: Buffer | null = null;
165
+ try {
166
+ webpBuf = await img.webp({ quality: 80 }).toBuffer();
167
+ } catch (err) {
168
+ console.error('WebP conversion failed (iOS):', err instanceof Error ? err.message : String(err));
169
+ webpBuf = null;
170
+ }
171
+ try {
172
+ jpegBuf = await img.jpeg({ quality: 80 }).toBuffer();
173
+ } catch (err) {
174
+ console.error('JPEG conversion failed (iOS):', err instanceof Error ? err.message : String(err));
175
+ jpegBuf = null;
176
+ }
177
+
178
+ await fs.rm(tmpFile).catch(() => {});
179
+
180
+ if (hasAlpha) {
181
+ // preserve alpha: return PNG if WebP not available
182
+ if (webpBuf) {
183
+ return { device, screenshot: webpBuf.toString('base64'), screenshot_mime: 'image/webp', screenshot_fallback: base64, screenshot_fallback_mime: 'image/png', resolution: { width: dims.width, height: dims.height } }
184
+ }
185
+ // if webp unavailable, return original PNG
186
+ return { device, screenshot: base64, screenshot_mime: 'image/png', resolution: { width: dims.width, height: dims.height } }
187
+ }
188
+
189
+ // No alpha: prefer webp, fall back to jpeg
190
+ if (webpBuf) {
191
+ return { device, screenshot: webpBuf.toString('base64'), screenshot_mime: 'image/webp', screenshot_fallback: jpegBuf ? jpegBuf.toString('base64') : undefined, screenshot_fallback_mime: jpegBuf ? 'image/jpeg' : undefined, resolution: { width: dims.width, height: dims.height } }
192
+ }
193
+ if (jpegBuf) {
194
+ return { device, screenshot: jpegBuf.toString('base64'), screenshot_mime: 'image/jpeg', resolution: { width: dims.width, height: dims.height } }
195
+ }
196
+ } catch (err) {
197
+ console.error('Screenshot conversion pipeline failed (iOS):', err instanceof Error ? err.message : String(err));
198
+ // fall through to png fallback
199
+ }
200
+
201
+ await fs.rm(tmpFile).catch(() => {})
152
202
  return {
153
203
  device,
154
204
  screenshot: base64,
155
- resolution: { width: 0, height: 0 },
205
+ screenshot_mime: 'image/png',
206
+ resolution: { width: dims.width, height: dims.height },
156
207
  }
157
208
  } catch (e) {
158
209
  await fs.rm(tmpFile).catch(() => {})
package/src/server.ts CHANGED
@@ -340,8 +340,8 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
340
340
  }
341
341
  },
342
342
  {
343
- name: "observe_until",
344
- description: "Wait for a UI condition (element present/absent) and require a stability window before returning success. Network-based waiting is not required; UI-only synchronization is the default and primary mode.",
343
+ name: "wait_for_ui",
344
+ description: "Wait for a UI/log/screen/idle condition with a stability window before returning success.",
345
345
  inputSchema: {
346
346
  type: "object",
347
347
  properties: {
@@ -359,34 +359,6 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
359
359
  },
360
360
 
361
361
 
362
- {
363
- name: "wait_for_element",
364
- description: "Wait until a UI element with matching text appears on screen or timeout is reached.",
365
- inputSchema: {
366
- type: "object",
367
- properties: {
368
- platform: {
369
- type: "string",
370
- enum: ["android", "ios"],
371
- description: "Platform to check"
372
- },
373
- text: {
374
- type: "string",
375
- description: "Text content of the element to wait for"
376
- },
377
- timeout: {
378
- type: "number",
379
- description: "Max wait time in ms (default 10000)",
380
- default: 10000
381
- },
382
- deviceId: {
383
- type: "string",
384
- description: "Device Serial/UDID. Defaults to connected/booted device."
385
- }
386
- },
387
- required: ["platform", "text"]
388
- }
389
- },
390
362
  {
391
363
  name: "find_element",
392
364
  description: "Find a UI element by semantic query (text, content-desc, resource-id, class). Returns best match.",
@@ -656,12 +628,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request: SchemaOutput<typ
656
628
  if (name === "capture_screenshot") {
657
629
  const { platform, deviceId } = args as any
658
630
  const res = await ToolsObserve.captureScreenshotHandler({ platform, deviceId })
659
- return {
660
- content: [
661
- { type: 'text', text: JSON.stringify({ device: res.device, result: { resolution: (res as any).resolution } }, null, 2) },
662
- { type: 'image', data: (res as any).screenshot, mimeType: 'image/png' }
663
- ]
664
- }
631
+ const mime = (res as any).screenshot_mime || 'image/png'
632
+ const content: any[] = [
633
+ { type: 'text', text: JSON.stringify({ device: res.device, result: { resolution: (res as any).resolution, mimeType: mime } }, null, 2) },
634
+ { type: 'image', data: (res as any).screenshot, mimeType: mime }
635
+ ]
636
+ // If a jpeg fallback is available, include a small note and the fallback as an additional image block for compatibility
637
+ if ((res as any).screenshot_fallback) {
638
+ content.push({ type: 'text', text: JSON.stringify({ note: 'JPEG fallback included for compatibility', mimeType: (res as any).screenshot_fallback_mime || 'image/jpeg' }) })
639
+ content.push({ type: 'image', data: (res as any).screenshot_fallback, mimeType: (res as any).screenshot_fallback_mime || 'image/jpeg' })
640
+ }
641
+ return { content }
665
642
  }
666
643
 
667
644
  if (name === "capture_debug_snapshot") {
@@ -694,9 +671,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request: SchemaOutput<typ
694
671
  return wrapResponse(res)
695
672
  }
696
673
 
697
- if (name === "wait_for_element") {
698
- const { platform, text, timeout, deviceId } = (args || {}) as any
699
- const res = await ToolsInteract.waitForElementHandler({ platform, text, timeout, deviceId })
674
+
675
+ if (name === "wait_for_ui") {
676
+ const { type = 'ui', query, timeoutMs = 30000, pollIntervalMs = 300, includeSnapshotOnFailure = true, match = 'present', stability_ms = 700, observationDelayMs = 0, platform, deviceId } = (args || {}) as any
677
+ const res = await ToolsInteract.waitForUIHandler({ type, query, timeoutMs, pollIntervalMs, includeSnapshotOnFailure, match, stability_ms, observationDelayMs, platform, deviceId })
700
678
  return wrapResponse(res)
701
679
  }
702
680
 
package/src/types.ts CHANGED
@@ -50,6 +50,9 @@ export interface GetCrashResponse {
50
50
  export interface CaptureAndroidScreenResponse {
51
51
  device: DeviceInfo;
52
52
  screenshot: string; // base64 encoded string
53
+ screenshot_mime?: string; // e.g. image/webp, image/jpeg, image/png
54
+ screenshot_fallback?: string; // optional fallback base64 (e.g., jpeg)
55
+ screenshot_fallback_mime?: string;
53
56
  resolution: {
54
57
  width: number;
55
58
  height: number;
@@ -59,6 +62,9 @@ export interface CaptureAndroidScreenResponse {
59
62
  export interface CaptureIOSScreenshotResponse {
60
63
  device: DeviceInfo;
61
64
  screenshot: string; // base64 encoded string
65
+ screenshot_mime?: string; // e.g. image/webp, image/jpeg, image/png
66
+ screenshot_fallback?: string; // optional fallback base64 (e.g., jpeg)
67
+ screenshot_fallback_mime?: string;
62
68
  resolution: {
63
69
  width: number;
64
70
  height: number;
@@ -0,0 +1,14 @@
1
+ export function parsePngSize(buf: Buffer): { width: number; height: number } {
2
+ try {
3
+ if (!buf || buf.length < 24) return { width: 0, height: 0 };
4
+ // PNG signature + IHDR checks
5
+ if (buf.readUInt32BE(0) !== 0x89504e47 || buf.readUInt32BE(4) !== 0x0d0a1a0a) return { width: 0, height: 0 };
6
+ const ihdr = buf.toString('ascii', 12, 16);
7
+ if (ihdr !== 'IHDR') return { width: 0, height: 0 };
8
+ const width = buf.readUInt32BE(16);
9
+ const height = buf.readUInt32BE(20);
10
+ return { width, height };
11
+ } catch {
12
+ return { width: 0, height: 0 };
13
+ }
14
+ }
@@ -32,6 +32,12 @@ export async function resolveTargetDevice(opts: ResolveOptions): Promise<DeviceI
32
32
  const { platform, appId, prefer, deviceId } = opts
33
33
  const devices = await listDevices(platform, appId)
34
34
 
35
+ // During unit tests (no adb/xcrun available), provide a lightweight mock device so
36
+ // the observe/interact unit tests can run without real devices.
37
+ if ((!devices || devices.length === 0) && (process.env.NODE_ENV === 'test' || process.env.MCP_TEST_MOCK_DEVICES === '1')) {
38
+ return { id: 'mock', platform: platform || 'android', osVersion: '12', model: 'Pixel', simulator: true } as DeviceInfo
39
+ }
40
+
35
41
  if (deviceId) {
36
42
  const found = devices.find(d => d.id === deviceId)
37
43
  if (!found) throw new Error(`Device '${deviceId}' not found for platform ${platform}`)
@@ -1,19 +1,3 @@
1
- import { spawn } from 'child_process';
2
- import path from 'path';
3
- import { fileURLToPath } from 'url';
4
- const __filename = fileURLToPath(import.meta.url);
5
- const __dirname = path.dirname(__filename);
6
- const ADB_PATH = process.env.ADB_PATH || process.env.ADB || 'adb';
7
- const TEST_FILE = path.join(__dirname, 'wait_for_element_real.ts');
8
-
9
- const childEnv = { ...process.env, ADB_PATH };
10
- const runner = process.env.RUNNER || 'npx';
11
- const runnerArgs = ['tsx', TEST_FILE];
12
-
13
- const child = spawn(runner, runnerArgs, {
14
- env: childEnv,
15
- stdio: 'inherit'
16
- });
17
- child.on('exit', (code) => {
18
- process.exit(code || 0);
19
- });
1
+ // wait_for_element device runner removed
2
+ console.log('wait_for_element device runner removed');
3
+ process.exit(0);