mcp-android-emulator 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,66 +1,57 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * MCP Server for Android Emulator
4
- * Enables AI assistants to interact with Android devices/emulators via ADB
3
+ * MCP Server for Android Emulator.
4
+ *
5
+ * Finalidad:
6
+ * Expone 43 tools MCP que permiten a un asistente LLM controlar un device
7
+ * Android vía ADB (screenshot, tap, type, launch apps, logs, asserts...).
8
+ *
9
+ * Interrelación:
10
+ * - src/adb/runner.ts → ejecución segura de adb (execFile, sin shell del host).
11
+ * - src/adb/validators.ts → allowlists zod para inputs que llegan al sh del device.
12
+ * - test/ → smoke tests que validan que payloads shell-metachar son
13
+ * rechazados por los validators y que los argv construidos
14
+ * son los esperados.
15
+ *
16
+ * Seguridad:
17
+ * Fix de la issue #1 (command injection). TODOS los argumentos derivados del
18
+ * LLM pasan por zod.refine antes de llegar al runner, y el runner usa execFile
19
+ * (no exec), por lo que /bin/sh del host nunca reinterpreta la línea de comando.
5
20
  *
6
21
  * @license MIT
7
22
  */
8
23
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
9
24
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
10
25
  import { z } from "zod";
11
- import { execSync, exec } from "child_process";
12
- import { promisify } from "util";
13
- import * as fs from "fs";
14
- import * as path from "path";
15
- const execAsync = promisify(exec);
26
+ import * as fs from "node:fs";
27
+ import * as os from "node:os";
28
+ import * as path from "node:path";
29
+ import { runAdb, runAdbShell, runAdbExecOutBinary, } from "./adb/runner.js";
30
+ import { packageNameSchema, apkPathSchema, resourceIdSchema, freeTextSchema, typeableTextSchema, searchFilterSchema, positiveCountSchema, coordinateSchema, durationMsSchema, } from "./adb/validators.js";
31
+ // =====================================================
16
32
  // Configuration
17
- const ADB_PATH = process.env.ADB_PATH || "adb";
33
+ // =====================================================
18
34
  const SCREENSHOT_DIR = process.env.SCREENSHOT_DIR || "/tmp/android-screenshots";
19
- // Create screenshot directory if it doesn't exist
20
35
  if (!fs.existsSync(SCREENSHOT_DIR)) {
21
36
  fs.mkdirSync(SCREENSHOT_DIR, { recursive: true });
22
37
  }
23
- /**
24
- * Execute an ADB command
25
- */
26
- async function adb(command) {
27
- try {
28
- const { stdout } = await execAsync(`${ADB_PATH} ${command}`);
29
- return stdout.trim();
30
- }
31
- catch (error) {
32
- throw new Error(`ADB Error: ${error.message}`);
33
- }
34
- }
35
- /**
36
- * Execute a shell command on the device
37
- */
38
- async function shell(command) {
39
- return adb(`shell ${command}`);
40
- }
41
- // Create MCP server
38
+ // =====================================================
39
+ // MCP Server
40
+ // =====================================================
42
41
  const server = new McpServer({
43
42
  name: "android-emulator",
44
- version: "1.4.0",
43
+ version: "2.0.0",
45
44
  });
46
45
  // =====================================================
47
46
  // TOOL: screenshot
48
47
  // =====================================================
49
48
  server.tool("screenshot", "Take a screenshot of the Android device/emulator and return it as a base64 image", {}, async () => {
50
- const filename = `screenshot_${Date.now()}.png`;
51
- const filepath = path.join(SCREENSHOT_DIR, filename);
52
- // Capture screenshot
53
- execSync(`${ADB_PATH} exec-out screencap -p > ${filepath}`);
54
- // Read as base64
55
- const imageBuffer = fs.readFileSync(filepath);
56
- const base64 = imageBuffer.toString("base64");
57
- // Clean up temp file
58
- fs.unlinkSync(filepath);
49
+ const buffer = await runAdbExecOutBinary(["screencap", "-p"]);
59
50
  return {
60
51
  content: [
61
52
  {
62
53
  type: "image",
63
- data: base64,
54
+ data: buffer.toString("base64"),
64
55
  mimeType: "image/png",
65
56
  },
66
57
  ],
@@ -70,10 +61,8 @@ server.tool("screenshot", "Take a screenshot of the Android device/emulator and
70
61
  // TOOL: get_ui_tree
71
62
  // =====================================================
72
63
  server.tool("get_ui_tree", "Get the UI element tree of the device (like DOM but for Android). Returns clickable elements with their coordinates.", {}, async () => {
73
- // Dump UI hierarchy
74
- await shell("uiautomator dump /sdcard/ui_dump.xml");
75
- const xml = await shell("cat /sdcard/ui_dump.xml");
76
- // Parse clickable elements
64
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
65
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
77
66
  const elements = [];
78
67
  const regex = /text="([^"]*)".*?bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;
79
68
  let match;
@@ -98,30 +87,21 @@ server.tool("get_ui_tree", "Get the UI element tree of the device (like DOM but
98
87
  // TOOL: tap
99
88
  // =====================================================
100
89
  server.tool("tap", "Tap at the specified coordinates on the screen", {
101
- x: z.number().describe("X coordinate"),
102
- y: z.number().describe("Y coordinate"),
90
+ x: coordinateSchema.describe("X coordinate"),
91
+ y: coordinateSchema.describe("Y coordinate"),
103
92
  }, async ({ x, y }) => {
104
- await shell(`input tap ${x} ${y}`);
105
- return {
106
- content: [
107
- {
108
- type: "text",
109
- text: `Tapped at (${x}, ${y})`,
110
- },
111
- ],
112
- };
93
+ await runAdbShell(["input", "tap", String(x), String(y)]);
94
+ return { content: [{ type: "text", text: `Tapped at (${x}, ${y})` }] };
113
95
  });
114
96
  // =====================================================
115
97
  // TOOL: tap_text
116
98
  // =====================================================
117
99
  server.tool("tap_text", "Find an element by its text content and tap on it", {
118
- text: z.string().describe("Text of the element to find and tap"),
100
+ text: freeTextSchema.describe("Text of the element to find and tap"),
119
101
  exact: z.boolean().optional().describe("If true, match exact text. Default: false (partial match)"),
120
102
  }, async ({ text, exact = false }) => {
121
- // Dump UI hierarchy
122
- await shell("uiautomator dump /sdcard/ui_dump.xml");
123
- const xml = await shell("cat /sdcard/ui_dump.xml");
124
- // Build regex based on exact match preference
103
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
104
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
125
105
  const escapedText = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
126
106
  const pattern = exact
127
107
  ? `text="${escapedText}".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`
@@ -129,235 +109,189 @@ server.tool("tap_text", "Find an element by its text content and tap on it", {
129
109
  const regex = new RegExp(pattern, "i");
130
110
  const match = regex.exec(xml);
131
111
  if (!match) {
132
- return {
133
- content: [
134
- {
135
- type: "text",
136
- text: `Element with text "${text}" not found`,
137
- },
138
- ],
139
- };
112
+ return { content: [{ type: "text", text: `Element with text "${text}" not found` }] };
140
113
  }
141
114
  const [, x1, y1, x2, y2] = match;
142
115
  const centerX = Math.round((parseInt(x1) + parseInt(x2)) / 2);
143
116
  const centerY = Math.round((parseInt(y1) + parseInt(y2)) / 2);
144
- await shell(`input tap ${centerX} ${centerY}`);
117
+ await runAdbShell(["input", "tap", String(centerX), String(centerY)]);
145
118
  return {
146
- content: [
147
- {
148
- type: "text",
149
- text: `Tapped on "${text}" at (${centerX}, ${centerY})`,
150
- },
151
- ],
119
+ content: [{ type: "text", text: `Tapped on "${text}" at (${centerX}, ${centerY})` }],
152
120
  };
153
121
  });
154
122
  // =====================================================
155
123
  // TOOL: type_text
156
124
  // =====================================================
157
- server.tool("type_text", "Type text into the currently focused input field", {
158
- text: z.string().describe("Text to type"),
125
+ /**
126
+ * Android `input text` interpreta %s como espacio y %XX como byte URL-encoded.
127
+ * Percent-encodear el UTF-8 del texto:
128
+ * - soporta acentos, CJK, emoji (Android decodifica %XX internamente)
129
+ * - evita el NPE conocido de `input text` con UTF-8 directo
130
+ * - los metacaracteres shell ya fueron rechazados por typeableTextSchema
131
+ */
132
+ function encodeTextForInput(text) {
133
+ return encodeURIComponent(text).replace(/%20/g, "%s");
134
+ }
135
+ server.tool("type_text", "Type text into the currently focused input field. Unicode is supported via URL-encoding. Shell metacharacters (; & | ` $ ( ) < > \\ quotes) are rejected.", {
136
+ text: typeableTextSchema.describe("Text to type"),
159
137
  }, async ({ text }) => {
160
- // Escape special characters for shell
161
- const escaped = text.replace(/ /g, "%s").replace(/'/g, "\\'");
162
- await shell(`input text "${escaped}"`);
163
- return {
164
- content: [
165
- {
166
- type: "text",
167
- text: `Typed: "${text}"`,
168
- },
169
- ],
170
- };
138
+ await runAdbShell(["input", "text", encodeTextForInput(text)]);
139
+ return { content: [{ type: "text", text: `Typed: "${text}"` }] };
171
140
  });
172
141
  // =====================================================
173
142
  // TOOL: swipe
174
143
  // =====================================================
175
144
  server.tool("swipe", "Perform a swipe gesture on the screen", {
176
- x1: z.number().describe("Starting X coordinate"),
177
- y1: z.number().describe("Starting Y coordinate"),
178
- x2: z.number().describe("Ending X coordinate"),
179
- y2: z.number().describe("Ending Y coordinate"),
180
- duration: z.number().optional().describe("Duration in milliseconds (default: 300)"),
145
+ x1: coordinateSchema.describe("Starting X coordinate"),
146
+ y1: coordinateSchema.describe("Starting Y coordinate"),
147
+ x2: coordinateSchema.describe("Ending X coordinate"),
148
+ y2: coordinateSchema.describe("Ending Y coordinate"),
149
+ duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 300)"),
181
150
  }, async ({ x1, y1, x2, y2, duration = 300 }) => {
182
- await shell(`input swipe ${x1} ${y1} ${x2} ${y2} ${duration}`);
183
- return {
184
- content: [
185
- {
186
- type: "text",
187
- text: `Swiped from (${x1}, ${y1}) to (${x2}, ${y2})`,
188
- },
189
- ],
190
- };
151
+ await runAdbShell([
152
+ "input", "swipe",
153
+ String(x1), String(y1), String(x2), String(y2), String(duration),
154
+ ]);
155
+ return { content: [{ type: "text", text: `Swiped from (${x1}, ${y1}) to (${x2}, ${y2})` }] };
191
156
  });
192
157
  // =====================================================
193
158
  // TOOL: scroll
194
159
  // =====================================================
195
160
  server.tool("scroll", "Scroll the screen in a direction", {
196
161
  direction: z.enum(["up", "down", "left", "right"]).describe("Direction to scroll"),
197
- amount: z.number().optional().describe("Scroll amount in pixels (default: 500)"),
162
+ amount: z.number().int().min(1).max(10_000).optional().describe("Scroll amount in pixels (default: 500)"),
198
163
  }, async ({ direction, amount = 500 }) => {
199
- // Get screen dimensions for centering the scroll
200
- const sizeOutput = await shell("wm size");
164
+ const sizeOutput = await runAdbShell(["wm", "size"]);
201
165
  const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
202
166
  const width = sizeMatch ? parseInt(sizeMatch[1]) : 1080;
203
167
  const height = sizeMatch ? parseInt(sizeMatch[2]) : 2400;
204
168
  const centerX = Math.round(width / 2);
205
169
  const centerY = Math.round(height / 2);
206
170
  let x1 = centerX, y1 = centerY, x2 = centerX, y2 = centerY;
171
+ const half = Math.round(amount / 2);
207
172
  switch (direction) {
208
173
  case "up":
209
- y1 = centerY + amount / 2;
210
- y2 = centerY - amount / 2;
174
+ y1 = centerY + half;
175
+ y2 = centerY - half;
211
176
  break;
212
177
  case "down":
213
- y1 = centerY - amount / 2;
214
- y2 = centerY + amount / 2;
178
+ y1 = centerY - half;
179
+ y2 = centerY + half;
215
180
  break;
216
181
  case "left":
217
- x1 = centerX + amount / 2;
218
- x2 = centerX - amount / 2;
182
+ x1 = centerX + half;
183
+ x2 = centerX - half;
219
184
  break;
220
185
  case "right":
221
- x1 = centerX - amount / 2;
222
- x2 = centerX + amount / 2;
186
+ x1 = centerX - half;
187
+ x2 = centerX + half;
223
188
  break;
224
189
  }
225
- await shell(`input swipe ${x1} ${y1} ${x2} ${y2} 300`);
226
- return {
227
- content: [
228
- {
229
- type: "text",
230
- text: `Scrolled ${direction}`,
231
- },
232
- ],
233
- };
190
+ await runAdbShell([
191
+ "input", "swipe",
192
+ String(x1), String(y1), String(x2), String(y2), "300",
193
+ ]);
194
+ return { content: [{ type: "text", text: `Scrolled ${direction}` }] };
234
195
  });
235
196
  // =====================================================
236
197
  // TOOL: press_key
237
198
  // =====================================================
238
199
  server.tool("press_key", "Press a system key (BACK, HOME, ENTER, etc)", {
239
- key: z.enum(["BACK", "HOME", "ENTER", "TAB", "DELETE", "MENU", "POWER", "VOLUME_UP", "VOLUME_DOWN"]).describe("Key to press"),
200
+ key: z.enum([
201
+ "BACK", "HOME", "ENTER", "TAB", "DELETE", "MENU", "POWER",
202
+ "VOLUME_UP", "VOLUME_DOWN",
203
+ ]).describe("Key to press"),
240
204
  }, async ({ key }) => {
241
205
  const keycodes = {
242
- BACK: 4,
243
- HOME: 3,
244
- ENTER: 66,
245
- TAB: 61,
246
- DELETE: 67,
247
- MENU: 82,
248
- POWER: 26,
249
- VOLUME_UP: 24,
250
- VOLUME_DOWN: 25,
251
- };
252
- await shell(`input keyevent ${keycodes[key]}`);
253
- return {
254
- content: [
255
- {
256
- type: "text",
257
- text: `Pressed ${key} key`,
258
- },
259
- ],
206
+ BACK: 4, HOME: 3, ENTER: 66, TAB: 61, DELETE: 67,
207
+ MENU: 82, POWER: 26, VOLUME_UP: 24, VOLUME_DOWN: 25,
260
208
  };
209
+ await runAdbShell(["input", "keyevent", String(keycodes[key])]);
210
+ return { content: [{ type: "text", text: `Pressed ${key} key` }] };
261
211
  });
262
212
  // =====================================================
263
213
  // TOOL: launch_app
264
214
  // =====================================================
265
- server.tool("launch_app", "Launch an application by its package name", {
266
- package: z.string().describe("Package name of the app (e.g., com.android.chrome)"),
215
+ server.tool("launch_app", "Launch an application by its package name (e.g., com.android.chrome). Package name is validated against the Android package naming convention.", {
216
+ package: packageNameSchema.describe("Package name of the app (e.g., com.android.chrome)"),
267
217
  }, async ({ package: pkg }) => {
268
- await shell(`monkey -p ${pkg} -c android.intent.category.LAUNCHER 1`);
269
- return {
270
- content: [
271
- {
272
- type: "text",
273
- text: `Launched ${pkg}`,
274
- },
275
- ],
276
- };
218
+ await runAdbShell([
219
+ "monkey",
220
+ "-p", pkg,
221
+ "-c", "android.intent.category.LAUNCHER",
222
+ "1",
223
+ ]);
224
+ return { content: [{ type: "text", text: `Launched ${pkg}` }] };
277
225
  });
278
226
  // =====================================================
279
227
  // TOOL: install_apk
280
228
  // =====================================================
281
- server.tool("install_apk", "Install an APK file on the device", {
282
- path: z.string().describe("Path to the APK file"),
229
+ server.tool("install_apk", "Install an APK file on the device. Path must end in .apk and contain no shell metacharacters.", {
230
+ path: apkPathSchema.describe("Path to the APK file on the host"),
283
231
  }, async ({ path: apkPath }) => {
284
- const result = await adb(`install -r ${apkPath}`);
285
- return {
286
- content: [
287
- {
288
- type: "text",
289
- text: `APK installed: ${result}`,
290
- },
291
- ],
292
- };
232
+ if (!fs.existsSync(apkPath)) {
233
+ throw new Error(`APK file not found: ${apkPath}`);
234
+ }
235
+ const result = await runAdb(["install", "-r", apkPath]);
236
+ return { content: [{ type: "text", text: `APK installed: ${result}` }] };
293
237
  });
294
238
  // =====================================================
295
239
  // TOOL: list_packages
296
240
  // =====================================================
297
- server.tool("list_packages", "List installed packages on the device", {
298
- filter: z.string().optional().describe("Filter packages by name (optional)"),
241
+ server.tool("list_packages", "List installed packages on the device. Optional filter is applied in-process (JavaScript), never on the device shell.", {
242
+ filter: searchFilterSchema.optional().describe("Filter packages by name (optional)"),
299
243
  }, async ({ filter }) => {
300
- let cmd = "pm list packages";
301
- if (filter) {
302
- cmd += ` | grep -i "${filter}"`;
303
- }
304
- const result = await shell(cmd);
305
- const packages = result.split("\n").map((p) => p.replace("package:", "")).filter(Boolean);
244
+ const raw = await runAdbShell(["pm", "list", "packages"]);
245
+ const needle = filter?.toLowerCase();
246
+ const packages = raw
247
+ .split("\n")
248
+ .map((line) => line.replace("package:", "").trim())
249
+ .filter((p) => p.length > 0)
250
+ .filter((p) => !needle || p.toLowerCase().includes(needle));
306
251
  return {
307
- content: [
308
- {
309
- type: "text",
310
- text: `Installed packages:\n${packages.join("\n")}`,
311
- },
312
- ],
252
+ content: [{ type: "text", text: `Installed packages:\n${packages.join("\n")}` }],
313
253
  };
314
254
  });
315
255
  // =====================================================
316
256
  // TOOL: get_logs
317
257
  // =====================================================
318
- server.tool("get_logs", "Get device logs (logcat)", {
319
- filter: z.string().optional().describe("Filter logs by tag or keyword"),
320
- lines: z.number().optional().describe("Number of lines to retrieve (default: 50)"),
321
- level: z.enum(["V", "D", "I", "W", "E"]).optional().describe("Minimum log level (V=Verbose, D=Debug, I=Info, W=Warn, E=Error)"),
258
+ server.tool("get_logs", "Get device logs (logcat). Filtering is applied in-process, never on the device shell.", {
259
+ filter: searchFilterSchema.optional().describe("Filter logs by tag or keyword (substring match in-process)"),
260
+ lines: positiveCountSchema.optional().describe("Number of lines to retrieve (default: 50, max 100000)"),
261
+ level: z.enum(["V", "D", "I", "W", "E"]).optional().describe("Minimum log level"),
322
262
  }, async ({ filter, lines = 50, level }) => {
323
- let cmd = `logcat -d -t ${lines}`;
324
- if (level) {
325
- cmd += ` *:${level}`;
326
- }
327
- if (filter) {
328
- cmd += ` | grep -i "${filter}"`;
329
- }
330
- const logs = await shell(cmd);
331
- return {
332
- content: [
333
- {
334
- type: "text",
335
- text: `Logs:\n${logs}`,
336
- },
337
- ],
338
- };
263
+ const argv = ["logcat", "-d", "-t", String(lines)];
264
+ if (level)
265
+ argv.push(`*:${level}`);
266
+ const raw = await runAdbShell(argv);
267
+ const needle = filter?.toLowerCase();
268
+ const filtered = needle
269
+ ? raw.split("\n").filter((l) => l.toLowerCase().includes(needle)).join("\n")
270
+ : raw;
271
+ return { content: [{ type: "text", text: `Logs:\n${filtered}` }] };
339
272
  });
340
273
  // =====================================================
341
274
  // TOOL: device_info
342
275
  // =====================================================
343
276
  server.tool("device_info", "Get information about the connected device", {}, async () => {
344
- const [model, android, sdk, density, size, battery] = await Promise.all([
345
- shell("getprop ro.product.model"),
346
- shell("getprop ro.build.version.release"),
347
- shell("getprop ro.build.version.sdk"),
348
- shell("wm density"),
349
- shell("wm size"),
350
- shell("dumpsys battery | grep level"),
277
+ const [model, android, sdk, density, size, batteryDump] = await Promise.all([
278
+ runAdbShell(["getprop", "ro.product.model"]),
279
+ runAdbShell(["getprop", "ro.build.version.release"]),
280
+ runAdbShell(["getprop", "ro.build.version.sdk"]),
281
+ runAdbShell(["wm", "density"]),
282
+ runAdbShell(["wm", "size"]),
283
+ runAdbShell(["dumpsys", "battery"]),
351
284
  ]);
285
+ const batteryLine = batteryDump.split("\n").find((l) => /level:/i.test(l)) || "";
352
286
  return {
353
287
  content: [
354
288
  {
355
289
  type: "text",
356
- text: `Device: ${model}
357
- Android: ${android} (SDK ${sdk})
358
- Screen: ${size.replace("Physical size: ", "")}
359
- Density: ${density.replace("Physical density: ", "")}
360
- Battery: ${battery.replace("level: ", "")}%`,
290
+ text: `Device: ${model}
291
+ Android: ${android} (SDK ${sdk})
292
+ Screen: ${size.replace("Physical size: ", "")}
293
+ Density: ${density.replace("Physical density: ", "")}
294
+ Battery: ${batteryLine.replace(/^\s*level:\s*/, "")}%`,
361
295
  },
362
296
  ],
363
297
  };
@@ -366,95 +300,64 @@ Battery: ${battery.replace("level: ", "")}%`,
366
300
  // TOOL: clear_app_data
367
301
  // =====================================================
368
302
  server.tool("clear_app_data", "Clear all data for an application", {
369
- package: z.string().describe("Package name of the app"),
303
+ package: packageNameSchema.describe("Package name of the app"),
370
304
  }, async ({ package: pkg }) => {
371
- await shell(`pm clear ${pkg}`);
372
- return {
373
- content: [
374
- {
375
- type: "text",
376
- text: `Data cleared for ${pkg}`,
377
- },
378
- ],
379
- };
305
+ await runAdbShell(["pm", "clear", pkg]);
306
+ return { content: [{ type: "text", text: `Data cleared for ${pkg}` }] };
380
307
  });
381
308
  // =====================================================
382
309
  // TOOL: force_stop
383
310
  // =====================================================
384
311
  server.tool("force_stop", "Force stop an application", {
385
- package: z.string().describe("Package name of the app"),
312
+ package: packageNameSchema.describe("Package name of the app"),
386
313
  }, async ({ package: pkg }) => {
387
- await shell(`am force-stop ${pkg}`);
388
- return {
389
- content: [
390
- {
391
- type: "text",
392
- text: `Force stopped ${pkg}`,
393
- },
394
- ],
395
- };
314
+ await runAdbShell(["am", "force-stop", pkg]);
315
+ return { content: [{ type: "text", text: `Force stopped ${pkg}` }] };
396
316
  });
397
317
  // =====================================================
398
318
  // TOOL: get_current_activity
399
319
  // =====================================================
400
320
  server.tool("get_current_activity", "Get the currently focused activity/screen", {}, async () => {
401
321
  let activity = "Unknown";
402
- // Try multiple methods for compatibility across emulators
403
322
  try {
404
- // Method 1: mResumedActivity (standard Android)
405
- const result1 = await shell("dumpsys activity activities | grep -E 'mResumedActivity|mCurrentFocus' || true");
406
- if (result1 && result1.trim()) {
407
- activity = result1.trim();
408
- }
409
- }
410
- catch {
411
- // Ignore
323
+ const dump = await runAdbShell(["dumpsys", "activity", "activities"]);
324
+ const line = dump.split("\n").find((l) => /mResumedActivity|mCurrentFocus/.test(l));
325
+ if (line?.trim())
326
+ activity = line.trim();
412
327
  }
328
+ catch { /* ignore */ }
413
329
  if (activity === "Unknown") {
414
330
  try {
415
- // Method 2: topActivity (alternative)
416
- const result2 = await shell("dumpsys activity top | head -5 || true");
417
- if (result2 && result2.trim()) {
418
- activity = result2.trim();
419
- }
420
- }
421
- catch {
422
- // Ignore
331
+ const top = await runAdbShell(["dumpsys", "activity", "top"]);
332
+ const first5 = top.split("\n").slice(0, 5).join("\n").trim();
333
+ if (first5)
334
+ activity = first5;
423
335
  }
336
+ catch { /* ignore */ }
424
337
  }
425
338
  if (activity === "Unknown") {
426
339
  try {
427
- // Method 3: window focus (Redroid/Docker compatible)
428
- const result3 = await shell("dumpsys window | grep -E 'mCurrentFocus|mFocusedApp' || true");
429
- if (result3 && result3.trim()) {
430
- activity = result3.trim();
431
- }
432
- }
433
- catch {
434
- // Ignore
340
+ const win = await runAdbShell(["dumpsys", "window"]);
341
+ const line = win.split("\n").find((l) => /mCurrentFocus|mFocusedApp/.test(l));
342
+ if (line?.trim())
343
+ activity = line.trim();
435
344
  }
345
+ catch { /* ignore */ }
436
346
  }
437
- return {
438
- content: [
439
- {
440
- type: "text",
441
- text: `Current activity:\n${activity}`,
442
- },
443
- ],
444
- };
347
+ return { content: [{ type: "text", text: `Current activity:\n${activity}` }] };
445
348
  });
446
349
  // =====================================================
447
350
  // TOOL: wait_for_element
448
351
  // =====================================================
449
352
  server.tool("wait_for_element", "Wait for a UI element with specific text to appear", {
450
- text: z.string().describe("Text of the element to wait for"),
451
- timeout: z.number().optional().describe("Timeout in seconds (default: 10)"),
353
+ text: freeTextSchema.describe("Text of the element to wait for"),
354
+ timeout: z.number().int().min(1).max(600).optional().describe("Timeout in seconds (default: 10)"),
452
355
  }, async ({ text, timeout = 10 }) => {
453
356
  const startTime = Date.now();
454
357
  const timeoutMs = timeout * 1000;
455
358
  while (Date.now() - startTime < timeoutMs) {
456
- await shell("uiautomator dump /sdcard/ui_dump.xml");
457
- const xml = await shell("cat /sdcard/ui_dump.xml");
359
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
360
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
458
361
  if (xml.toLowerCase().includes(text.toLowerCase())) {
459
362
  return {
460
363
  content: [
@@ -465,179 +368,119 @@ server.tool("wait_for_element", "Wait for a UI element with specific text to app
465
368
  ],
466
369
  };
467
370
  }
468
- // Wait 500ms before next check
469
371
  await new Promise((resolve) => setTimeout(resolve, 500));
470
372
  }
471
373
  return {
472
- content: [
473
- {
474
- type: "text",
475
- text: `Timeout: Element "${text}" not found after ${timeout}s`,
476
- },
477
- ],
374
+ content: [{ type: "text", text: `Timeout: Element "${text}" not found after ${timeout}s` }],
478
375
  };
479
376
  });
480
377
  // =====================================================
481
378
  // TOOL: long_press
482
379
  // =====================================================
483
380
  server.tool("long_press", "Perform a long press at the specified coordinates (useful for context menus)", {
484
- x: z.number().describe("X coordinate"),
485
- y: z.number().describe("Y coordinate"),
486
- duration: z.number().optional().describe("Duration in milliseconds (default: 1000)"),
381
+ x: coordinateSchema.describe("X coordinate"),
382
+ y: coordinateSchema.describe("Y coordinate"),
383
+ duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 1000)"),
487
384
  }, async ({ x, y, duration = 1000 }) => {
488
- // Long press is simulated with a swipe to the same position
489
- await shell(`input swipe ${x} ${y} ${x} ${y} ${duration}`);
490
- return {
491
- content: [
492
- {
493
- type: "text",
494
- text: `Long pressed at (${x}, ${y}) for ${duration}ms`,
495
- },
496
- ],
497
- };
385
+ await runAdbShell([
386
+ "input", "swipe",
387
+ String(x), String(y), String(x), String(y), String(duration),
388
+ ]);
389
+ return { content: [{ type: "text", text: `Long pressed at (${x}, ${y}) for ${duration}ms` }] };
498
390
  });
499
391
  // =====================================================
500
392
  // TOOL: clear_input
501
393
  // =====================================================
502
394
  server.tool("clear_input", "Clear the currently focused text input field", {
503
- maxChars: z.number().optional().describe("Maximum characters to delete (default: 100)"),
395
+ maxChars: z.number().int().min(1).max(10_000).optional().describe("Maximum characters to delete (default: 100)"),
504
396
  }, async ({ maxChars = 100 }) => {
505
- // Move cursor to end, then delete all characters
506
- // KEYCODE_MOVE_END = 123, KEYCODE_DEL = 67
507
- await shell("input keyevent 123"); // Move to end
508
- // Delete characters one by one
397
+ await runAdbShell(["input", "keyevent", "123"]); // MOVE_END
509
398
  for (let i = 0; i < maxChars; i++) {
510
- await shell("input keyevent 67"); // Delete
399
+ await runAdbShell(["input", "keyevent", "67"]); // DEL
511
400
  }
512
- return {
513
- content: [
514
- {
515
- type: "text",
516
- text: `Cleared input field (deleted up to ${maxChars} characters)`,
517
- },
518
- ],
519
- };
401
+ return { content: [{ type: "text", text: `Cleared input field (deleted up to ${maxChars} characters)` }] };
520
402
  });
521
403
  // =====================================================
522
404
  // TOOL: select_all
523
405
  // =====================================================
524
406
  server.tool("select_all", "Select all text in the currently focused input field", {}, async () => {
525
- // CTRL+A = KEYCODE_CTRL_LEFT (113) + KEYCODE_A (29)
526
- // Using input keyevent with --longpress for modifier keys
527
- await shell("input keyevent --longpress 113 29");
528
- return {
529
- content: [
530
- {
531
- type: "text",
532
- text: "Selected all text in focused field",
533
- },
534
- ],
535
- };
407
+ // CTRL+A = KEYCODE_CTRL_LEFT (113) + KEYCODE_A (29) via --longpress combo
408
+ await runAdbShell(["input", "keyevent", "--longpress", "113", "29"]);
409
+ return { content: [{ type: "text", text: "Selected all text in focused field" }] };
536
410
  });
537
411
  // =====================================================
538
412
  // TOOL: set_text
539
413
  // =====================================================
540
- server.tool("set_text", "Clear the current input field and type new text (combines clear + type)", {
541
- text: z.string().describe("Text to type after clearing"),
542
- maxClearChars: z.number().optional().describe("Maximum characters to clear (default: 100)"),
414
+ server.tool("set_text", "Clear the current input field and type new text. Unicode is supported via URL-encoding. Shell metacharacters are rejected.", {
415
+ text: typeableTextSchema.describe("Text to type after clearing"),
416
+ maxClearChars: z.number().int().min(1).max(10_000).optional().describe("Maximum characters to clear (default: 100)"),
543
417
  }, async ({ text, maxClearChars = 100 }) => {
544
- // First clear the field
545
- await shell("input keyevent 123"); // Move to end
418
+ await runAdbShell(["input", "keyevent", "123"]);
546
419
  for (let i = 0; i < maxClearChars; i++) {
547
- await shell("input keyevent 67"); // Delete
420
+ await runAdbShell(["input", "keyevent", "67"]);
548
421
  }
549
- // Then type new text
550
- const escaped = text.replace(/ /g, "%s").replace(/'/g, "\\'");
551
- await shell(`input text "${escaped}"`);
552
- return {
553
- content: [
554
- {
555
- type: "text",
556
- text: `Cleared field and typed: "${text}"`,
557
- },
558
- ],
559
- };
422
+ await runAdbShell(["input", "text", encodeTextForInput(text)]);
423
+ return { content: [{ type: "text", text: `Cleared field and typed: "${text}"` }] };
560
424
  });
561
425
  // =====================================================
562
426
  // TOOL: drag
563
427
  // =====================================================
564
428
  server.tool("drag", "Perform a drag gesture from one point to another (slower than swipe, for drag & drop)", {
565
- x1: z.number().describe("Starting X coordinate"),
566
- y1: z.number().describe("Starting Y coordinate"),
567
- x2: z.number().describe("Ending X coordinate"),
568
- y2: z.number().describe("Ending Y coordinate"),
569
- duration: z.number().optional().describe("Duration in milliseconds (default: 1000)"),
429
+ x1: coordinateSchema.describe("Starting X coordinate"),
430
+ y1: coordinateSchema.describe("Starting Y coordinate"),
431
+ x2: coordinateSchema.describe("Ending X coordinate"),
432
+ y2: coordinateSchema.describe("Ending Y coordinate"),
433
+ duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 1000)"),
570
434
  }, async ({ x1, y1, x2, y2, duration = 1000 }) => {
571
- await shell(`input swipe ${x1} ${y1} ${x2} ${y2} ${duration}`);
435
+ await runAdbShell([
436
+ "input", "swipe",
437
+ String(x1), String(y1), String(x2), String(y2), String(duration),
438
+ ]);
572
439
  return {
573
- content: [
574
- {
575
- type: "text",
576
- text: `Dragged from (${x1}, ${y1}) to (${x2}, ${y2}) over ${duration}ms`,
577
- },
578
- ],
440
+ content: [{ type: "text", text: `Dragged from (${x1}, ${y1}) to (${x2}, ${y2}) over ${duration}ms` }],
579
441
  };
580
442
  });
581
443
  // =====================================================
582
444
  // TOOL: double_tap
583
445
  // =====================================================
584
446
  server.tool("double_tap", "Perform a double tap at the specified coordinates", {
585
- x: z.number().describe("X coordinate"),
586
- y: z.number().describe("Y coordinate"),
447
+ x: coordinateSchema.describe("X coordinate"),
448
+ y: coordinateSchema.describe("Y coordinate"),
587
449
  }, async ({ x, y }) => {
588
- await shell(`input tap ${x} ${y}`);
450
+ await runAdbShell(["input", "tap", String(x), String(y)]);
589
451
  await new Promise((resolve) => setTimeout(resolve, 100));
590
- await shell(`input tap ${x} ${y}`);
591
- return {
592
- content: [
593
- {
594
- type: "text",
595
- text: `Double tapped at (${x}, ${y})`,
596
- },
597
- ],
598
- };
452
+ await runAdbShell(["input", "tap", String(x), String(y)]);
453
+ return { content: [{ type: "text", text: `Double tapped at (${x}, ${y})` }] };
599
454
  });
600
455
  // =====================================================
601
456
  // TOOL: get_screen_size
602
457
  // =====================================================
603
458
  server.tool("get_screen_size", "Get the screen dimensions and density of the device", {}, async () => {
604
459
  const [sizeOutput, densityOutput] = await Promise.all([
605
- shell("wm size"),
606
- shell("wm density"),
460
+ runAdbShell(["wm", "size"]),
461
+ runAdbShell(["wm", "density"]),
607
462
  ]);
608
463
  const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
609
464
  const densityMatch = densityOutput.match(/(\d+)/);
610
465
  const width = sizeMatch ? parseInt(sizeMatch[1]) : 0;
611
466
  const height = sizeMatch ? parseInt(sizeMatch[2]) : 0;
612
467
  const density = densityMatch ? parseInt(densityMatch[1]) : 0;
613
- return {
614
- content: [
615
- {
616
- type: "text",
617
- text: JSON.stringify({ width, height, density }, null, 2),
618
- },
619
- ],
620
- };
468
+ return { content: [{ type: "text", text: JSON.stringify({ width, height, density }, null, 2) }] };
621
469
  });
622
470
  // =====================================================
623
471
  // TOOL: is_element_visible
624
472
  // =====================================================
625
473
  server.tool("is_element_visible", "Check if an element with specific text or resource-id is visible on screen", {
626
- text: z.string().optional().describe("Text to search for"),
627
- resourceId: z.string().optional().describe("Resource ID to search for"),
474
+ text: freeTextSchema.optional().describe("Text to search for"),
475
+ resourceId: resourceIdSchema.optional().describe("Resource ID to search for"),
628
476
  }, async ({ text, resourceId }) => {
629
477
  if (!text && !resourceId) {
630
478
  return {
631
- content: [
632
- {
633
- type: "text",
634
- text: JSON.stringify({ visible: false, error: "Must provide text or resourceId" }),
635
- },
636
- ],
479
+ content: [{ type: "text", text: JSON.stringify({ visible: false, error: "Must provide text or resourceId" }) }],
637
480
  };
638
481
  }
639
- await shell("uiautomator dump /sdcard/ui_dump.xml");
640
- const xml = await shell("cat /sdcard/ui_dump.xml");
482
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
483
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
641
484
  let found = false;
642
485
  let bounds = null;
643
486
  if (text) {
@@ -648,100 +491,71 @@ server.tool("is_element_visible", "Check if an element with specific text or res
648
491
  found = true;
649
492
  const [, x1, y1, x2, y2] = match;
650
493
  bounds = {
651
- x: parseInt(x1),
652
- y: parseInt(y1),
653
- width: parseInt(x2) - parseInt(x1),
654
- height: parseInt(y2) - parseInt(y1),
494
+ x: parseInt(x1), y: parseInt(y1),
495
+ width: parseInt(x2) - parseInt(x1), height: parseInt(y2) - parseInt(y1),
655
496
  centerX: Math.round((parseInt(x1) + parseInt(x2)) / 2),
656
497
  centerY: Math.round((parseInt(y1) + parseInt(y2)) / 2),
657
498
  };
658
499
  }
659
500
  }
660
501
  if (resourceId && !found) {
661
- const regex = new RegExp(`resource-id="${resourceId}".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`, "i");
502
+ const regex = new RegExp(`resource-id="${resourceId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`, "i");
662
503
  const match = regex.exec(xml);
663
504
  if (match) {
664
505
  found = true;
665
506
  const [, x1, y1, x2, y2] = match;
666
507
  bounds = {
667
- x: parseInt(x1),
668
- y: parseInt(y1),
669
- width: parseInt(x2) - parseInt(x1),
670
- height: parseInt(y2) - parseInt(y1),
508
+ x: parseInt(x1), y: parseInt(y1),
509
+ width: parseInt(x2) - parseInt(x1), height: parseInt(y2) - parseInt(y1),
671
510
  centerX: Math.round((parseInt(x1) + parseInt(x2)) / 2),
672
511
  centerY: Math.round((parseInt(y1) + parseInt(y2)) / 2),
673
512
  };
674
513
  }
675
514
  }
676
- return {
677
- content: [
678
- {
679
- type: "text",
680
- text: JSON.stringify({ visible: found, bounds }, null, 2),
681
- },
682
- ],
683
- };
515
+ return { content: [{ type: "text", text: JSON.stringify({ visible: found, bounds }, null, 2) }] };
684
516
  });
685
517
  // =====================================================
686
518
  // TOOL: get_element_bounds
687
519
  // =====================================================
688
520
  server.tool("get_element_bounds", "Get the exact bounds and center coordinates of an element", {
689
- text: z.string().optional().describe("Text of the element"),
690
- resourceId: z.string().optional().describe("Resource ID of the element"),
691
- index: z.number().optional().describe("Index if multiple matches (0-based, default: 0)"),
521
+ text: freeTextSchema.optional().describe("Text of the element"),
522
+ resourceId: resourceIdSchema.optional().describe("Resource ID of the element"),
523
+ index: z.number().int().min(0).max(10_000).optional().describe("Index if multiple matches (0-based, default: 0)"),
692
524
  }, async ({ text, resourceId, index = 0 }) => {
693
525
  if (!text && !resourceId) {
694
- return {
695
- content: [
696
- {
697
- type: "text",
698
- text: JSON.stringify({ error: "Must provide text or resourceId" }),
699
- },
700
- ],
701
- };
526
+ return { content: [{ type: "text", text: JSON.stringify({ error: "Must provide text or resourceId" }) }] };
702
527
  }
703
- await shell("uiautomator dump /sdcard/ui_dump.xml");
704
- const xml = await shell("cat /sdcard/ui_dump.xml");
528
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
529
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
705
530
  let pattern;
706
531
  if (text) {
707
532
  const escapedText = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
708
533
  pattern = `text="[^"]*${escapedText}[^"]*".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
709
534
  }
710
535
  else {
711
- pattern = `resource-id="${resourceId}".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
536
+ pattern = `resource-id="${resourceId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}".*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
712
537
  }
713
538
  const regex = new RegExp(pattern, "gi");
714
539
  const matches = [];
715
540
  let match;
716
541
  while ((match = regex.exec(xml)) !== null) {
717
542
  matches.push({
718
- x1: parseInt(match[1]),
719
- y1: parseInt(match[2]),
720
- x2: parseInt(match[3]),
721
- y2: parseInt(match[4]),
543
+ x1: parseInt(match[1]), y1: parseInt(match[2]),
544
+ x2: parseInt(match[3]), y2: parseInt(match[4]),
722
545
  });
723
546
  }
724
547
  if (matches.length === 0) {
725
- return {
726
- content: [
727
- {
728
- type: "text",
729
- text: JSON.stringify({ found: false, error: "Element not found" }),
730
- },
731
- ],
732
- };
548
+ return { content: [{ type: "text", text: JSON.stringify({ found: false, error: "Element not found" }) }] };
733
549
  }
734
550
  if (index >= matches.length) {
735
551
  return {
736
- content: [
737
- {
552
+ content: [{
738
553
  type: "text",
739
554
  text: JSON.stringify({
740
555
  found: false,
741
556
  error: `Index ${index} out of range. Found ${matches.length} matches.`,
742
557
  }),
743
- },
744
- ],
558
+ }],
745
559
  };
746
560
  }
747
561
  const m = matches[index];
@@ -749,35 +563,20 @@ server.tool("get_element_bounds", "Get the exact bounds and center coordinates o
749
563
  found: true,
750
564
  matchCount: matches.length,
751
565
  index,
752
- bounds: {
753
- x: m.x1,
754
- y: m.y1,
755
- width: m.x2 - m.x1,
756
- height: m.y2 - m.y1,
757
- },
758
- center: {
759
- x: Math.round((m.x1 + m.x2) / 2),
760
- y: Math.round((m.y1 + m.y2) / 2),
761
- },
762
- };
763
- return {
764
- content: [
765
- {
766
- type: "text",
767
- text: JSON.stringify(result, null, 2),
768
- },
769
- ],
566
+ bounds: { x: m.x1, y: m.y1, width: m.x2 - m.x1, height: m.y2 - m.y1 },
567
+ center: { x: Math.round((m.x1 + m.x2) / 2), y: Math.round((m.y1 + m.y2) / 2) },
770
568
  };
569
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
771
570
  });
772
571
  // =====================================================
773
572
  // TOOL: scroll_to_text
774
573
  // =====================================================
775
574
  server.tool("scroll_to_text", "Scroll the screen until an element with specific text is visible", {
776
- text: z.string().describe("Text to search for"),
575
+ text: freeTextSchema.describe("Text to search for"),
777
576
  direction: z.enum(["up", "down"]).optional().describe("Scroll direction (default: down)"),
778
- maxScrolls: z.number().optional().describe("Maximum scroll attempts (default: 10)"),
577
+ maxScrolls: z.number().int().min(1).max(100).optional().describe("Maximum scroll attempts (default: 10)"),
779
578
  }, async ({ text, direction = "down", maxScrolls = 10 }) => {
780
- const sizeOutput = await shell("wm size");
579
+ const sizeOutput = await runAdbShell(["wm", "size"]);
781
580
  const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
782
581
  const width = sizeMatch ? parseInt(sizeMatch[1]) : 1080;
783
582
  const height = sizeMatch ? parseInt(sizeMatch[2]) : 2400;
@@ -785,45 +584,28 @@ server.tool("scroll_to_text", "Scroll the screen until an element with specific
785
584
  const startY = direction === "down" ? Math.round(height * 0.7) : Math.round(height * 0.3);
786
585
  const endY = direction === "down" ? Math.round(height * 0.3) : Math.round(height * 0.7);
787
586
  for (let i = 0; i < maxScrolls; i++) {
788
- await shell("uiautomator dump /sdcard/ui_dump.xml");
789
- const xml = await shell("cat /sdcard/ui_dump.xml");
587
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
588
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
790
589
  if (xml.toLowerCase().includes(text.toLowerCase())) {
791
- return {
792
- content: [
793
- {
794
- type: "text",
795
- text: `Found "${text}" after ${i} scroll(s)`,
796
- },
797
- ],
798
- };
590
+ return { content: [{ type: "text", text: `Found "${text}" after ${i} scroll(s)` }] };
799
591
  }
800
- await shell(`input swipe ${centerX} ${startY} ${centerX} ${endY} 300`);
592
+ await runAdbShell([
593
+ "input", "swipe",
594
+ String(centerX), String(startY), String(centerX), String(endY), "300",
595
+ ]);
801
596
  await new Promise((resolve) => setTimeout(resolve, 500));
802
597
  }
803
- return {
804
- content: [
805
- {
806
- type: "text",
807
- text: `Text "${text}" not found after ${maxScrolls} scrolls`,
808
- },
809
- ],
810
- };
598
+ return { content: [{ type: "text", text: `Text "${text}" not found after ${maxScrolls} scrolls` }] };
811
599
  });
812
600
  // =====================================================
813
601
  // TOOL: wait_for_ui_stable
814
602
  // =====================================================
815
- /**
816
- * Extract a normalized fingerprint of UI elements from XML
817
- * Only considers text, bounds, and class - ignores dynamic attributes
818
- */
819
603
  function extractUIFingerprint(xml) {
820
604
  const elements = [];
821
- // Match elements with text or class and bounds
822
605
  const regex = /(?:text="([^"]*)")?[^>]*(?:class="([^"]*)")?[^>]*bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;
823
606
  let match;
824
607
  while ((match = regex.exec(xml)) !== null) {
825
608
  const [, text, className, x1, y1, x2, y2] = match;
826
- // Only include elements with text or meaningful classes
827
609
  if (text || className) {
828
610
  elements.push(`${text || ""}|${className || ""}|${x1},${y1},${x2},${y2}`);
829
611
  }
@@ -831,27 +613,25 @@ function extractUIFingerprint(xml) {
831
613
  return elements.sort().join("\n");
832
614
  }
833
615
  server.tool("wait_for_ui_stable", "Wait for the UI to stop changing (useful after animations)", {
834
- timeout: z.number().optional().describe("Timeout in milliseconds (default: 5000)"),
835
- checkInterval: z.number().optional().describe("Check interval in milliseconds (default: 500)"),
616
+ timeout: z.number().int().min(100).max(600_000).optional().describe("Timeout in milliseconds (default: 5000)"),
617
+ checkInterval: z.number().int().min(50).max(10_000).optional().describe("Check interval in milliseconds (default: 500)"),
836
618
  }, async ({ timeout = 5000, checkInterval = 500 }) => {
837
619
  const startTime = Date.now();
838
620
  let lastFingerprint = "";
839
621
  let stableCount = 0;
840
622
  while (Date.now() - startTime < timeout) {
841
- await shell("uiautomator dump /sdcard/ui_dump.xml");
842
- const currentXml = await shell("cat /sdcard/ui_dump.xml");
623
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
624
+ const currentXml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
843
625
  const currentFingerprint = extractUIFingerprint(currentXml);
844
626
  if (currentFingerprint === lastFingerprint) {
845
627
  stableCount++;
846
628
  if (stableCount >= 2) {
847
629
  const elapsed = Date.now() - startTime;
848
630
  return {
849
- content: [
850
- {
631
+ content: [{
851
632
  type: "text",
852
633
  text: `UI stable after ${elapsed < 1000 ? elapsed + "ms" : Math.round(elapsed / 1000) + "s"}`,
853
- },
854
- ],
634
+ }],
855
635
  };
856
636
  }
857
637
  }
@@ -861,183 +641,151 @@ server.tool("wait_for_ui_stable", "Wait for the UI to stop changing (useful afte
861
641
  }
862
642
  await new Promise((resolve) => setTimeout(resolve, checkInterval));
863
643
  }
864
- return {
865
- content: [
866
- {
867
- type: "text",
868
- text: `Timeout: UI did not stabilize within ${timeout}ms`,
869
- },
870
- ],
871
- };
644
+ return { content: [{ type: "text", text: `Timeout: UI did not stabilize within ${timeout}ms` }] };
872
645
  });
873
646
  // =====================================================
874
647
  // TOOL: wait_for_element_gone
875
648
  // =====================================================
876
649
  server.tool("wait_for_element_gone", "Wait for an element to disappear from the screen", {
877
- text: z.string().describe("Text of the element to wait for disappearance"),
878
- timeout: z.number().optional().describe("Timeout in milliseconds (default: 10000)"),
879
- }, async ({ text, timeout = 10000 }) => {
650
+ text: freeTextSchema.describe("Text of the element to wait for disappearance"),
651
+ timeout: z.number().int().min(100).max(600_000).optional().describe("Timeout in milliseconds (default: 10000)"),
652
+ }, async ({ text, timeout = 10_000 }) => {
880
653
  const startTime = Date.now();
881
654
  while (Date.now() - startTime < timeout) {
882
- await shell("uiautomator dump /sdcard/ui_dump.xml");
883
- const xml = await shell("cat /sdcard/ui_dump.xml");
655
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
656
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
884
657
  if (!xml.toLowerCase().includes(text.toLowerCase())) {
885
658
  return {
886
- content: [
887
- {
659
+ content: [{
888
660
  type: "text",
889
661
  text: `Element "${text}" disappeared after ${Math.round((Date.now() - startTime) / 1000)}s`,
890
- },
891
- ],
662
+ }],
892
663
  };
893
664
  }
894
665
  await new Promise((resolve) => setTimeout(resolve, 500));
895
666
  }
896
- return {
897
- content: [
898
- {
899
- type: "text",
900
- text: `Timeout: Element "${text}" still visible after ${timeout}ms`,
901
- },
902
- ],
903
- };
667
+ return { content: [{ type: "text", text: `Timeout: Element "${text}" still visible after ${timeout}ms` }] };
904
668
  });
905
669
  // =====================================================
906
670
  // TOOL: multi_tap
907
671
  // =====================================================
908
672
  server.tool("multi_tap", "Perform multiple rapid taps at the same position", {
909
- x: z.number().describe("X coordinate"),
910
- y: z.number().describe("Y coordinate"),
911
- taps: z.number().optional().describe("Number of taps (default: 2)"),
912
- interval: z.number().optional().describe("Interval between taps in ms (default: 100)"),
673
+ x: coordinateSchema.describe("X coordinate"),
674
+ y: coordinateSchema.describe("Y coordinate"),
675
+ taps: z.number().int().min(1).max(100).optional().describe("Number of taps (default: 2)"),
676
+ interval: durationMsSchema.optional().describe("Interval between taps in ms (default: 100)"),
913
677
  }, async ({ x, y, taps = 2, interval = 100 }) => {
914
678
  for (let i = 0; i < taps; i++) {
915
- await shell(`input tap ${x} ${y}`);
679
+ await runAdbShell(["input", "tap", String(x), String(y)]);
916
680
  if (i < taps - 1) {
917
681
  await new Promise((resolve) => setTimeout(resolve, interval));
918
682
  }
919
683
  }
920
- return {
921
- content: [
922
- {
923
- type: "text",
924
- text: `Performed ${taps} taps at (${x}, ${y})`,
925
- },
926
- ],
927
- };
684
+ return { content: [{ type: "text", text: `Performed ${taps} taps at (${x}, ${y})` }] };
928
685
  });
929
686
  // =====================================================
930
687
  // TOOL: pinch_zoom
931
688
  // =====================================================
932
689
  server.tool("pinch_zoom", "Perform a pinch zoom gesture (requires Android 8+)", {
933
- x: z.number().describe("Center X coordinate"),
934
- y: z.number().describe("Center Y coordinate"),
935
- scale: z.number().describe("Scale factor (>1 zoom in, <1 zoom out)"),
936
- duration: z.number().optional().describe("Duration in milliseconds (default: 500)"),
690
+ x: coordinateSchema.describe("Center X coordinate"),
691
+ y: coordinateSchema.describe("Center Y coordinate"),
692
+ scale: z.number().min(0.1).max(10).describe("Scale factor (>1 zoom in, <1 zoom out)"),
693
+ duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 500)"),
937
694
  }, async ({ x, y, scale, duration = 500 }) => {
938
- // Pinch zoom simulation using two swipe gestures
939
- // This is a simplified approach - real multitouch requires instrumentation
940
695
  const distance = 200;
941
696
  const scaledDistance = Math.round(distance * scale);
942
697
  if (scale > 1) {
943
- // Zoom in: fingers move apart
944
- // Simulate with two sequential swipes from center outward
945
698
  const halfDist = Math.round(scaledDistance / 2);
946
- await shell(`input swipe ${x} ${y - 50} ${x} ${y - halfDist} ${duration}`);
947
- await shell(`input swipe ${x} ${y + 50} ${x} ${y + halfDist} ${duration}`);
699
+ await runAdbShell([
700
+ "input", "swipe",
701
+ String(x), String(y - 50), String(x), String(y - halfDist), String(duration),
702
+ ]);
703
+ await runAdbShell([
704
+ "input", "swipe",
705
+ String(x), String(y + 50), String(x), String(y + halfDist), String(duration),
706
+ ]);
948
707
  }
949
708
  else {
950
- // Zoom out: fingers move together
951
709
  const halfDist = Math.round(distance / 2);
952
710
  const targetDist = Math.round((distance * scale) / 2);
953
- await shell(`input swipe ${x} ${y - halfDist} ${x} ${y - targetDist} ${duration}`);
954
- await shell(`input swipe ${x} ${y + halfDist} ${x} ${y + targetDist} ${duration}`);
711
+ await runAdbShell([
712
+ "input", "swipe",
713
+ String(x), String(y - halfDist), String(x), String(y - targetDist), String(duration),
714
+ ]);
715
+ await runAdbShell([
716
+ "input", "swipe",
717
+ String(x), String(y + halfDist), String(x), String(y + targetDist), String(duration),
718
+ ]);
955
719
  }
956
720
  return {
957
- content: [
958
- {
721
+ content: [{
959
722
  type: "text",
960
723
  text: `Pinch zoom at (${x}, ${y}) with scale ${scale}. Note: True multitouch requires instrumentation.`,
961
- },
962
- ],
724
+ }],
963
725
  };
964
726
  });
965
727
  // =====================================================
966
728
  // TOOL: set_clipboard
967
729
  // =====================================================
968
- server.tool("set_clipboard", "Set text to the device clipboard", {
969
- text: z.string().describe("Text to copy to clipboard"),
730
+ server.tool("set_clipboard", "Set text to the device clipboard. Text is transferred via `adb push` (binary transfer, no shell involvement, full Unicode support).", {
731
+ text: freeTextSchema.describe("Text to copy to clipboard"),
970
732
  }, async ({ text }) => {
971
- const base64Text = Buffer.from(text).toString("base64");
972
- // Try multiple paths for compatibility (standard emulators vs Redroid/Docker)
973
733
  const paths = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];
734
+ const tmpLocal = path.join(os.tmpdir(), `mcp-clipboard-${process.pid}-${Date.now()}.txt`);
735
+ fs.writeFileSync(tmpLocal, text, "utf8");
974
736
  let success = false;
975
- for (const clipPath of paths) {
976
- try {
977
- // Use single quotes to ensure the entire command runs on device (pipe included)
978
- await shell(`'echo "${base64Text}" | base64 -d > ${clipPath}'`);
979
- // Verify write succeeded
980
- const verify = await shell(`cat ${clipPath} 2>/dev/null`);
981
- if (verify && verify.length > 0) {
982
- success = true;
983
- break;
737
+ let usedPath = "";
738
+ try {
739
+ for (const clipPath of paths) {
740
+ try {
741
+ await runAdb(["push", tmpLocal, clipPath]);
742
+ // Verificar con cat (ruta fija, sin input de LLM)
743
+ const verify = await runAdbShell(["cat", clipPath]);
744
+ if (verify && verify.length > 0) {
745
+ success = true;
746
+ usedPath = clipPath;
747
+ break;
748
+ }
984
749
  }
750
+ catch { /* try next path */ }
985
751
  }
986
- catch {
987
- // Try next path
752
+ }
753
+ finally {
754
+ try {
755
+ fs.unlinkSync(tmpLocal);
988
756
  }
757
+ catch { /* ignore */ }
989
758
  }
990
759
  if (!success) {
991
760
  return {
992
- content: [
993
- {
761
+ content: [{
994
762
  type: "text",
995
763
  text: `Error: Could not write clipboard. Tried paths: ${paths.join(", ")}`,
996
- },
997
- ],
764
+ }],
998
765
  };
999
766
  }
1000
767
  return {
1001
- content: [
1002
- {
768
+ content: [{
1003
769
  type: "text",
1004
- text: `Clipboard set to: "${text.substring(0, 50)}${text.length > 50 ? "..." : ""}"`,
1005
- },
1006
- ],
770
+ text: `Clipboard set to: "${text.substring(0, 50)}${text.length > 50 ? "..." : ""}" (stored at ${usedPath})`,
771
+ }],
1007
772
  };
1008
773
  });
1009
774
  // =====================================================
1010
775
  // TOOL: get_clipboard
1011
776
  // =====================================================
1012
777
  server.tool("get_clipboard", "Get the current device clipboard content", {}, async () => {
1013
- // Try multiple paths for compatibility (standard emulators vs Redroid/Docker)
1014
778
  const paths = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];
1015
779
  for (const clipPath of paths) {
1016
780
  try {
1017
- const content = await shell(`cat ${clipPath} 2>/dev/null`);
781
+ const content = await runAdbShell(["cat", clipPath]);
1018
782
  if (content && content.trim()) {
1019
- return {
1020
- content: [
1021
- {
1022
- type: "text",
1023
- text: `Clipboard content: "${content}"`,
1024
- },
1025
- ],
1026
- };
783
+ return { content: [{ type: "text", text: `Clipboard content: "${content}"` }] };
1027
784
  }
1028
785
  }
1029
- catch {
1030
- // Try next path
1031
- }
786
+ catch { /* try next */ }
1032
787
  }
1033
- return {
1034
- content: [
1035
- {
1036
- type: "text",
1037
- text: `Clipboard content: ""`,
1038
- },
1039
- ],
1040
- };
788
+ return { content: [{ type: "text", text: `Clipboard content: ""` }] };
1041
789
  });
1042
790
  // =====================================================
1043
791
  // TOOL: rotate_device
@@ -1045,92 +793,65 @@ server.tool("get_clipboard", "Get the current device clipboard content", {}, asy
1045
793
  server.tool("rotate_device", "Rotate the device to portrait or landscape orientation", {
1046
794
  orientation: z.enum(["portrait", "landscape"]).describe("Target orientation"),
1047
795
  }, async ({ orientation }) => {
1048
- // Disable auto-rotation first
1049
- await shell("settings put system accelerometer_rotation 0");
1050
- // Set user rotation (0 = portrait, 1 = landscape)
1051
- const rotation = orientation === "portrait" ? 0 : 1;
1052
- await shell(`settings put system user_rotation ${rotation}`);
1053
- return {
1054
- content: [
1055
- {
1056
- type: "text",
1057
- text: `Device rotated to ${orientation}`,
1058
- },
1059
- ],
1060
- };
796
+ await runAdbShell(["settings", "put", "system", "accelerometer_rotation", "0"]);
797
+ const rotation = orientation === "portrait" ? "0" : "1";
798
+ await runAdbShell(["settings", "put", "system", "user_rotation", rotation]);
799
+ return { content: [{ type: "text", text: `Device rotated to ${orientation}` }] };
1061
800
  });
1062
801
  // =====================================================
1063
802
  // TOOL: tap_safe
1064
803
  // =====================================================
1065
804
  server.tool("tap_safe", "Tap at coordinates while avoiding system navigation bars", {
1066
- x: z.number().describe("X coordinate"),
1067
- y: z.number().describe("Y coordinate"),
805
+ x: coordinateSchema.describe("X coordinate"),
806
+ y: coordinateSchema.describe("Y coordinate"),
1068
807
  avoidStatusBar: z.boolean().optional().describe("Avoid status bar area (default: true)"),
1069
808
  avoidNavBar: z.boolean().optional().describe("Avoid navigation bar area (default: true)"),
1070
809
  }, async ({ x, y, avoidStatusBar = true, avoidNavBar = true }) => {
1071
- // Get screen dimensions
1072
- const sizeOutput = await shell("wm size");
810
+ const sizeOutput = await runAdbShell(["wm", "size"]);
1073
811
  const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
1074
812
  const screenWidth = sizeMatch ? parseInt(sizeMatch[1]) : 1080;
1075
813
  const screenHeight = sizeMatch ? parseInt(sizeMatch[2]) : 2400;
1076
- // Typical safe areas (approximate)
1077
- const statusBarHeight = 50; // ~50px for status bar
1078
- const navBarHeight = 120; // ~120px for navigation bar
814
+ const statusBarHeight = 50;
815
+ const navBarHeight = 120;
1079
816
  let safeY = y;
1080
817
  let adjusted = false;
1081
818
  const adjustments = [];
1082
- // Check and adjust for status bar
1083
819
  if (avoidStatusBar && y < statusBarHeight) {
1084
820
  safeY = statusBarHeight + 10;
1085
821
  adjusted = true;
1086
822
  adjustments.push(`status bar (${y} -> ${safeY})`);
1087
823
  }
1088
- // Check and adjust for navigation bar
1089
824
  if (avoidNavBar && y > screenHeight - navBarHeight) {
1090
825
  safeY = screenHeight - navBarHeight - 10;
1091
826
  adjusted = true;
1092
827
  adjustments.push(`nav bar (${y} -> ${safeY})`);
1093
828
  }
1094
- // Ensure X is within bounds
1095
- let safeX = Math.max(10, Math.min(x, screenWidth - 10));
1096
- await shell(`input tap ${safeX} ${safeY}`);
829
+ const safeX = Math.max(10, Math.min(x, screenWidth - 10));
830
+ await runAdbShell(["input", "tap", String(safeX), String(safeY)]);
1097
831
  const message = adjusted
1098
832
  ? `Tapped at (${safeX}, ${safeY}) [adjusted to avoid ${adjustments.join(", ")}]`
1099
833
  : `Tapped at (${safeX}, ${safeY})`;
1100
- return {
1101
- content: [
1102
- {
1103
- type: "text",
1104
- text: message,
1105
- },
1106
- ],
1107
- };
834
+ return { content: [{ type: "text", text: message }] };
1108
835
  });
1109
836
  // =====================================================
1110
837
  // TOOL: tap_element
1111
838
  // =====================================================
1112
839
  server.tool("tap_element", "Find and tap an element by text or resource-id (more reliable than tap_text)", {
1113
- text: z.string().optional().describe("Text to search for"),
1114
- resourceId: z.string().optional().describe("Resource ID to search for"),
1115
- index: z.number().optional().describe("Index if multiple matches (0-based, default: 0)"),
840
+ text: freeTextSchema.optional().describe("Text to search for"),
841
+ resourceId: resourceIdSchema.optional().describe("Resource ID to search for"),
842
+ index: z.number().int().min(0).max(10_000).optional().describe("Index if multiple matches (0-based, default: 0)"),
1116
843
  exact: z.boolean().optional().describe("Exact text match (default: false)"),
1117
844
  }, async ({ text, resourceId, index = 0, exact = false }) => {
1118
845
  if (!text && !resourceId) {
1119
- return {
1120
- content: [
1121
- {
1122
- type: "text",
1123
- text: "Error: Must provide either text or resourceId",
1124
- },
1125
- ],
1126
- };
846
+ return { content: [{ type: "text", text: "Error: Must provide either text or resourceId" }] };
1127
847
  }
1128
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1129
- const xml = await shell("cat /sdcard/ui_dump.xml");
848
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
849
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
1130
850
  let pattern;
1131
851
  let searchType;
1132
852
  if (resourceId) {
1133
- pattern = `resource-id="${resourceId}"[^>]*bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
853
+ const escId = resourceId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
854
+ pattern = `resource-id="${escId}"[^>]*bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
1134
855
  searchType = `resource-id="${resourceId}"`;
1135
856
  }
1136
857
  else if (exact) {
@@ -1148,71 +869,46 @@ server.tool("tap_element", "Find and tap an element by text or resource-id (more
1148
869
  let match;
1149
870
  while ((match = regex.exec(xml)) !== null) {
1150
871
  matches.push({
1151
- x1: parseInt(match[1]),
1152
- y1: parseInt(match[2]),
1153
- x2: parseInt(match[3]),
1154
- y2: parseInt(match[4]),
872
+ x1: parseInt(match[1]), y1: parseInt(match[2]),
873
+ x2: parseInt(match[3]), y2: parseInt(match[4]),
1155
874
  });
1156
875
  }
1157
876
  if (matches.length === 0) {
1158
- return {
1159
- content: [
1160
- {
1161
- type: "text",
1162
- text: `Element with ${searchType} not found`,
1163
- },
1164
- ],
1165
- };
877
+ return { content: [{ type: "text", text: `Element with ${searchType} not found` }] };
1166
878
  }
1167
879
  if (index >= matches.length) {
1168
880
  return {
1169
- content: [
1170
- {
1171
- type: "text",
1172
- text: `Index ${index} out of range. Found ${matches.length} matches for ${searchType}`,
1173
- },
1174
- ],
881
+ content: [{ type: "text", text: `Index ${index} out of range. Found ${matches.length} matches for ${searchType}` }],
1175
882
  };
1176
883
  }
1177
884
  const m = matches[index];
1178
885
  const centerX = Math.round((m.x1 + m.x2) / 2);
1179
886
  const centerY = Math.round((m.y1 + m.y2) / 2);
1180
- await shell(`input tap ${centerX} ${centerY}`);
887
+ await runAdbShell(["input", "tap", String(centerX), String(centerY)]);
1181
888
  return {
1182
- content: [
1183
- {
889
+ content: [{
1184
890
  type: "text",
1185
891
  text: `Tapped element with ${searchType} at (${centerX}, ${centerY})${matches.length > 1 ? ` [match ${index + 1}/${matches.length}]` : ""}`,
1186
- },
1187
- ],
892
+ }],
1188
893
  };
1189
894
  });
1190
895
  // =====================================================
1191
896
  // TOOL: get_focused_element
1192
897
  // =====================================================
1193
898
  server.tool("get_focused_element", "Get information about the currently focused UI element", {}, async () => {
1194
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1195
- const xml = await shell("cat /sdcard/ui_dump.xml");
899
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
900
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
1196
901
  const focusedRegex = /focused="true"[^>]*text="([^"]*)"[^>]*bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/;
1197
902
  const match = focusedRegex.exec(xml);
1198
903
  if (!match) {
1199
- // Try alternative pattern
1200
904
  const altRegex = /bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"[^>]*focused="true"[^>]*text="([^"]*)"/;
1201
905
  const altMatch = altRegex.exec(xml);
1202
906
  if (!altMatch) {
1203
- return {
1204
- content: [
1205
- {
1206
- type: "text",
1207
- text: JSON.stringify({ focused: false, element: null }),
1208
- },
1209
- ],
1210
- };
907
+ return { content: [{ type: "text", text: JSON.stringify({ focused: false, element: null }) }] };
1211
908
  }
1212
909
  const [, x1, y1, x2, y2, text] = altMatch;
1213
910
  return {
1214
- content: [
1215
- {
911
+ content: [{
1216
912
  type: "text",
1217
913
  text: JSON.stringify({
1218
914
  focused: true,
@@ -1222,14 +918,12 @@ server.tool("get_focused_element", "Get information about the currently focused
1222
918
  center: { x: Math.round((parseInt(x1) + parseInt(x2)) / 2), y: Math.round((parseInt(y1) + parseInt(y2)) / 2) },
1223
919
  },
1224
920
  }, null, 2),
1225
- },
1226
- ],
921
+ }],
1227
922
  };
1228
923
  }
1229
924
  const [, text, x1, y1, x2, y2] = match;
1230
925
  return {
1231
- content: [
1232
- {
926
+ content: [{
1233
927
  type: "text",
1234
928
  text: JSON.stringify({
1235
929
  focused: true,
@@ -1239,37 +933,26 @@ server.tool("get_focused_element", "Get information about the currently focused
1239
933
  center: { x: Math.round((parseInt(x1) + parseInt(x2)) / 2), y: Math.round((parseInt(y1) + parseInt(y2)) / 2) },
1240
934
  },
1241
935
  }, null, 2),
1242
- },
1243
- ],
936
+ }],
1244
937
  };
1245
938
  });
1246
939
  // =====================================================
1247
940
  // TOOL: assert_screen_contains
1248
941
  // =====================================================
1249
942
  server.tool("assert_screen_contains", "Assert that specific text is visible on screen (useful for testing)", {
1250
- text: z.string().describe("Text that should be visible"),
943
+ text: freeTextSchema.describe("Text that should be visible"),
1251
944
  exact: z.boolean().optional().describe("Exact match (default: false)"),
1252
945
  }, async ({ text, exact = false }) => {
1253
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1254
- const xml = await shell("cat /sdcard/ui_dump.xml");
1255
- let found;
1256
- if (exact) {
1257
- found = xml.includes(`text="${text}"`);
1258
- }
1259
- else {
1260
- found = xml.toLowerCase().includes(text.toLowerCase());
1261
- }
946
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
947
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
948
+ const found = exact
949
+ ? xml.includes(`text="${text}"`)
950
+ : xml.toLowerCase().includes(text.toLowerCase());
1262
951
  return {
1263
- content: [
1264
- {
952
+ content: [{
1265
953
  type: "text",
1266
- text: JSON.stringify({
1267
- assertion: found ? "PASS" : "FAIL",
1268
- expected: text,
1269
- found,
1270
- }, null, 2),
1271
- },
1272
- ],
954
+ text: JSON.stringify({ assertion: found ? "PASS" : "FAIL", expected: text, found }, null, 2),
955
+ }],
1273
956
  };
1274
957
  });
1275
958
  // =====================================================
@@ -1278,8 +961,8 @@ server.tool("assert_screen_contains", "Assert that specific text is visible on s
1278
961
  server.tool("get_all_text", "Get all visible text elements on screen (useful for debugging and verification)", {
1279
962
  includeEmpty: z.boolean().optional().describe("Include elements with empty text (default: false)"),
1280
963
  }, async ({ includeEmpty = false }) => {
1281
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1282
- const xml = await shell("cat /sdcard/ui_dump.xml");
964
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
965
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
1283
966
  const texts = [];
1284
967
  const regex = /text="([^"]*)"[^>]*bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;
1285
968
  let match;
@@ -1293,16 +976,10 @@ server.tool("get_all_text", "Get all visible text elements on screen (useful for
1293
976
  });
1294
977
  }
1295
978
  }
1296
- // Sort by Y position (top to bottom), then X (left to right)
1297
979
  texts.sort((a, b) => a.centerY - b.centerY || a.centerX - b.centerX);
1298
980
  const textList = texts.map((t) => `"${t.text}" at (${t.centerX}, ${t.centerY})`).join("\n");
1299
981
  return {
1300
- content: [
1301
- {
1302
- type: "text",
1303
- text: `Found ${texts.length} text elements:\n${textList}`,
1304
- },
1305
- ],
982
+ content: [{ type: "text", text: `Found ${texts.length} text elements:\n${textList}` }],
1306
983
  };
1307
984
  });
1308
985
  // =====================================================
@@ -1311,19 +988,15 @@ server.tool("get_all_text", "Get all visible text elements on screen (useful for
1311
988
  server.tool("get_clickable_elements", "Get all clickable elements on screen with their text, resource-id, and coordinates (useful when tap_text fails)", {
1312
989
  includeDisabled: z.boolean().optional().describe("Include disabled elements (default: false)"),
1313
990
  }, async ({ includeDisabled = false }) => {
1314
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1315
- const xml = await shell("cat /sdcard/ui_dump.xml");
991
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
992
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
1316
993
  const elements = [];
1317
- // Match clickable elements with their attributes
1318
994
  const regex = /<node[^>]*clickable="true"[^>]*>/g;
1319
995
  let nodeMatch;
1320
996
  while ((nodeMatch = regex.exec(xml)) !== null) {
1321
997
  const node = nodeMatch[0];
1322
- // Skip disabled elements unless requested
1323
- if (!includeDisabled && node.includes('enabled="false"')) {
998
+ if (!includeDisabled && node.includes('enabled="false"'))
1324
999
  continue;
1325
- }
1326
- // Extract attributes
1327
1000
  const textMatch = node.match(/text="([^"]*)"/);
1328
1001
  const resourceIdMatch = node.match(/resource-id="([^"]*)"/);
1329
1002
  const classMatch = node.match(/class="([^"]*)"/);
@@ -1336,15 +1009,12 @@ server.tool("get_clickable_elements", "Get all clickable elements on screen with
1336
1009
  text: textMatch ? textMatch[1] : "",
1337
1010
  resourceId: resourceIdMatch ? resourceIdMatch[1] : "",
1338
1011
  className: classMatch ? classMatch[1].split(".").pop() || "" : "",
1339
- centerX,
1340
- centerY,
1012
+ centerX, centerY,
1341
1013
  bounds: `[${x1},${y1}][${x2},${y2}]`,
1342
1014
  });
1343
1015
  }
1344
1016
  }
1345
- // Sort by Y position (top to bottom), then X (left to right)
1346
1017
  elements.sort((a, b) => a.centerY - b.centerY || a.centerX - b.centerX);
1347
- // Format output
1348
1018
  const formatted = elements.map((el, i) => {
1349
1019
  const parts = [];
1350
1020
  if (el.text)
@@ -1356,12 +1026,7 @@ server.tool("get_clickable_elements", "Get all clickable elements on screen with
1356
1026
  return `${i + 1}. ${parts.join(" ") || "(no text/id)"} at (${el.centerX}, ${el.centerY})`;
1357
1027
  }).join("\n");
1358
1028
  return {
1359
- content: [
1360
- {
1361
- type: "text",
1362
- text: `Found ${elements.length} clickable elements:\n${formatted}`,
1363
- },
1364
- ],
1029
+ content: [{ type: "text", text: `Found ${elements.length} clickable elements:\n${formatted}` }],
1365
1030
  };
1366
1031
  });
1367
1032
  // =====================================================
@@ -1371,45 +1036,38 @@ server.tool("is_keyboard_visible", "Check if the soft keyboard is currently visi
1371
1036
  let isShowingViaIme = false;
1372
1037
  let hasKeyboardWindow = false;
1373
1038
  let heightMethod = false;
1374
- // Method 1: Check InputMethod visibility via dumpsys
1375
1039
  try {
1376
- const imeDump = await shell("dumpsys input_method | grep mInputShown || true");
1377
- isShowingViaIme = imeDump.includes("mInputShown=true");
1378
- }
1379
- catch {
1380
- // Ignore errors
1040
+ const imeDump = await runAdbShell(["dumpsys", "input_method"]);
1041
+ isShowingViaIme = imeDump
1042
+ .split("\n")
1043
+ .some((l) => /mInputShown=true/.test(l));
1381
1044
  }
1382
- // Method 2: Check if keyboard window is visible
1045
+ catch { /* ignore */ }
1383
1046
  try {
1384
- const windowDump = await shell("dumpsys window windows | grep -i inputmethod || true");
1385
- hasKeyboardWindow = windowDump.toLowerCase().includes("inputmethod") &&
1386
- windowDump.includes("mHasSurface=true");
1387
- }
1388
- catch {
1389
- // Ignore errors
1047
+ const windowDump = await runAdbShell(["dumpsys", "window", "windows"]);
1048
+ hasKeyboardWindow = windowDump
1049
+ .split("\n")
1050
+ .some((l) => /inputmethod/i.test(l) && /mHasSurface=true/.test(l));
1390
1051
  }
1391
- // Method 3: Check visible height vs screen height
1052
+ catch { /* ignore */ }
1392
1053
  try {
1393
- const visibleFrame = await shell("dumpsys window | grep 'mVisibleFrame' || true");
1394
- const sizeOutput = await shell("wm size");
1054
+ const win = await runAdbShell(["dumpsys", "window"]);
1055
+ const sizeOutput = await runAdbShell(["wm", "size"]);
1395
1056
  const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
1057
+ const visibleFrame = win.split("\n").find((l) => /mVisibleFrame/.test(l)) || "";
1396
1058
  if (sizeMatch && visibleFrame) {
1397
1059
  const screenHeight = parseInt(sizeMatch[2]);
1398
1060
  const frameMatch = visibleFrame.match(/mVisibleFrame=\[\d+,\d+\]\[\d+,(\d+)\]/);
1399
1061
  if (frameMatch) {
1400
1062
  const visibleHeight = parseInt(frameMatch[1]);
1401
- // If visible area is significantly less than screen, keyboard is likely shown
1402
1063
  heightMethod = visibleHeight < screenHeight * 0.8;
1403
1064
  }
1404
1065
  }
1405
1066
  }
1406
- catch {
1407
- // Ignore height method errors
1408
- }
1067
+ catch { /* ignore */ }
1409
1068
  const isVisible = isShowingViaIme || hasKeyboardWindow || heightMethod;
1410
1069
  return {
1411
- content: [
1412
- {
1070
+ content: [{
1413
1071
  type: "text",
1414
1072
  text: JSON.stringify({
1415
1073
  visible: isVisible,
@@ -1419,79 +1077,63 @@ server.tool("is_keyboard_visible", "Check if the soft keyboard is currently visi
1419
1077
  heightReduced: heightMethod,
1420
1078
  },
1421
1079
  }, null, 2),
1422
- },
1423
- ],
1080
+ }],
1424
1081
  };
1425
1082
  });
1426
1083
  // =====================================================
1427
1084
  // TOOL: get_focused_input_value
1428
1085
  // =====================================================
1429
1086
  server.tool("get_focused_input_value", "Get the current text value of the focused input field", {}, async () => {
1430
- await shell("uiautomator dump /sdcard/ui_dump.xml");
1431
- const xml = await shell("cat /sdcard/ui_dump.xml");
1432
- // Look for focused element that is an input field (EditText or similar)
1433
- // Pattern matches focused="true" along with text attribute
1087
+ await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
1088
+ const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
1434
1089
  const patterns = [
1435
- // Pattern 1: focused before text
1436
1090
  /class="[^"]*(?:Edit|Input|Text)[^"]*"[^>]*focused="true"[^>]*text="([^"]*)"/gi,
1437
- // Pattern 2: text before focused
1438
1091
  /class="[^"]*(?:Edit|Input|Text)[^"]*"[^>]*text="([^"]*)"[^>]*focused="true"/gi,
1439
- // Pattern 3: Generic focused with text
1440
1092
  /focused="true"[^>]*text="([^"]*)"[^>]*class="[^"]*(?:Edit|Input|Text)[^"]*"/gi,
1441
1093
  ];
1442
1094
  for (const pattern of patterns) {
1443
1095
  const match = pattern.exec(xml);
1444
1096
  if (match) {
1445
1097
  return {
1446
- content: [
1447
- {
1098
+ content: [{
1448
1099
  type: "text",
1449
1100
  text: JSON.stringify({
1450
- found: true,
1451
- value: match[1],
1452
- isEmpty: match[1] === "",
1101
+ found: true, value: match[1], isEmpty: match[1] === "",
1453
1102
  }, null, 2),
1454
- },
1455
- ],
1103
+ }],
1456
1104
  };
1457
1105
  }
1458
1106
  }
1459
- // Try broader search for any focused element with text
1460
1107
  const broadPattern = /focused="true"[^>]*text="([^"]*)"|text="([^"]*)"[^>]*focused="true"/gi;
1461
1108
  const broadMatch = broadPattern.exec(xml);
1462
1109
  if (broadMatch) {
1463
1110
  const value = broadMatch[1] || broadMatch[2] || "";
1464
1111
  return {
1465
- content: [
1466
- {
1112
+ content: [{
1467
1113
  type: "text",
1468
1114
  text: JSON.stringify({
1469
- found: true,
1470
- value,
1471
- isEmpty: value === "",
1115
+ found: true, value, isEmpty: value === "",
1472
1116
  note: "Found focused element (may not be an input field)",
1473
1117
  }, null, 2),
1474
- },
1475
- ],
1118
+ }],
1476
1119
  };
1477
1120
  }
1478
1121
  return {
1479
- content: [
1480
- {
1122
+ content: [{
1481
1123
  type: "text",
1482
- text: JSON.stringify({
1483
- found: false,
1484
- value: null,
1485
- error: "No focused input field found",
1486
- }, null, 2),
1487
- },
1488
- ],
1124
+ text: JSON.stringify({ found: false, value: null, error: "No focused input field found" }, null, 2),
1125
+ }],
1489
1126
  };
1490
1127
  });
1128
+ // =====================================================
1491
1129
  // Start server
1130
+ // =====================================================
1492
1131
  async function main() {
1493
1132
  const transport = new StdioServerTransport();
1494
1133
  await server.connect(transport);
1495
1134
  console.error("MCP Android Emulator Server running on stdio");
1496
1135
  }
1497
- main().catch(console.error);
1136
+ main().catch((err) => {
1137
+ console.error(err);
1138
+ process.exit(1);
1139
+ });