mcp-android-emulator 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,1814 +1,1463 @@
1
- #!/usr/bin/env node
2
- /**
3
- * MCP Server for Android Emulator
4
- * Enables AI assistants to interact with Android devices/emulators via ADB
5
- *
6
- * @license MIT
7
- */
8
-
9
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
10
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
11
- import { z } from "zod";
12
- import { execSync, exec } from "child_process";
13
- import { promisify } from "util";
14
- import * as fs from "fs";
15
- import * as path from "path";
16
-
17
/** Promise-returning wrapper around `child_process.exec`. */
const execAsync = promisify(exec);

// Runtime settings; each one can be overridden through an environment variable.
const ADB_PATH = process.env.ADB_PATH || "adb";
const SCREENSHOT_DIR = process.env.SCREENSHOT_DIR || "/tmp/android-screenshots";

// Make sure the screenshot directory is present before any tool runs.
if (fs.existsSync(SCREENSHOT_DIR) === false) {
  fs.mkdirSync(SCREENSHOT_DIR, { recursive: true });
}
27
-
28
/**
 * Run an ADB command on the host and return its trimmed standard output.
 *
 * The command string is appended to `ADB_PATH` and executed through the host
 * shell, so callers are responsible for quoting any untrusted fragments.
 *
 * @param command - Arguments to pass to adb (e.g. `devices`, `shell wm size`).
 * @returns Standard output of the command with surrounding whitespace removed.
 * @throws Error whose message is prefixed with `ADB Error:` when the command fails.
 */
async function adb(command: string): Promise<string> {
  try {
    const { stdout } = await execAsync(`${ADB_PATH} ${command}`, {
      // UI hierarchy dumps and logcat output can exceed exec's default output cap.
      maxBuffer: 64 * 1024 * 1024,
    });
    return stdout.trim();
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed to carry a message.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`ADB Error: ${reason}`);
  }
}
39
-
40
/**
 * Run a command through `adb shell`.
 *
 * @param command - Command line to hand to `adb shell`.
 * @returns Whatever `adb` resolves with for the composed command.
 */
async function shell(command: string): Promise<string> {
  const adbArguments = `shell ${command}`;
  return adb(adbArguments);
}
46
-
47
// MCP server instance on which every tool below is registered.
const server = new McpServer({ name: "android-emulator", version: "1.3.0" });
52
-
53
// =====================================================
// TOOL: screenshot
// =====================================================
// Captures the screen with `adb exec-out screencap -p` into a temporary file,
// returns the PNG as base64 image content, and always removes the temp file.
server.tool(
  "screenshot",
  "Take a screenshot of the Android device/emulator and return it as a base64 image",
  {},
  async () => {
    const filepath = path.join(SCREENSHOT_DIR, `screenshot_${Date.now()}.png`);

    let base64: string;
    try {
      // The target path is quoted so a directory containing spaces survives the redirection.
      execSync(`${ADB_PATH} exec-out screencap -p > "${filepath}"`);
      base64 = fs.readFileSync(filepath).toString("base64");
    } finally {
      // The redirection creates the file even when adb fails, so clean up on every path.
      if (fs.existsSync(filepath)) {
        fs.unlinkSync(filepath);
      }
    }

    return {
      content: [
        {
          type: "image",
          data: base64,
          mimeType: "image/png",
        },
      ],
    };
  }
);
85
-
86
// =====================================================
// TOOL: get_ui_tree
// =====================================================
// Dumps the UI hierarchy, lists every element with non-empty text together with
// the center of its bounds, and appends the first 5000 characters of the raw XML.
server.tool(
  "get_ui_tree",
  "Get the UI element tree of the device (like DOM but for Android). Returns clickable elements with their coordinates.",
  {},
  async () => {
    // Dump UI hierarchy
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    // Collect "text at center" entries for every node that carries text.
    const elements: string[] = [];
    const nodePattern = /text="([^"]*)".*?bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;
    let match: RegExpExecArray | null;

    while ((match = nodePattern.exec(xml)) !== null) {
      const [, text, x1, y1, x2, y2] = match;
      if (!text) {
        continue;
      }
      const centerX = Math.round((parseInt(x1, 10) + parseInt(x2, 10)) / 2);
      const centerY = Math.round((parseInt(y1, 10) + parseInt(y2, 10)) / 2);
      elements.push(`"${text}" at (${centerX}, ${centerY})`);
    }

    // Only mark the XML as truncated when something was actually cut off.
    const maxXmlChars = 5000;
    const xmlPreview = xml.length > maxXmlChars ? `${xml.substring(0, maxXmlChars)}...` : xml;

    return {
      content: [
        {
          type: "text",
          text: `Elements found:\n${elements.join("\n")}\n\nFull XML:\n${xmlPreview}`,
        },
      ],
    };
  }
);
122
-
123
// =====================================================
// TOOL: tap
// =====================================================
// Sends a single `input tap` at the requested screen position.
server.tool(
  "tap",
  "Tap at the specified coordinates on the screen",
  {
    x: z.number().describe("X coordinate"),
    y: z.number().describe("Y coordinate"),
  },
  async ({ x, y }) => {
    const target = [x, y].join(" ");
    await shell(`input tap ${target}`);

    const summary = `Tapped at (${x}, ${y})`;
    return { content: [{ type: "text", text: summary }] };
  }
);
145
-
146
// =====================================================
// TOOL: tap_text
// =====================================================
// Dumps the UI hierarchy, finds the first node whose text attribute matches
// (case-insensitively; whole value when `exact`, substring otherwise) and taps
// the center of its bounds. Reports "not found" instead of throwing.
server.tool(
  "tap_text",
  "Find an element by its text content and tap on it",
  {
    text: z.string().describe("Text of the element to find and tap"),
    exact: z.boolean().optional().describe("If true, match exact text. Default: false (partial match)"),
  },
  async ({ text, exact = false }) => {
    // Dump UI hierarchy
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    const escapeRegExp = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

    // The dump is XML, so & < > " show up as entities inside attribute values.
    // Search for the entity-encoded spelling first and keep the raw spelling as
    // a fallback so callers that already pass encoded text keep working.
    const xmlEncoded = text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
    const spellings = xmlEncoded === text ? [text] : [xmlEncoded, text];
    const needle = `(?:${spellings.map(escapeRegExp).join("|")})`;

    const boundsPattern = `bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
    const pattern = exact
      ? `text="${needle}".*?${boundsPattern}`
      : `text="[^"]*${needle}[^"]*".*?${boundsPattern}`;

    const match = new RegExp(pattern, "i").exec(xml);

    if (!match) {
      return {
        content: [
          {
            type: "text",
            text: `Element with text "${text}" not found`,
          },
        ],
      };
    }

    const [, x1, y1, x2, y2] = match;
    const centerX = Math.round((parseInt(x1, 10) + parseInt(x2, 10)) / 2);
    const centerY = Math.round((parseInt(y1, 10) + parseInt(y2, 10)) / 2);

    await shell(`input tap ${centerX} ${centerY}`);

    return {
      content: [
        {
          type: "text",
          text: `Tapped on "${text}" at (${centerX}, ${centerY})`,
        },
      ],
    };
  }
);
197
-
198
// =====================================================
// TOOL: type_text
// =====================================================
// Types text into the focused field via `input text`. The text is quoted for
// both shells it passes through (host shell, then device shell) so characters
// such as quotes, `$`, `;` or `&` are typed literally instead of being
// interpreted as shell syntax.
server.tool(
  "type_text",
  "Type text into the currently focused input field",
  {
    text: z.string().describe("Text to type"),
  },
  async ({ text }) => {
    // Wrap a value in single quotes so a POSIX shell treats it as one literal word.
    const quotePosix = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

    // Spaces are sent as %s, the placeholder this file has always used for `input text`.
    const encoded = text.replace(/ /g, "%s");

    // Layer 1: the device shell must see the text as a single quoted word.
    const deviceCommand = `input text ${quotePosix(encoded)}`;

    // Layer 2: the host shell must pass that whole command to adb as one argument.
    // cmd.exe does not understand single quotes, so Windows hosts use double quotes.
    const hostArgument =
      process.platform === "win32"
        ? `"${deviceCommand.replace(/"/g, '\\"')}"`
        : quotePosix(deviceCommand);

    await shell(hostArgument);

    return {
      content: [
        {
          type: "text",
          text: `Typed: "${text}"`,
        },
      ],
    };
  }
);
222
-
223
// =====================================================
// TOOL: swipe
// =====================================================
// Issues one `input swipe` between two points; duration falls back to 300 ms.
server.tool(
  "swipe",
  "Perform a swipe gesture on the screen",
  {
    x1: z.number().describe("Starting X coordinate"),
    y1: z.number().describe("Starting Y coordinate"),
    x2: z.number().describe("Ending X coordinate"),
    y2: z.number().describe("Ending Y coordinate"),
    duration: z.number().optional().describe("Duration in milliseconds (default: 300)"),
  },
  async ({ x1, y1, x2, y2, duration = 300 }) => {
    const gesture = [x1, y1, x2, y2, duration].join(" ");
    await shell(`input swipe ${gesture}`);

    const summary = `Swiped from (${x1}, ${y1}) to (${x2}, ${y2})`;
    return { content: [{ type: "text", text: summary }] };
  }
);
249
-
250
// =====================================================
// TOOL: scroll
// =====================================================
// Scrolls by swiping across the screen center; the swipe spans `amount` pixels
// (half on each side of the center) along the axis implied by `direction`.
server.tool(
  "scroll",
  "Scroll the screen in a direction",
  {
    direction: z.enum(["up", "down", "left", "right"]).describe("Direction to scroll"),
    amount: z.number().optional().describe("Scroll amount in pixels (default: 500)"),
  },
  async ({ direction, amount = 500 }) => {
    // Screen size is read from `wm size`; 1080x2400 is the fallback when it cannot be parsed.
    const reported = (await shell("wm size")).match(/(\d+)x(\d+)/);
    const width = reported ? parseInt(reported[1]) : 1080;
    const height = reported ? parseInt(reported[2]) : 2400;

    const centerX = Math.round(width / 2);
    const centerY = Math.round(height / 2);
    const half = amount / 2;

    // Offsets from the center for [startX, startY, endX, endY].
    const offsets: Record<typeof direction, readonly [number, number, number, number]> = {
      up: [0, half, 0, -half],
      down: [0, -half, 0, half],
      left: [half, 0, -half, 0],
      right: [-half, 0, half, 0],
    };
    const [startDx, startDy, endDx, endDy] = offsets[direction];

    const x1 = centerX + startDx;
    const y1 = centerY + startDy;
    const x2 = centerX + endDx;
    const y2 = centerY + endDy;

    await shell(`input swipe ${x1} ${y1} ${x2} ${y2} 300`);

    return { content: [{ type: "text", text: `Scrolled ${direction}` }] };
  }
);
303
-
304
// =====================================================
// TOOL: press_key
// =====================================================
// Translates a symbolic key name into its numeric keycode and sends it with
// `input keyevent`.
server.tool(
  "press_key",
  "Press a system key (BACK, HOME, ENTER, etc)",
  {
    key: z.enum(["BACK", "HOME", "ENTER", "TAB", "DELETE", "MENU", "POWER", "VOLUME_UP", "VOLUME_DOWN"]).describe("Key to press"),
  },
  async ({ key }) => {
    // Key name -> keycode passed to `input keyevent`.
    const codeByName: Record<string, number> = {
      HOME: 3,
      BACK: 4,
      VOLUME_UP: 24,
      VOLUME_DOWN: 25,
      POWER: 26,
      TAB: 61,
      ENTER: 66,
      DELETE: 67,
      MENU: 82,
    };
    const keycode = codeByName[key];

    await shell(`input keyevent ${keycode}`);

    return { content: [{ type: "text", text: `Pressed ${key} key` }] };
  }
);
338
-
339
// =====================================================
// TOOL: launch_app
// =====================================================
// Starts an app's launcher activity through `monkey`. The package name is
// validated first because it is interpolated into a shell command line.
server.tool(
  "launch_app",
  "Launch an application by its package name",
  {
    package: z.string().describe("Package name of the app (e.g., com.android.chrome)"),
  },
  async ({ package: pkg }) => {
    // Only letters, digits, underscores and dots are accepted, so no shell
    // metacharacter can reach the command.
    if (!/^[A-Za-z0-9_.]+$/.test(pkg)) {
      throw new Error(`Invalid package name: "${pkg}"`);
    }

    await shell(`monkey -p ${pkg} -c android.intent.category.LAUNCHER 1`);

    return {
      content: [
        {
          type: "text",
          text: `Launched ${pkg}`,
        },
      ],
    };
  }
);
361
-
362
// =====================================================
// TOOL: install_apk
// =====================================================
// Installs (or reinstalls, via -r) an APK from a host path. The path is quoted
// for the host shell so spaces and shell metacharacters are taken literally.
server.tool(
  "install_apk",
  "Install an APK file on the device",
  {
    path: z.string().describe("Path to the APK file"),
  },
  async ({ path: apkPath }) => {
    let quotedPath: string;
    if (process.platform === "win32") {
      // cmd.exe has no way to embed a double quote inside a quoted argument safely.
      if (apkPath.includes('"')) {
        throw new Error(`Invalid APK path: ${apkPath}`);
      }
      quotedPath = `"${apkPath}"`;
    } else {
      // POSIX single quotes: close, emit an escaped quote, reopen.
      quotedPath = `'${apkPath.replace(/'/g, "'\\''")}'`;
    }

    const result = await adb(`install -r ${quotedPath}`);

    return {
      content: [
        {
          type: "text",
          text: `APK installed: ${result}`,
        },
      ],
    };
  }
);
384
-
385
// =====================================================
// TOOL: list_packages
// =====================================================
// Lists installed packages. The optional filter is applied in-process as a
// case-insensitive substring match, so it never reaches a shell and an empty
// result is reported as an empty list rather than as a command failure.
server.tool(
  "list_packages",
  "List installed packages on the device",
  {
    filter: z.string().optional().describe("Filter packages by name (optional)"),
  },
  async ({ filter }) => {
    const listing = await shell("pm list packages");
    const needle = filter ? filter.toLowerCase() : "";

    const packages = listing
      .split("\n")
      .map((line) => line.replace("package:", "").trim())
      .filter((name) => name !== "" && (needle === "" || name.toLowerCase().includes(needle)));

    return {
      content: [
        {
          type: "text",
          text: `Installed packages:\n${packages.join("\n")}`,
        },
      ],
    };
  }
);
413
-
414
// =====================================================
// TOOL: get_logs
// =====================================================
// Fetches the most recent logcat lines (optionally restricted to a minimum
// level) and, when a filter is given, keeps only lines containing it
// (case-insensitive). Filtering happens in-process so the filter text never
// reaches a shell and "no matching lines" is not treated as a failure.
server.tool(
  "get_logs",
  "Get device logs (logcat)",
  {
    filter: z.string().optional().describe("Filter logs by tag or keyword"),
    lines: z.number().optional().describe("Number of lines to retrieve (default: 50)"),
    level: z.enum(["V", "D", "I", "W", "E"]).optional().describe("Minimum log level (V=Verbose, D=Debug, I=Info, W=Warn, E=Error)"),
  },
  async ({ filter, lines = 50, level }) => {
    let cmd = `logcat -d -t ${lines}`;
    if (level) {
      cmd += ` *:${level}`;
    }

    const output = await shell(cmd);

    const needle = filter ? filter.toLowerCase() : "";
    const logs =
      needle === ""
        ? output
        : output
            .split("\n")
            .filter((line) => line.toLowerCase().includes(needle))
            .join("\n");

    return {
      content: [
        {
          type: "text",
          text: `Logs:\n${logs}`,
        },
      ],
    };
  }
);
446
-
447
// =====================================================
// TOOL: device_info
// =====================================================
// Runs six read-only queries concurrently and formats them into a short,
// human-readable summary of the connected device.
server.tool(
  "device_info",
  "Get information about the connected device",
  {},
  async () => {
    const queries = [
      "getprop ro.product.model",
      "getprop ro.build.version.release",
      "getprop ro.build.version.sdk",
      "wm density",
      "wm size",
      "dumpsys battery | grep level",
    ];
    const [model, android, sdk, density, size, battery] = await Promise.all(
      queries.map((query) => shell(query))
    );

    // Strip the label each command prints in front of its value.
    const summary = [
      `Device: ${model}`,
      `Android: ${android} (SDK ${sdk})`,
      `Screen: ${size.replace("Physical size: ", "")}`,
      `Density: ${density.replace("Physical density: ", "")}`,
      `Battery: ${battery.replace("level: ", "")}%`,
    ].join("\n");

    return { content: [{ type: "text", text: summary }] };
  }
);
478
-
479
// =====================================================
// TOOL: clear_app_data
// =====================================================
// Wipes an app's data with `pm clear`. The package name is validated first
// because it is interpolated into a shell command line.
server.tool(
  "clear_app_data",
  "Clear all data for an application",
  {
    package: z.string().describe("Package name of the app"),
  },
  async ({ package: pkg }) => {
    // Only letters, digits, underscores and dots are accepted, so no shell
    // metacharacter can reach the command.
    if (!/^[A-Za-z0-9_.]+$/.test(pkg)) {
      throw new Error(`Invalid package name: "${pkg}"`);
    }

    await shell(`pm clear ${pkg}`);

    return {
      content: [
        {
          type: "text",
          text: `Data cleared for ${pkg}`,
        },
      ],
    };
  }
);
501
-
502
// =====================================================
// TOOL: force_stop
// =====================================================
// Stops an app with `am force-stop`. The package name is validated first
// because it is interpolated into a shell command line.
server.tool(
  "force_stop",
  "Force stop an application",
  {
    package: z.string().describe("Package name of the app"),
  },
  async ({ package: pkg }) => {
    // Only letters, digits, underscores and dots are accepted, so no shell
    // metacharacter can reach the command.
    if (!/^[A-Za-z0-9_.]+$/.test(pkg)) {
      throw new Error(`Invalid package name: "${pkg}"`);
    }

    await shell(`am force-stop ${pkg}`);

    return {
      content: [
        {
          type: "text",
          text: `Force stopped ${pkg}`,
        },
      ],
    };
  }
);
524
-
525
// =====================================================
// TOOL: get_current_activity
// =====================================================
// Tries several dumpsys queries in order and reports the output of the first
// one that yields anything; failures of individual queries are ignored so the
// next one can be tried. Falls back to "Unknown".
server.tool(
  "get_current_activity",
  "Get the currently focused activity/screen",
  {},
  async () => {
    // Ordered from the standard Android query to the Redroid/Docker-compatible one.
    const probes = [
      "dumpsys activity activities | grep -E 'mResumedActivity|mCurrentFocus' || true",
      "dumpsys activity top | head -5 || true",
      "dumpsys window | grep -E 'mCurrentFocus|mFocusedApp' || true",
    ];

    let activity = "Unknown";
    for (const probe of probes) {
      if (activity !== "Unknown") {
        break;
      }
      try {
        const output = await shell(probe);
        if (output && output.trim()) {
          activity = output.trim();
        }
      } catch {
        // Ignore and fall through to the next probe.
      }
    }

    return { content: [{ type: "text", text: `Current activity:\n${activity}` }] };
  }
);
580
-
581
// =====================================================
// TOOL: wait_for_element
// =====================================================
// Polls the UI dump until it contains the given text (case-insensitive) or the
// timeout elapses, pausing 500 ms between polls.
server.tool(
  "wait_for_element",
  "Wait for a UI element with specific text to appear",
  {
    text: z.string().describe("Text of the element to wait for"),
    timeout: z.number().optional().describe("Timeout in seconds (default: 10)"),
  },
  async ({ text, timeout = 10 }) => {
    const startedAt = Date.now();
    const budgetMs = timeout * 1000;
    const pause = () => new Promise((resolve) => setTimeout(resolve, 500));

    for (;;) {
      if (!(Date.now() - startedAt < budgetMs)) {
        break;
      }

      await shell("uiautomator dump /sdcard/ui_dump.xml");
      const dump = await shell("cat /sdcard/ui_dump.xml");

      const present = dump.toLowerCase().includes(text.toLowerCase());
      if (present) {
        const elapsedSeconds = Math.round((Date.now() - startedAt) / 1000);
        return {
          content: [{ type: "text", text: `Element "${text}" found after ${elapsedSeconds}s` }],
        };
      }

      // Wait before the next poll.
      await pause();
    }

    return {
      content: [{ type: "text", text: `Timeout: Element "${text}" not found after ${timeout}s` }],
    };
  }
);
624
-
625
// =====================================================
// TOOL: long_press
// =====================================================
// Holds a touch at one point by swiping from that point to itself for the
// requested duration (1000 ms unless specified).
server.tool(
  "long_press",
  "Perform a long press at the specified coordinates (useful for context menus)",
  {
    x: z.number().describe("X coordinate"),
    y: z.number().describe("Y coordinate"),
    duration: z.number().optional().describe("Duration in milliseconds (default: 1000)"),
  },
  async ({ x, y, duration = 1000 }) => {
    // Start and end of the swipe are the same point.
    const point = `${x} ${y}`;
    await shell(`input swipe ${point} ${point} ${duration}`);

    const summary = `Long pressed at (${x}, ${y}) for ${duration}ms`;
    return { content: [{ type: "text", text: summary }] };
  }
);
650
-
651
// =====================================================
// TOOL: clear_input
// =====================================================
// Clears the focused field by moving the cursor to the end and sending up to
// `maxChars` delete key events. Deletes are sent in batches (several keycodes
// per `input keyevent` call) instead of spawning one adb process per character.
server.tool(
  "clear_input",
  "Clear the currently focused text input field",
  {
    maxChars: z.number().optional().describe("Maximum characters to delete (default: 100)"),
  },
  async ({ maxChars = 100 }) => {
    // KEYCODE_MOVE_END = 123, KEYCODE_DEL = 67
    await shell("input keyevent 123"); // Move to end

    // Same number of deletes as a `for (i = 0; i < maxChars; i++)` loop would send.
    const deletesPerCall = 50;
    for (let remaining = Math.ceil(maxChars); remaining > 0; remaining -= deletesPerCall) {
      const batch = Math.min(deletesPerCall, remaining);
      await shell(`input keyevent ${"67 ".repeat(batch).trim()}`);
    }

    return {
      content: [
        {
          type: "text",
          text: `Cleared input field (deleted up to ${maxChars} characters)`,
        },
      ],
    };
  }
);
680
-
681
// =====================================================
// TOOL: select_all
// =====================================================
// Sends keycodes 113 (KEYCODE_CTRL_LEFT) and 29 (KEYCODE_A) in one
// `input keyevent --longpress` call, intended as CTRL+A.
server.tool(
  "select_all",
  "Select all text in the currently focused input field",
  {},
  async () => {
    const ctrlLeft = 113;
    const letterA = 29;
    await shell(`input keyevent --longpress ${ctrlLeft} ${letterA}`);

    return { content: [{ type: "text", text: "Selected all text in focused field" }] };
  }
);
703
-
704
// =====================================================
// TOOL: set_text
// =====================================================
// Clears the focused field (move to end, then up to `maxClearChars` deletes)
// and types the new text. Deletes are batched into few adb calls, and the text
// is quoted for both the host shell and the device shell so characters such as
// quotes, `$`, `;` or `&` are typed literally instead of being run as shell syntax.
server.tool(
  "set_text",
  "Clear the current input field and type new text (combines clear + type)",
  {
    text: z.string().describe("Text to type after clearing"),
    maxClearChars: z.number().optional().describe("Maximum characters to clear (default: 100)"),
  },
  async ({ text, maxClearChars = 100 }) => {
    // Wrap a value in single quotes so a POSIX shell treats it as one literal word.
    const quotePosix = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

    // First clear the field: KEYCODE_MOVE_END = 123, KEYCODE_DEL = 67.
    await shell("input keyevent 123"); // Move to end
    const deletesPerCall = 50;
    for (let remaining = Math.ceil(maxClearChars); remaining > 0; remaining -= deletesPerCall) {
      const batch = Math.min(deletesPerCall, remaining);
      await shell(`input keyevent ${"67 ".repeat(batch).trim()}`);
    }

    // Then type the new text. Spaces are sent as %s, as this file has always done.
    const encoded = text.replace(/ /g, "%s");
    const deviceCommand = `input text ${quotePosix(encoded)}`;
    // cmd.exe does not understand single quotes, so Windows hosts use double quotes.
    const hostArgument =
      process.platform === "win32"
        ? `"${deviceCommand.replace(/"/g, '\\"')}"`
        : quotePosix(deviceCommand);
    await shell(hostArgument);

    return {
      content: [
        {
          type: "text",
          text: `Cleared field and typed: "${text}"`,
        },
      ],
    };
  }
);
735
-
736
// =====================================================
// TOOL: drag
// =====================================================
// Same `input swipe` gesture as the swipe tool, but with a 1000 ms default
// duration so the movement registers as a drag.
server.tool(
  "drag",
  "Perform a drag gesture from one point to another (slower than swipe, for drag & drop)",
  {
    x1: z.number().describe("Starting X coordinate"),
    y1: z.number().describe("Starting Y coordinate"),
    x2: z.number().describe("Ending X coordinate"),
    y2: z.number().describe("Ending Y coordinate"),
    duration: z.number().optional().describe("Duration in milliseconds (default: 1000)"),
  },
  async ({ x1, y1, x2, y2, duration = 1000 }) => {
    const gesture = [x1, y1, x2, y2, duration].join(" ");
    await shell(`input swipe ${gesture}`);

    const summary = `Dragged from (${x1}, ${y1}) to (${x2}, ${y2}) over ${duration}ms`;
    return { content: [{ type: "text", text: summary }] };
  }
);
762
-
763
// =====================================================
// TOOL: double_tap
// =====================================================
// Sends two taps at the same point with a 100 ms pause between them.
server.tool(
  "double_tap",
  "Perform a double tap at the specified coordinates",
  {
    x: z.number().describe("X coordinate"),
    y: z.number().describe("Y coordinate"),
  },
  async ({ x, y }) => {
    const tapCommand = `input tap ${x} ${y}`;

    await shell(tapCommand);
    await new Promise((resolve) => setTimeout(resolve, 100));
    await shell(tapCommand);

    return { content: [{ type: "text", text: `Double tapped at (${x}, ${y})` }] };
  }
);
788
-
789
// =====================================================
// TOOL: get_screen_size
// =====================================================
// Reads `wm size` and `wm density` concurrently and returns
// { width, height, density } as pretty-printed JSON; any value that cannot be
// parsed is reported as 0.
server.tool(
  "get_screen_size",
  "Get the screen dimensions and density of the device",
  {},
  async () => {
    const sizeQuery = shell("wm size");
    const densityQuery = shell("wm density");
    const [sizeOutput, densityOutput] = await Promise.all([sizeQuery, densityQuery]);

    const dimensions = sizeOutput.match(/(\d+)x(\d+)/);
    const dpi = densityOutput.match(/(\d+)/);

    let width = 0;
    let height = 0;
    if (dimensions) {
      width = parseInt(dimensions[1]);
      height = parseInt(dimensions[2]);
    }
    const density = dpi ? parseInt(dpi[1]) : 0;

    const payload = JSON.stringify({ width, height, density }, null, 2);
    return { content: [{ type: "text", text: payload }] };
  }
);
819
-
820
// =====================================================
// TOOL: is_element_visible
// =====================================================
// Reports whether the UI dump contains a node matching the given text
// (case-insensitive substring) or, failing that, the given resource-id, and
// returns the node's bounds as JSON. Both inputs are regex-escaped so that
// characters such as "." in a resource-id are matched literally.
server.tool(
  "is_element_visible",
  "Check if an element with specific text or resource-id is visible on screen",
  {
    text: z.string().optional().describe("Text to search for"),
    resourceId: z.string().optional().describe("Resource ID to search for"),
  },
  async ({ text, resourceId }) => {
    if (!text && !resourceId) {
      return {
        content: [
          {
            type: "text",
            text: JSON.stringify({ visible: false, error: "Must provide text or resourceId" }),
          },
        ],
      };
    }

    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    const escapeRegExp = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const boundsSuffix = `.*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;

    // Find the first node whose attribute matches and convert its bounds.
    const locate = (attributePattern: string) => {
      const match = new RegExp(`${attributePattern}${boundsSuffix}`, "i").exec(xml);
      if (!match) {
        return null;
      }
      const [x1, y1, x2, y2] = match.slice(1, 5).map((value) => parseInt(value, 10));
      return {
        x: x1,
        y: y1,
        width: x2 - x1,
        height: y2 - y1,
        centerX: Math.round((x1 + x2) / 2),
        centerY: Math.round((y1 + y2) / 2),
      };
    };

    // Text takes precedence; the resource-id is only consulted when text did not match.
    let bounds = text ? locate(`text="[^"]*${escapeRegExp(text)}[^"]*"`) : null;
    if (!bounds && resourceId) {
      bounds = locate(`resource-id="${escapeRegExp(resourceId)}"`);
    }

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify({ visible: bounds !== null, bounds }, null, 2),
        },
      ],
    };
  }
);
899
-
900
// =====================================================
// TOOL: get_element_bounds
// =====================================================
// Finds every node matching the given text (case-insensitive substring) or,
// when no text is given, the given resource-id, and returns the bounds and
// center of the match selected by `index` as JSON. The search value is
// regex-escaped, and an index that is negative, fractional or past the last
// match yields an "out of range" result instead of an exception.
server.tool(
  "get_element_bounds",
  "Get the exact bounds and center coordinates of an element",
  {
    text: z.string().optional().describe("Text of the element"),
    resourceId: z.string().optional().describe("Resource ID of the element"),
    index: z.number().optional().describe("Index if multiple matches (0-based, default: 0)"),
  },
  async ({ text, resourceId, index = 0 }) => {
    if (!text && !resourceId) {
      return {
        content: [
          {
            type: "text",
            text: JSON.stringify({ error: "Must provide text or resourceId" }),
          },
        ],
      };
    }

    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    const escapeRegExp = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const boundsSuffix = `.*?bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;
    const pattern = text
      ? `text="[^"]*${escapeRegExp(text)}[^"]*"${boundsSuffix}`
      : `resource-id="${escapeRegExp(resourceId || "")}"${boundsSuffix}`;

    const regex = new RegExp(pattern, "gi");
    const matches: Array<{ x1: number; y1: number; x2: number; y2: number }> = [];
    let match: RegExpExecArray | null;

    while ((match = regex.exec(xml)) !== null) {
      matches.push({
        x1: parseInt(match[1], 10),
        y1: parseInt(match[2], 10),
        x2: parseInt(match[3], 10),
        y2: parseInt(match[4], 10),
      });
    }

    if (matches.length === 0) {
      return {
        content: [
          {
            type: "text",
            text: JSON.stringify({ found: false, error: "Element not found" }),
          },
        ],
      };
    }

    // Reject anything that is not a valid position in `matches`.
    if (!Number.isInteger(index) || index < 0 || index >= matches.length) {
      return {
        content: [
          {
            type: "text",
            text: JSON.stringify({
              found: false,
              error: `Index ${index} out of range. Found ${matches.length} matches.`,
            }),
          },
        ],
      };
    }

    const m = matches[index];
    const result = {
      found: true,
      matchCount: matches.length,
      index,
      bounds: {
        x: m.x1,
        y: m.y1,
        width: m.x2 - m.x1,
        height: m.y2 - m.y1,
      },
      center: {
        x: Math.round((m.x1 + m.x2) / 2),
        y: Math.round((m.y1 + m.y2) / 2),
      },
    };

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(result, null, 2),
        },
      ],
    };
  }
);
999
-
1000
// =====================================================
// TOOL: scroll_to_text
// =====================================================
// Repeatedly checks the UI dump for the text (case-insensitive) and, while it
// is absent, swipes between 70% and 30% of the screen height, up to
// `maxScrolls` times with a 500 ms pause after each swipe.
server.tool(
  "scroll_to_text",
  "Scroll the screen until an element with specific text is visible",
  {
    text: z.string().describe("Text to search for"),
    direction: z.enum(["up", "down"]).optional().describe("Scroll direction (default: down)"),
    maxScrolls: z.number().optional().describe("Maximum scroll attempts (default: 10)"),
  },
  async ({ text, direction = "down", maxScrolls = 10 }) => {
    // Screen size comes from `wm size`; 1080x2400 is used when it cannot be parsed.
    const reported = (await shell("wm size")).match(/(\d+)x(\d+)/);
    const width = reported ? parseInt(reported[1]) : 1080;
    const height = reported ? parseInt(reported[2]) : 2400;

    const centerX = Math.round(width / 2);
    const lowerY = Math.round(height * 0.7);
    const upperY = Math.round(height * 0.3);
    const scrollingDown = direction === "down";
    const startY = scrollingDown ? lowerY : upperY;
    const endY = scrollingDown ? upperY : lowerY;

    const swipeCommand = `input swipe ${centerX} ${startY} ${centerX} ${endY} 300`;
    const wanted = text.toLowerCase();

    let attempt = 0;
    while (attempt < maxScrolls) {
      await shell("uiautomator dump /sdcard/ui_dump.xml");
      const dump = await shell("cat /sdcard/ui_dump.xml");

      if (dump.toLowerCase().includes(wanted)) {
        return {
          content: [{ type: "text", text: `Found "${text}" after ${attempt} scroll(s)` }],
        };
      }

      await shell(swipeCommand);
      await new Promise((resolve) => setTimeout(resolve, 500));
      attempt += 1;
    }

    return {
      content: [{ type: "text", text: `Text "${text}" not found after ${maxScrolls} scrolls` }],
    };
  }
);
1050
-
1051
- // =====================================================
1052
- // TOOL: wait_for_ui_stable
1053
- // =====================================================
1054
/**
 * Build a normalized fingerprint of the UI elements in a hierarchy dump.
 *
 * Each tag that carries a `bounds` attribute contributes one entry of the form
 * `text|class|x1,y1,x2,y2`; all other attributes are ignored. Entries are
 * sorted so the result does not depend on the order of tags in the dump.
 *
 * Attributes are read tag by tag. A single pattern with optional `text` and
 * `class` groups in front of `bounds` never captures those groups, which made
 * every dump produce an empty fingerprint.
 *
 * @param xml - Raw XML of the UI hierarchy dump.
 * @returns Newline-joined, sorted entries; an empty string when nothing matches.
 */
function extractUIFingerprint(xml: string): string {
  const tagPattern = /<[^<>]*>/g;
  const textPattern = /\stext="([^"]*)"/;
  const classPattern = /\sclass="([^"]*)"/;
  const boundsPattern = /\sbounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/;

  const elements: string[] = [];
  let tag: RegExpExecArray | null;

  while ((tag = tagPattern.exec(xml)) !== null) {
    const attributes = tag[0];

    const bounds = boundsPattern.exec(attributes);
    if (bounds === null) {
      continue;
    }

    const textMatch = textPattern.exec(attributes);
    const classMatch = classPattern.exec(attributes);
    const text = textMatch !== null ? textMatch[1] : "";
    const className = classMatch !== null ? classMatch[1] : "";

    // Only include elements with text or a class name.
    if (text || className) {
      const [, x1, y1, x2, y2] = bounds;
      elements.push(`${text}|${className}|${x1},${y1},${x2},${y2}`);
    }
  }

  return elements.sort().join("\n");
}
1074
-
1075
// Polls the UI dump until its fingerprint is unchanged across consecutive
// checks (the stability counter must reach 2) or the timeout elapses.
server.tool(
  "wait_for_ui_stable",
  "Wait for the UI to stop changing (useful after animations)",
  {
    timeout: z.number().optional().describe("Timeout in milliseconds (default: 5000)"),
    checkInterval: z.number().optional().describe("Check interval in milliseconds (default: 500)"),
  },
  async ({ timeout = 5000, checkInterval = 500 }) => {
    const startedAt = Date.now();
    let previous = "";
    let unchangedChecks = 0;

    while (Date.now() - startedAt < timeout) {
      await shell("uiautomator dump /sdcard/ui_dump.xml");
      const dump = await shell("cat /sdcard/ui_dump.xml");
      const current = extractUIFingerprint(dump);

      if (current !== previous) {
        // The UI changed: restart the count from this snapshot.
        unchangedChecks = 0;
        previous = current;
      } else {
        unchangedChecks += 1;
        if (unchangedChecks >= 2) {
          const elapsed = Date.now() - startedAt;
          const shown = elapsed < 1000 ? elapsed + "ms" : Math.round(elapsed / 1000) + "s";
          return { content: [{ type: "text", text: `UI stable after ${shown}` }] };
        }
      }

      await new Promise((resolve) => setTimeout(resolve, checkInterval));
    }

    return {
      content: [{ type: "text", text: `Timeout: UI did not stabilize within ${timeout}ms` }],
    };
  }
);
1123
-
1124
- // =====================================================
1125
- // TOOL: wait_for_element_gone
1126
- // =====================================================
1127
// Polls the UI dump every 500 ms until the given text no longer occurs in it
// (case-insensitive substring search over the whole XML) or the timeout hits.
server.tool(
  "wait_for_element_gone",
  "Wait for an element to disappear from the screen",
  {
    text: z.string().describe("Text of the element to wait for disappearance"),
    timeout: z.number().optional().describe("Timeout in milliseconds (default: 10000)"),
  },
  async ({ text, timeout = 10000 }) => {
    const began = Date.now();
    const needle = text.toLowerCase();
    const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

    for (;;) {
      if (Date.now() - began >= timeout) {
        break;
      }

      await shell("uiautomator dump /sdcard/ui_dump.xml");
      const dump = await shell("cat /sdcard/ui_dump.xml");
      const stillPresent = dump.toLowerCase().includes(needle);

      if (!stillPresent) {
        const seconds = Math.round((Date.now() - began) / 1000);
        return {
          content: [{ type: "text", text: `Element "${text}" disappeared after ${seconds}s` }],
        };
      }

      await pause(500);
    }

    return {
      content: [{ type: "text", text: `Timeout: Element "${text}" still visible after ${timeout}ms` }],
    };
  }
);
1165
-
1166
- // =====================================================
1167
- // TOOL: multi_tap
1168
- // =====================================================
1169
// Sends `taps` consecutive `input tap` commands at one position, pausing
// `interval` ms between taps (no pause before the first or after the last).
server.tool(
  "multi_tap",
  "Perform multiple rapid taps at the same position",
  {
    x: z.number().describe("X coordinate"),
    y: z.number().describe("Y coordinate"),
    taps: z.number().optional().describe("Number of taps (default: 2)"),
    interval: z.number().optional().describe("Interval between taps in ms (default: 100)"),
  },
  async ({ x, y, taps = 2, interval = 100 }) => {
    const tapCommand = `input tap ${x} ${y}`;

    let performed = 0;
    while (performed < taps) {
      if (performed > 0) {
        await new Promise((done) => setTimeout(done, interval));
      }
      await shell(tapCommand);
      performed += 1;
    }

    return {
      content: [{ type: "text", text: `Performed ${taps} taps at (${x}, ${y})` }],
    };
  }
);
1196
-
1197
- // =====================================================
1198
- // TOOL: pinch_zoom
1199
- // =====================================================
1200
// Approximates a pinch with two sequential vertical swipes, one above and one
// below the center point. This is not a real multitouch gesture.
server.tool(
  "pinch_zoom",
  "Perform a pinch zoom gesture (requires Android 8+)",
  {
    x: z.number().describe("Center X coordinate"),
    y: z.number().describe("Center Y coordinate"),
    scale: z.number().describe("Scale factor (>1 zoom in, <1 zoom out)"),
    duration: z.number().optional().describe("Duration in milliseconds (default: 500)"),
  },
  async ({ x, y, scale, duration = 500 }) => {
    const baseSpan = 200;

    // Vertical distance from the center at which each swipe starts and ends.
    let startOffset: number;
    let endOffset: number;
    if (scale > 1) {
      // Zoom in: both swipes move away from the center.
      startOffset = 50;
      endOffset = Math.round(Math.round(baseSpan * scale) / 2);
    } else {
      // Zoom out: both swipes move toward the center.
      startOffset = Math.round(baseSpan / 2);
      endOffset = Math.round((baseSpan * scale) / 2);
    }

    // Upper swipe first (sign -1), then the lower one (sign +1).
    for (const sign of [-1, 1]) {
      await shell(`input swipe ${x} ${y + sign * startOffset} ${x} ${y + sign * endOffset} ${duration}`);
    }

    return {
      content: [
        {
          type: "text",
          text: `Pinch zoom at (${x}, ${y}) with scale ${scale}. Note: True multitouch requires instrumentation.`,
        },
      ],
    };
  }
);
1239
-
1240
- // =====================================================
1241
- // TOOL: set_clipboard
1242
- // =====================================================
1243
// Stores the text in a temp file on the device (first writable candidate
// path wins). As written, this does not call any system clipboard service.
server.tool(
  "set_clipboard",
  "Set text to the device clipboard",
  {
    text: z.string().describe("Text to copy to clipboard"),
  },
  async ({ text }) => {
    // base64 keeps arbitrary text intact inside the echo command.
    const encoded = Buffer.from(text).toString("base64");

    // Candidate locations, for compatibility (standard emulators vs Redroid/Docker).
    const candidates = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];

    // Write to one path and confirm by reading back; any failure yields false.
    const writeAndVerify = async (clipPath: string): Promise<boolean> => {
      try {
        // Single quotes make the whole pipeline (pipe included) run on the device.
        await shell(`'echo "${encoded}" | base64 -d > ${clipPath}'`);
        const readBack = await shell(`cat ${clipPath} 2>/dev/null`);
        return Boolean(readBack && readBack.length > 0);
      } catch {
        return false;
      }
    };

    let stored = false;
    for (const candidate of candidates) {
      if (await writeAndVerify(candidate)) {
        stored = true;
        break;
      }
    }

    if (!stored) {
      return {
        content: [
          { type: "text", text: `Error: Could not write clipboard. Tried paths: ${candidates.join(", ")}` },
        ],
      };
    }

    const preview = `${text.substring(0, 50)}${text.length > 50 ? "..." : ""}`;
    return {
      content: [{ type: "text", text: `Clipboard set to: "${preview}"` }],
    };
  }
);
1292
-
1293
- // =====================================================
1294
- // TOOL: get_clipboard
1295
- // =====================================================
1296
// Reads back the temp file checked at the candidate paths below; the first
// path with non-blank content wins, otherwise an empty value is reported.
server.tool(
  "get_clipboard",
  "Get the current device clipboard content",
  {},
  async () => {
    // Candidate locations, for compatibility (standard emulators vs Redroid/Docker).
    const candidates = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];

    let stored = "";
    for (const candidate of candidates) {
      let raw: string;
      try {
        raw = await shell(`cat ${candidate} 2>/dev/null`);
      } catch {
        continue; // unreadable here, try the next path
      }
      if (raw && raw.trim()) {
        stored = raw;
        break;
      }
    }

    return {
      content: [{ type: "text", text: `Clipboard content: "${stored}"` }],
    };
  }
);
1332
-
1333
- // =====================================================
1334
- // TOOL: rotate_device
1335
- // =====================================================
1336
// Turns auto-rotation off and then forces the requested orientation through
// the `user_rotation` system setting.
server.tool(
  "rotate_device",
  "Rotate the device to portrait or landscape orientation",
  {
    orientation: z.enum(["portrait", "landscape"]).describe("Target orientation"),
  },
  async ({ orientation }) => {
    // user_rotation value: 0 for portrait, 1 otherwise (landscape).
    let userRotation = 1;
    if (orientation === "portrait") {
      userRotation = 0;
    }

    // Auto-rotation has to be disabled first.
    await shell("settings put system accelerometer_rotation 0");
    await shell(`settings put system user_rotation ${userRotation}`);

    return {
      content: [{ type: "text", text: `Device rotated to ${orientation}` }],
    };
  }
);
1360
-
1361
- // =====================================================
1362
- // TOOL: tap_safe
1363
- // =====================================================
1364
// Taps at (x, y) after moving the point out of the assumed status-bar and
// navigation-bar strips and clamping X to a 10 px margin inside the screen.
server.tool(
  "tap_safe",
  "Tap at coordinates while avoiding system navigation bars",
  {
    x: z.number().describe("X coordinate"),
    y: z.number().describe("Y coordinate"),
    avoidStatusBar: z.boolean().optional().describe("Avoid status bar area (default: true)"),
    avoidNavBar: z.boolean().optional().describe("Avoid navigation bar area (default: true)"),
  },
  async ({ x, y, avoidStatusBar = true, avoidNavBar = true }) => {
    // Approximate bar heights in pixels; these are fixed guesses, not measured.
    const statusBarPx = 50;
    const navBarPx = 120;

    // Screen size from `wm size`, with a 1080x2400 fallback if it cannot be parsed.
    const dims = (await shell("wm size")).match(/(\d+)x(\d+)/);
    const screenWidth = dims ? parseInt(dims[1]) : 1080;
    const screenHeight = dims ? parseInt(dims[2]) : 2400;

    const notes: string[] = [];
    let tapY = y;

    if (avoidStatusBar && y < statusBarPx) {
      tapY = statusBarPx + 10;
      notes.push(`status bar (${y} -> ${tapY})`);
    }

    // Evaluated against the requested y, so it overrides the adjustment above.
    if (avoidNavBar && y > screenHeight - navBarPx) {
      tapY = screenHeight - navBarPx - 10;
      notes.push(`nav bar (${y} -> ${tapY})`);
    }

    const tapX = Math.max(10, Math.min(x, screenWidth - 10));

    await shell(`input tap ${tapX} ${tapY}`);

    let message = `Tapped at (${tapX}, ${tapY})`;
    if (notes.length > 0) {
      message += ` [adjusted to avoid ${notes.join(", ")}]`;
    }

    return {
      content: [{ type: "text", text: message }],
    };
  }
);
1421
-
1422
- // =====================================================
1423
- // TOOL: tap_element
1424
- // =====================================================
1425
// Finds elements in the UI dump by resource-id (preferred when given) or by
// text, then taps the center of the match selected by `index`.
//
// Both search values are regex-escaped before being embedded in the pattern.
// A resource id such as "com.app:id/ok" contains ".", and ids or text with
// "(" or "[" would otherwise produce an invalid pattern. Matching is
// case-insensitive, including when `exact` is set.
server.tool(
  "tap_element",
  "Find and tap an element by text or resource-id (more reliable than tap_text)",
  {
    text: z.string().optional().describe("Text to search for"),
    resourceId: z.string().optional().describe("Resource ID to search for"),
    index: z.number().optional().describe("Index if multiple matches (0-based, default: 0)"),
    exact: z.boolean().optional().describe("Exact text match (default: false)"),
  },
  async ({ text, resourceId, index = 0, exact = false }) => {
    if (!text && !resourceId) {
      return {
        content: [
          {
            type: "text",
            text: "Error: Must provide either text or resourceId",
          },
        ],
      };
    }

    // Escape regex metacharacters so the value is matched literally.
    const escapeRegExp = (raw: string): string => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // The bounds attribute must follow inside the same tag ([^>]* cannot leave it).
    const boundsSuffix = `[^>]*bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;

    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    let pattern: string;
    let searchType: string;

    if (resourceId) {
      pattern = `resource-id="${escapeRegExp(resourceId)}"${boundsSuffix}`;
      searchType = `resource-id="${resourceId}"`;
    } else if (exact) {
      pattern = `text="${escapeRegExp(text ?? "")}"${boundsSuffix}`;
      searchType = `text="${text}"`;
    } else {
      pattern = `text="[^"]*${escapeRegExp(text ?? "")}[^"]*"${boundsSuffix}`;
      searchType = `text containing "${text}"`;
    }

    const regex = new RegExp(pattern, "gi");
    const matches: Array<{ x1: number; y1: number; x2: number; y2: number }> = [];
    let match;

    while ((match = regex.exec(xml)) !== null) {
      matches.push({
        x1: parseInt(match[1], 10),
        y1: parseInt(match[2], 10),
        x2: parseInt(match[3], 10),
        y2: parseInt(match[4], 10),
      });
    }

    if (matches.length === 0) {
      return {
        content: [
          {
            type: "text",
            text: `Element with ${searchType} not found`,
          },
        ],
      };
    }

    // Negative and fractional indexes are rejected here too; they would
    // otherwise select `undefined` and crash below.
    if (!Number.isInteger(index) || index < 0 || index >= matches.length) {
      return {
        content: [
          {
            type: "text",
            text: `Index ${index} out of range. Found ${matches.length} matches for ${searchType}`,
          },
        ],
      };
    }

    const m = matches[index];
    const centerX = Math.round((m.x1 + m.x2) / 2);
    const centerY = Math.round((m.y1 + m.y2) / 2);

    await shell(`input tap ${centerX} ${centerY}`);

    return {
      content: [
        {
          type: "text",
          text: `Tapped element with ${searchType} at (${centerX}, ${centerY})${matches.length > 1 ? ` [match ${index + 1}/${matches.length}]` : ""}`,
        },
      ],
    };
  }
);
1516
-
1517
- // =====================================================
1518
- // TOOL: get_focused_element
1519
- // =====================================================
1520
// Reports the first element in the UI dump whose `focused` attribute is
// "true", with its text, bounds and center point as JSON.
//
// Attributes are read per tag and independently of their order. Patterns that
// demand a fixed order (focused -> text -> bounds, or bounds -> focused ->
// text) never match a dump that writes `text` before `focused` and `bounds`
// after it, so the tool would always answer `focused: false`.
// NOTE(review): that is the attribute order I recall uiautomator emitting;
// confirm against a real dump.
server.tool(
  "get_focused_element",
  "Get information about the currently focused UI element",
  {},
  async () => {
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    // Value of one attribute inside a single tag, or undefined when absent.
    const readAttr = (tag: string, name: string): string | undefined =>
      new RegExp(`\\s${name}="([^"]*)"`).exec(tag)?.[1];
    const boundsPattern = /\sbounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/;

    let focusedText = "";
    let focusedBounds: RegExpExecArray | null = null;
    for (const tag of xml.match(/<[^<>]*>/g) ?? []) {
      if (readAttr(tag, "focused") !== "true") {
        continue;
      }
      const bounds = boundsPattern.exec(tag);
      if (bounds) {
        focusedBounds = bounds;
        focusedText = readAttr(tag, "text") ?? "";
        break;
      }
    }

    if (!focusedBounds) {
      return {
        content: [
          {
            type: "text",
            text: JSON.stringify({ focused: false, element: null }),
          },
        ],
      };
    }

    const x1 = parseInt(focusedBounds[1], 10);
    const y1 = parseInt(focusedBounds[2], 10);
    const x2 = parseInt(focusedBounds[3], 10);
    const y2 = parseInt(focusedBounds[4], 10);

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify({
            focused: true,
            element: {
              text: focusedText,
              bounds: { x: x1, y: y1, width: x2 - x1, height: y2 - y1 },
              center: { x: Math.round((x1 + x2) / 2), y: Math.round((y1 + y2) / 2) },
            },
          }, null, 2),
        },
      ],
    };
  }
);
1583
-
1584
- // =====================================================
1585
- // TOOL: assert_screen_contains
1586
- // =====================================================
1587
// Checks the current UI dump for the given text and reports PASS/FAIL as JSON.
// exact: looks for the literal attribute `text="<text>"` (case-sensitive).
// otherwise: case-insensitive substring search over the whole XML.
server.tool(
  "assert_screen_contains",
  "Assert that specific text is visible on screen (useful for testing)",
  {
    text: z.string().describe("Text that should be visible"),
    exact: z.boolean().optional().describe("Exact match (default: false)"),
  },
  async ({ text, exact = false }) => {
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const dump = await shell("cat /sdcard/ui_dump.xml");

    const found = exact
      ? dump.includes(`text="${text}"`)
      : dump.toLowerCase().includes(text.toLowerCase());

    const report = {
      assertion: found ? "PASS" : "FAIL",
      expected: text,
      found,
    };

    return {
      content: [{ type: "text", text: JSON.stringify(report, null, 2) }],
    };
  }
);
1619
-
1620
- // =====================================================
1621
- // TOOL: get_all_text
1622
- // =====================================================
1623
// Lists every element of the UI dump that has a text attribute followed by
// bounds in the same tag, with its center point, ordered top-to-bottom and
// then left-to-right.
server.tool(
  "get_all_text",
  "Get all visible text elements on screen (useful for debugging and verification)",
  {
    includeEmpty: z.boolean().optional().describe("Include elements with empty text (default: false)"),
  },
  async ({ includeEmpty = false }) => {
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const dump = await shell("cat /sdcard/ui_dump.xml");

    const pattern = /text="([^"]*)"[^>]*bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;

    // Capture groups: 1 = text, 2/3 = left/top, 4/5 = right/bottom.
    const entries = Array.from(dump.matchAll(pattern))
      .filter((hit) => hit[1] || includeEmpty)
      .map((hit) => ({
        text: hit[1] || "(empty)",
        centerX: Math.round((parseInt(hit[2]) + parseInt(hit[4])) / 2),
        centerY: Math.round((parseInt(hit[3]) + parseInt(hit[5])) / 2),
      }));

    entries.sort((a, b) => a.centerY - b.centerY || a.centerX - b.centerX);

    const lines: string[] = [];
    for (const entry of entries) {
      lines.push(`"${entry.text}" at (${entry.centerX}, ${entry.centerY})`);
    }

    return {
      content: [{ type: "text", text: `Found ${entries.length} text elements:\n${lines.join("\n")}` }],
    };
  }
);
1663
-
1664
- // =====================================================
1665
- // TOOL: is_keyboard_visible
1666
- // =====================================================
1667
// Combines three independent heuristics; the keyboard is reported visible if
// any one of them fires. A heuristic that throws counts as "not detected".
server.tool(
  "is_keyboard_visible",
  "Check if the soft keyboard is currently visible on screen",
  {},
  async () => {
    // Runs one heuristic and maps any error to false.
    const probe = async (check: () => Promise<boolean>): Promise<boolean> => {
      try {
        return await check();
      } catch {
        return false;
      }
    };

    // Heuristic 1: the input-method dump reports mInputShown=true.
    const inputMethodShown = await probe(async () => {
      const imeDump = await shell("dumpsys input_method | grep mInputShown || true");
      return imeDump.includes("mInputShown=true");
    });

    // Heuristic 2: an input-method window is listed together with mHasSurface=true.
    const keyboardWindowVisible = await probe(async () => {
      const windows = await shell("dumpsys window windows | grep -i inputmethod || true");
      return windows.toLowerCase().includes("inputmethod") && windows.includes("mHasSurface=true");
    });

    // Heuristic 3: the visible frame's bottom edge is below 80% of the screen height.
    const heightReduced = await probe(async () => {
      const frameLine = await shell("dumpsys window | grep 'mVisibleFrame' || true");
      const size = (await shell("wm size")).match(/(\d+)x(\d+)/);
      if (!size || !frameLine) {
        return false;
      }
      const frame = frameLine.match(/mVisibleFrame=\[\d+,\d+\]\[\d+,(\d+)\]/);
      if (!frame) {
        return false;
      }
      return parseInt(frame[1]) < parseInt(size[2]) * 0.8;
    });

    const summary = {
      visible: inputMethodShown || keyboardWindowVisible || heightReduced,
      checks: {
        inputMethodShown,
        keyboardWindowVisible,
        heightReduced,
      },
    };

    return {
      content: [{ type: "text", text: JSON.stringify(summary, null, 2) }],
    };
  }
);
1730
-
1731
- // =====================================================
1732
- // TOOL: get_focused_input_value
1733
- // =====================================================
1734
// Returns the `text` attribute of the focused element in the UI dump as JSON.
// A focused element whose class name contains "Edit", "Input" or "Text"
// (case-insensitive) is preferred; failing that, the first focused element is
// returned with a note that it may not be an input field.
//
// Attributes are read per tag and independently of their order. Patterns that
// require `class` to precede `text` never match a dump that writes `text`
// first, so only the generic fallback would ever fire.
// NOTE(review): that is the attribute order I recall uiautomator emitting;
// confirm against a real dump.
server.tool(
  "get_focused_input_value",
  "Get the current text value of the focused input field",
  {},
  async () => {
    await shell("uiautomator dump /sdcard/ui_dump.xml");
    const xml = await shell("cat /sdcard/ui_dump.xml");

    // Value of one attribute inside a single tag, or undefined when absent.
    const readAttr = (tag: string, name: string): string | undefined =>
      new RegExp(`\\s${name}="([^"]*)"`).exec(tag)?.[1];
    const respond = (payload: Record<string, unknown>) => ({
      content: [{ type: "text" as const, text: JSON.stringify(payload, null, 2) }],
    });

    const focusedTags = (xml.match(/<[^<>]*>/g) ?? []).filter(
      (tag) => readAttr(tag, "focused") === "true"
    );

    // Prefer a focused element that looks like an input widget.
    const inputTag = focusedTags.find((tag) => /Edit|Input|Text/i.test(readAttr(tag, "class") ?? ""));
    if (inputTag !== undefined) {
      const value = readAttr(inputTag, "text") ?? "";
      return respond({
        found: true,
        value,
        isEmpty: value === "",
      });
    }

    // Otherwise fall back to whatever element holds focus.
    const anyFocused = focusedTags[0];
    if (anyFocused !== undefined) {
      const value = readAttr(anyFocused, "text") ?? "";
      return respond({
        found: true,
        value,
        isEmpty: value === "",
        note: "Found focused element (may not be an input field)",
      });
    }

    return respond({
      found: false,
      value: null,
      error: "No focused input field found",
    });
  }
);
1806
-
1807
/**
 * Entry point: connects the MCP server to a stdio transport.
 * The startup notice is written with console.error (stderr).
 */
async function main(): Promise<void> {
  const stdio = new StdioServerTransport();
  await server.connect(stdio);
  console.error("MCP Android Emulator Server running on stdio");
}

// Startup failures are reported through console.error.
main().catch((failure) => console.error(failure));
1
+ #!/usr/bin/env node
2
+ /**
3
+ * MCP Server for Android Emulator.
4
+ *
5
+ * Finalidad:
6
+ * Expone 43 tools MCP que permiten a un asistente LLM controlar un device
7
+ * Android vía ADB (screenshot, tap, type, launch apps, logs, asserts...).
8
+ *
9
+ * Interrelación:
10
+ * - src/adb/runner.ts → ejecución segura de adb (execFile, sin shell del host).
11
+ * - src/adb/validators.ts → allowlists zod para inputs que llegan al sh del device.
12
+ * - test/ → smoke tests que validan que payloads shell-metachar son
13
+ * rechazados por los validators y que los argv construidos
14
+ * son los esperados.
15
+ *
16
+ * Seguridad:
17
+ * Fix de la issue #1 (command injection). TODOS los argumentos derivados del
18
+ * LLM pasan por zod.refine antes de llegar al runner, y el runner usa execFile
19
+ * (no exec), por lo que /bin/sh del host nunca reinterpreta la línea de comando.
20
+ *
21
+ * @license MIT
22
+ */
23
+
24
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
25
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
26
+ import { z } from "zod";
27
+ import * as fs from "node:fs";
28
+ import * as os from "node:os";
29
+ import * as path from "node:path";
30
+
31
+ import {
32
+ runAdb,
33
+ runAdbShell,
34
+ runAdbExecOutBinary,
35
+ } from "./adb/runner.js";
36
+ import {
37
+ packageNameSchema,
38
+ apkPathSchema,
39
+ resourceIdSchema,
40
+ freeTextSchema,
41
+ typeableTextSchema,
42
+ searchFilterSchema,
43
+ positiveCountSchema,
44
+ coordinateSchema,
45
+ durationMsSchema,
46
+ } from "./adb/validators.js";
47
+
48
+ // =====================================================
49
+ // Configuration
50
+ // =====================================================
51
// Directory named by the SCREENSHOT_DIR environment variable, defaulting to
// /tmp/android-screenshots. It is created at module load when missing.
const SCREENSHOT_DIR = process.env.SCREENSHOT_DIR || "/tmp/android-screenshots";

const screenshotDirExists = fs.existsSync(SCREENSHOT_DIR);
if (screenshotDirExists === false) {
  fs.mkdirSync(SCREENSHOT_DIR, { recursive: true });
}

// =====================================================
// MCP Server
// =====================================================
// Single server instance on which every tool below is registered.
const server = new McpServer({ name: "android-emulator", version: "2.0.0" });
64
+
65
+ // =====================================================
66
+ // TOOL: screenshot
67
+ // =====================================================
68
// Captures the screen with `screencap -p` and returns the bytes as a
// base64-encoded PNG image.
server.tool(
  "screenshot",
  "Take a screenshot of the Android device/emulator and return it as a base64 image",
  {},
  async () => {
    const png = await runAdbExecOutBinary(["screencap", "-p"]);
    const encoded = png.toString("base64");
    return {
      content: [{ type: "image", data: encoded, mimeType: "image/png" }],
    };
  }
);
85
+
86
+ // =====================================================
87
+ // TOOL: get_ui_tree
88
+ // =====================================================
89
// Dumps the UI hierarchy and returns (a) one line per element with non-empty
// text, giving the center of its bounds, and (b) the first 5000 characters of
// the raw XML followed by "...".
server.tool(
  "get_ui_tree",
  "Get the UI element tree of the device (like DOM but for Android). Returns clickable elements with their coordinates.",
  {},
  async () => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const dump = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // Capture groups: 1 = text, 2/3 = left/top, 4/5 = right/bottom.
    const pattern = /text="([^"]*)".*?bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;

    const lines = Array.from(dump.matchAll(pattern))
      .filter((hit) => Boolean(hit[1]))
      .map((hit) => {
        const midX = Math.round((parseInt(hit[2]) + parseInt(hit[4])) / 2);
        const midY = Math.round((parseInt(hit[3]) + parseInt(hit[5])) / 2);
        return `"${hit[1]}" at (${midX}, ${midY})`;
      });

    const xmlHead = dump.substring(0, 5000);
    return {
      content: [
        {
          type: "text",
          text: `Elements found:\n${lines.join("\n")}\n\nFull XML:\n${xmlHead}...`,
        },
      ],
    };
  }
);
119
+
120
+ // =====================================================
121
+ // TOOL: tap
122
+ // =====================================================
123
// Sends a single `input tap` at the given coordinates.
server.tool(
  "tap",
  "Tap at the specified coordinates on the screen",
  {
    x: coordinateSchema.describe("X coordinate"),
    y: coordinateSchema.describe("Y coordinate"),
  },
  async ({ x, y }) => {
    const argv = ["input", "tap", String(x), String(y)];
    await runAdbShell(argv);

    return {
      content: [{ type: "text", text: `Tapped at (${x}, ${y})` }],
    };
  }
);
135
+
136
+ // =====================================================
137
+ // TOOL: tap_text
138
+ // =====================================================
139
// Looks up the first element in the UI dump whose text matches (always
// case-insensitive; whole value when `exact`, substring otherwise) and taps
// the center of its bounds.
server.tool(
  "tap_text",
  "Find an element by its text content and tap on it",
  {
    text: freeTextSchema.describe("Text of the element to find and tap"),
    exact: z.boolean().optional().describe("If true, match exact text. Default: false (partial match)"),
  },
  async ({ text, exact = false }) => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const dump = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // Regex metacharacters in the search text are escaped so it matches literally.
    const literal = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const textAttr = exact ? `text="${literal}"` : `text="[^"]*${literal}[^"]*"`;
    const boundsAttr = `bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;

    const hit = new RegExp(`${textAttr}.*?${boundsAttr}`, "i").exec(dump);
    if (hit === null) {
      return { content: [{ type: "text", text: `Element with text "${text}" not found` }] };
    }

    // Groups 1-4 are left, top, right, bottom of the bounds rectangle.
    const midX = Math.round((parseInt(hit[1]) + parseInt(hit[3])) / 2);
    const midY = Math.round((parseInt(hit[2]) + parseInt(hit[4])) / 2);
    await runAdbShell(["input", "tap", String(midX), String(midY)]);

    return {
      content: [{ type: "text", text: `Tapped on "${text}" at (${midX}, ${midY})` }],
    };
  }
);
171
+
172
+ // =====================================================
173
+ // TOOL: type_text
174
+ // =====================================================
175
/**
 * Encode text for the argument of Android's `input text` command.
 *
 * The text is percent-encoded as UTF-8 with `encodeURIComponent`, spaces are
 * written as `%s` (the escape `input text` uses for a space), and the ASCII
 * punctuation `@ , : =` is emitted literally instead of as `%40 %2C %3A %3D`.
 *
 * Why those four stay literal: they need no escaping, so sending them
 * unencoded types the intended character whether or not the device decodes
 * `%XX` sequences. Without this, an address such as `user@example.com` is
 * sent as `user%40example.com`. `/` and `+` are deliberately left encoded:
 * `/` could combine with an unencoded `*` into a path glob, and `+` means a
 * space to some URL decoders.
 *
 * NOTE(review): from memory of AOSP, `input text` appears to special-case
 * only `%s`; confirm on target devices whether other `%XX` sequences
 * (non-ASCII text, `%2F`, ...) are decoded or typed verbatim.
 *
 * @param text Text to type.
 * @returns The encoded argument for `input text`.
 * @throws URIError if `text` contains a lone surrogate (from `encodeURIComponent`).
 */
function encodeTextForInput(text: string): string {
  const literalPunctuation: Record<string, string> = {
    "%40": "@",
    "%2C": ",",
    "%3A": ":",
    "%3D": "=",
  };

  return encodeURIComponent(text)
    .replace(/%20/g, "%s")
    .replace(/%(?:40|2C|3A|3D)/g, (escape) => literalPunctuation[escape] ?? escape);
}
185
+
186
// Types the given text into the focused field via `input text`, after
// encoding it with encodeTextForInput. The reply echoes the unencoded text.
server.tool(
  "type_text",
  "Type text into the currently focused input field. Unicode is supported via URL-encoding. Shell metacharacters (; & | ` $ ( ) < > \\ quotes) are rejected.",
  {
    text: typeableTextSchema.describe("Text to type"),
  },
  async ({ text }) => {
    const encoded = encodeTextForInput(text);
    await runAdbShell(["input", "text", encoded]);

    return {
      content: [{ type: "text", text: `Typed: "${text}"` }],
    };
  }
);
197
+
198
+ // =====================================================
199
+ // TOOL: swipe
200
+ // =====================================================
201
// Sends one `input swipe` from (x1, y1) to (x2, y2) lasting `duration` ms.
server.tool(
  "swipe",
  "Perform a swipe gesture on the screen",
  {
    x1: coordinateSchema.describe("Starting X coordinate"),
    y1: coordinateSchema.describe("Starting Y coordinate"),
    x2: coordinateSchema.describe("Ending X coordinate"),
    y2: coordinateSchema.describe("Ending Y coordinate"),
    duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 300)"),
  },
  async ({ x1, y1, x2, y2, duration = 300 }) => {
    const numericArgs = [x1, y1, x2, y2, duration].map((value) => String(value));
    await runAdbShell(["input", "swipe", ...numericArgs]);

    return {
      content: [{ type: "text", text: `Swiped from (${x1}, ${y1}) to (${x2}, ${y2})` }],
    };
  }
);
219
+
220
+ // =====================================================
221
+ // TOOL: scroll
222
+ // =====================================================
223
// Scrolls by swiping `amount` pixels through the screen center over 300 ms.
// The swipe runs opposite to the gesture's start side: "up" starts below the
// center and ends above it, "left" starts right of the center and ends left.
server.tool(
  "scroll",
  "Scroll the screen in a direction",
  {
    direction: z.enum(["up", "down", "left", "right"]).describe("Direction to scroll"),
    amount: z.number().int().min(1).max(10_000).optional().describe("Scroll amount in pixels (default: 500)"),
  },
  async ({ direction, amount = 500 }) => {
    // Screen size from `wm size`, with a 1080x2400 fallback if it cannot be parsed.
    const dims = (await runAdbShell(["wm", "size"])).match(/(\d+)x(\d+)/);
    const screenW = dims ? parseInt(dims[1]) : 1080;
    const screenH = dims ? parseInt(dims[2]) : 2400;

    const midX = Math.round(screenW / 2);
    const midY = Math.round(screenH / 2);
    const reach = Math.round(amount / 2);

    // Start and end default to the center; only the scrolled axis is displaced.
    const from = { x: midX, y: midY };
    const to = { x: midX, y: midY };
    if (direction === "up") {
      from.y = midY + reach;
      to.y = midY - reach;
    } else if (direction === "down") {
      from.y = midY - reach;
      to.y = midY + reach;
    } else if (direction === "left") {
      from.x = midX + reach;
      to.x = midX - reach;
    } else if (direction === "right") {
      from.x = midX - reach;
      to.x = midX + reach;
    }

    await runAdbShell([
      "input", "swipe",
      String(from.x), String(from.y), String(to.x), String(to.y), "300",
    ]);

    return {
      content: [{ type: "text", text: `Scrolled ${direction}` }],
    };
  }
);
255
+
256
+ // =====================================================
257
+ // TOOL: press_key
258
+ // =====================================================
259
// Registers the "press_key" tool: maps a symbolic key name to its numeric
// key code and sends it with `input keyevent`.
server.tool(
  "press_key",
  "Press a system key (BACK, HOME, ENTER, etc)",
  {
    key: z.enum([
      "BACK", "HOME", "ENTER", "TAB", "DELETE", "MENU", "POWER",
      "VOLUME_UP", "VOLUME_DOWN",
    ]).describe("Key to press"),
  },
  async ({ key }) => {
    // Lookup table from the names accepted by the schema to key codes.
    const keyCodeByName: Record<string, number> = {
      HOME: 3,
      BACK: 4,
      VOLUME_UP: 24,
      VOLUME_DOWN: 25,
      POWER: 26,
      TAB: 61,
      ENTER: 66,
      DELETE: 67,
      MENU: 82,
    };
    const code = keyCodeByName[key];
    await runAdbShell(["input", "keyevent", String(code)]);

    const summary = `Pressed ${key} key`;
    return { content: [{ type: "text", text: summary }] };
  }
);
277
+
278
+ // =====================================================
279
+ // TOOL: launch_app
280
+ // =====================================================
281
// Registers the "launch_app" tool. The app is started by asking `monkey` to
// inject a single event restricted to the package's LAUNCHER category.
server.tool(
  "launch_app",
  "Launch an application by its package name (e.g., com.android.chrome). Package name is validated against the Android package naming convention.",
  {
    package: packageNameSchema.describe("Package name of the app (e.g., com.android.chrome)"),
  },
  async ({ package: pkg }) => {
    const output = await runAdbShell([
      "monkey",
      "-p", pkg,
      "-c", "android.intent.category.LAUNCHER",
      "1",
    ]);

    // monkey prints "No activities found to run, monkey aborted." when the
    // package is missing or has no launcher activity. Report that instead of
    // claiming success.
    // NOTE(review): assumes runAdbShell returns the command's textual output,
    // as the other tools in this file rely on.
    if (/No activities found to run|monkey aborted/i.test(output)) {
      return {
        content: [{ type: "text", text: `Error: Could not launch ${pkg}. ${output.trim()}` }],
      };
    }

    return { content: [{ type: "text", text: `Launched ${pkg}` }] };
  }
);
297
+
298
+ // =====================================================
299
+ // TOOL: install_apk
300
+ // =====================================================
301
// Registers the "install_apk" tool: checks that the APK exists on the host,
// then runs `adb install -r` (replace an existing installation).
server.tool(
  "install_apk",
  "Install an APK file on the device. Path must end in .apk and contain no shell metacharacters.",
  {
    path: apkPathSchema.describe("Path to the APK file on the host"),
  },
  async ({ path: apkPath }) => {
    const apkExists = fs.existsSync(apkPath);
    if (!apkExists) {
      // Fail before invoking adb so the caller gets a clear message.
      throw new Error(`APK file not found: ${apkPath}`);
    }

    const installOutput = await runAdb(["install", "-r", apkPath]);
    const summary = `APK installed: ${installOutput}`;
    return { content: [{ type: "text", text: summary }] };
  }
);
315
+
316
+ // =====================================================
317
+ // TOOL: list_packages
318
+ // =====================================================
319
// Registers the "list_packages" tool: runs `pm list packages` and applies the
// optional case-insensitive substring filter on the host side.
server.tool(
  "list_packages",
  "List installed packages on the device. Optional filter is applied in-process (JavaScript), never on the device shell.",
  {
    filter: searchFilterSchema.optional().describe("Filter packages by name (optional)"),
  },
  async ({ filter }) => {
    const raw = await runAdbShell(["pm", "list", "packages"]);
    const needle = filter?.toLowerCase();

    const packages: string[] = [];
    for (const rawLine of raw.split("\n")) {
      // Each line looks like "package:<name>"; keep only the name.
      const name = rawLine.replace("package:", "").trim();
      if (name.length === 0) continue;
      if (needle && !name.toLowerCase().includes(needle)) continue;
      packages.push(name);
    }

    const listing = `Installed packages:\n${packages.join("\n")}`;
    return {
      content: [{ type: "text", text: listing }],
    };
  }
);
339
+
340
+ // =====================================================
341
+ // TOOL: get_logs
342
+ // =====================================================
343
// Registers the "get_logs" tool: dumps the last `lines` logcat entries
// (`-d -t N`), optionally restricted to a minimum level, then applies the
// keyword filter on the host. The filter runs after the line limit, so fewer
// than `lines` entries may be returned.
server.tool(
  "get_logs",
  "Get device logs (logcat). Filtering is applied in-process, never on the device shell.",
  {
    filter: searchFilterSchema.optional().describe("Filter logs by tag or keyword (substring match in-process)"),
    lines: positiveCountSchema.optional().describe("Number of lines to retrieve (default: 50, max 100000)"),
    level: z.enum(["V", "D", "I", "W", "E"]).optional().describe("Minimum log level"),
  },
  async ({ filter, lines = 50, level }) => {
    // "*:<level>" is the logcat filter spec that applies to all tags.
    const levelSpec = level ? [`*:${level}`] : [];
    const logcatArgs = ["logcat", "-d", "-t", String(lines), ...levelSpec];

    const raw = await runAdbShell(logcatArgs);
    const needle = filter?.toLowerCase();

    let body = raw;
    if (needle) {
      const keep = (entry: string) => entry.toLowerCase().includes(needle);
      body = raw.split("\n").filter(keep).join("\n");
    }

    return { content: [{ type: "text", text: `Logs:\n${body}` }] };
  }
);
364
+
365
+ // =====================================================
366
+ // TOOL: device_info
367
+ // =====================================================
368
// Registers the "device_info" tool: gathers model, Android version, SDK level,
// screen size/density and battery level with six parallel adb queries.
server.tool(
  "device_info",
  "Get information about the connected device",
  {},
  async () => {
    const [model, android, sdk, density, size, batteryDump] = await Promise.all([
      runAdbShell(["getprop", "ro.product.model"]),
      runAdbShell(["getprop", "ro.build.version.release"]),
      runAdbShell(["getprop", "ro.build.version.sdk"]),
      runAdbShell(["wm", "density"]),
      runAdbShell(["wm", "size"]),
      runAdbShell(["dumpsys", "battery"]),
    ]);

    // Take the line that *starts* with "level:" so other lines that merely
    // contain the word are not picked up, and report "unknown" instead of a
    // bare "%" when the dump has no level line.
    const levelMatch = batteryDump.match(/^\s*level:\s*(\d+)/m);
    const battery = levelMatch ? `${levelMatch[1]}%` : "unknown";

    const report = [
      `Device: ${model}`,
      `Android: ${android} (SDK ${sdk})`,
      `Screen: ${size.replace("Physical size: ", "")}`,
      `Density: ${density.replace("Physical density: ", "")}`,
      `Battery: ${battery}`,
    ].join("\n");

    return {
      content: [
        {
          type: "text",
          text: report,
        },
      ],
    };
  }
);
398
+
399
+ // =====================================================
400
+ // TOOL: clear_app_data
401
+ // =====================================================
402
// Registers the "clear_app_data" tool: wipes an app's data via `pm clear`.
server.tool(
  "clear_app_data",
  "Clear all data for an application",
  {
    package: packageNameSchema.describe("Package name of the app"),
  },
  async ({ package: packageName }) => {
    const clearArgs = ["pm", "clear", packageName];
    await runAdbShell(clearArgs);

    const summary = `Data cleared for ${packageName}`;
    return { content: [{ type: "text", text: summary }] };
  }
);
413
+
414
+ // =====================================================
415
+ // TOOL: force_stop
416
+ // =====================================================
417
// Registers the "force_stop" tool: stops an app via `am force-stop`.
server.tool(
  "force_stop",
  "Force stop an application",
  {
    package: packageNameSchema.describe("Package name of the app"),
  },
  async ({ package: packageName }) => {
    const stopArgs = ["am", "force-stop", packageName];
    await runAdbShell(stopArgs);

    const summary = `Force stopped ${packageName}`;
    return { content: [{ type: "text", text: summary }] };
  }
);
428
+
429
+ // =====================================================
430
+ // TOOL: get_current_activity
431
+ // =====================================================
432
// Registers the "get_current_activity" tool. Three dumpsys queries are tried
// in order; the first one that yields a non-empty result wins. A failing query
// is ignored so the next one can be attempted (best effort).
server.tool(
  "get_current_activity",
  "Get the currently focused activity/screen",
  {},
  async () => {
    // First line of `dump` matching `marker`, trimmed (undefined when absent).
    const firstMatchingLine = (dump: string, marker: RegExp): string | undefined =>
      dump.split("\n").find((entry) => marker.test(entry))?.trim();

    const probes: Array<() => Promise<string | undefined>> = [
      async () =>
        firstMatchingLine(
          await runAdbShell(["dumpsys", "activity", "activities"]),
          /mResumedActivity|mCurrentFocus/
        ),
      // Fallback: the first five lines of the "top" activity dump.
      async () =>
        (await runAdbShell(["dumpsys", "activity", "top"]))
          .split("\n").slice(0, 5).join("\n").trim(),
      async () =>
        firstMatchingLine(
          await runAdbShell(["dumpsys", "window"]),
          /mCurrentFocus|mFocusedApp/
        ),
    ];

    let activity = "Unknown";
    for (const probe of probes) {
      try {
        const candidate = await probe();
        if (candidate) activity = candidate;
      } catch { /* ignore */ }
      if (activity !== "Unknown") break;
    }

    return { content: [{ type: "text", text: `Current activity:\n${activity}` }] };
  }
);
464
+
465
+ // =====================================================
466
+ // TOOL: wait_for_element
467
+ // =====================================================
468
// Registers the "wait_for_element" tool: polls the UI hierarchy dump every
// 500 ms until `text` appears (case-insensitive substring of the whole dump)
// or `timeout` seconds elapse.
server.tool(
  "wait_for_element",
  "Wait for a UI element with specific text to appear",
  {
    text: freeTextSchema.describe("Text of the element to wait for"),
    timeout: z.number().int().min(1).max(600).optional().describe("Timeout in seconds (default: 10)"),
  },
  async ({ text, timeout = 10 }) => {
    const startTime = Date.now();
    const timeoutMs = timeout * 1000;

    // The dump is XML, so "&", "<" and ">" in on-screen text appear as
    // entities. Search for both the raw and the entity-escaped form so text
    // such as "Terms & Conditions" can be found.
    const needle = text.toLowerCase();
    const escapedNeedle = needle
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

    while (Date.now() - startTime < timeoutMs) {
      await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
      const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
      const haystack = xml.toLowerCase();

      if (haystack.includes(needle) || haystack.includes(escapedNeedle)) {
        return {
          content: [
            {
              type: "text",
              text: `Element "${text}" found after ${Math.round((Date.now() - startTime) / 1000)}s`,
            },
          ],
        };
      }
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    return {
      content: [{ type: "text", text: `Timeout: Element "${text}" not found after ${timeout}s` }],
    };
  }
);
501
+
502
+ // =====================================================
503
+ // TOOL: long_press
504
+ // =====================================================
505
// Registers the "long_press" tool. A long press is emulated as a swipe whose
// start and end points are identical, held for `duration` milliseconds.
server.tool(
  "long_press",
  "Perform a long press at the specified coordinates (useful for context menus)",
  {
    x: coordinateSchema.describe("X coordinate"),
    y: coordinateSchema.describe("Y coordinate"),
    duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 1000)"),
  },
  async ({ x, y, duration = 1000 }) => {
    const px = String(x);
    const py = String(y);
    await runAdbShell(["input", "swipe", px, py, px, py, String(duration)]);

    const summary = `Long pressed at (${x}, ${y}) for ${duration}ms`;
    return { content: [{ type: "text", text: summary }] };
  }
);
521
+
522
+ // =====================================================
523
+ // TOOL: clear_input
524
+ // =====================================================
525
// Registers the "clear_input" tool: moves the cursor to the end of the focused
// field and sends `maxChars` DEL key events.
server.tool(
  "clear_input",
  "Clear the currently focused text input field",
  {
    maxChars: z.number().int().min(1).max(10_000).optional().describe("Maximum characters to delete (default: 100)"),
  },
  async ({ maxChars = 100 }) => {
    const DEL_KEYCODE = "67";
    // `input keyevent` accepts several key codes per invocation. Sending them
    // in batches avoids spawning one adb process per deleted character.
    const BATCH_SIZE = 50;

    await runAdbShell(["input", "keyevent", "123"]); // MOVE_END
    for (let remaining = maxChars; remaining > 0; remaining -= BATCH_SIZE) {
      const count = Math.min(remaining, BATCH_SIZE);
      const batch = new Array<string>(count).fill(DEL_KEYCODE);
      await runAdbShell(["input", "keyevent", ...batch]); // DEL x count
    }
    return { content: [{ type: "text", text: `Cleared input field (deleted up to ${maxChars} characters)` }] };
  }
);
539
+
540
+ // =====================================================
541
+ // TOOL: select_all
542
+ // =====================================================
543
// Registers the "select_all" tool: sends key codes 113 (CTRL_LEFT) and 29 (A)
// in one `input keyevent --longpress` call.
// NOTE(review): `--longpress` with two key codes may long-press each key in
// turn rather than produce a Ctrl+A chord. Confirm on a device.
server.tool(
  "select_all",
  "Select all text in the currently focused input field",
  {},
  async () => {
    const ctrlLeft = "113";
    const letterA = "29";
    await runAdbShell(["input", "keyevent", "--longpress", ctrlLeft, letterA]);

    const summary = "Selected all text in focused field";
    return { content: [{ type: "text", text: summary }] };
  }
);
553
+
554
+ // =====================================================
555
+ // TOOL: set_text
556
+ // =====================================================
557
// Registers the "set_text" tool: clears the focused field (MOVE_END followed
// by `maxClearChars` DEL events) and then types `text`.
server.tool(
  "set_text",
  "Clear the current input field and type new text. Unicode is supported via URL-encoding. Shell metacharacters are rejected.",
  {
    text: typeableTextSchema.describe("Text to type after clearing"),
    maxClearChars: z.number().int().min(1).max(10_000).optional().describe("Maximum characters to clear (default: 100)"),
  },
  async ({ text, maxClearChars = 100 }) => {
    const DEL_KEYCODE = "67";
    // `input keyevent` accepts several key codes per invocation. Sending them
    // in batches avoids spawning one adb process per deleted character.
    const BATCH_SIZE = 50;

    await runAdbShell(["input", "keyevent", "123"]); // MOVE_END
    for (let remaining = maxClearChars; remaining > 0; remaining -= BATCH_SIZE) {
      const count = Math.min(remaining, BATCH_SIZE);
      const batch = new Array<string>(count).fill(DEL_KEYCODE);
      await runAdbShell(["input", "keyevent", ...batch]); // DEL x count
    }

    await runAdbShell(["input", "text", encodeTextForInput(text)]);
    return { content: [{ type: "text", text: `Cleared field and typed: "${text}"` }] };
  }
);
573
+
574
+ // =====================================================
575
+ // TOOL: drag
576
+ // =====================================================
577
// Registers the "drag" tool: the same `input swipe` gesture as "swipe" but
// with a 1000 ms default duration, intended for drag & drop.
server.tool(
  "drag",
  "Perform a drag gesture from one point to another (slower than swipe, for drag & drop)",
  {
    x1: coordinateSchema.describe("Starting X coordinate"),
    y1: coordinateSchema.describe("Starting Y coordinate"),
    x2: coordinateSchema.describe("Ending X coordinate"),
    y2: coordinateSchema.describe("Ending Y coordinate"),
    duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 1000)"),
  },
  async ({ x1, y1, x2, y2, duration = 1000 }) => {
    const gestureArgs = [x1, y1, x2, y2, duration].map((value) => String(value));
    await runAdbShell(["input", "swipe", ...gestureArgs]);

    const summary = `Dragged from (${x1}, ${y1}) to (${x2}, ${y2}) over ${duration}ms`;
    return {
      content: [{ type: "text", text: summary }],
    };
  }
);
597
+
598
+ // =====================================================
599
+ // TOOL: double_tap
600
+ // =====================================================
601
// Registers the "double_tap" tool: two `input tap` commands at the same point
// separated by a 100 ms pause.
server.tool(
  "double_tap",
  "Perform a double tap at the specified coordinates",
  {
    x: coordinateSchema.describe("X coordinate"),
    y: coordinateSchema.describe("Y coordinate"),
  },
  async ({ x, y }) => {
    const tapOnce = () => runAdbShell(["input", "tap", String(x), String(y)]);
    const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

    await tapOnce();
    await pause(100);
    await tapOnce();

    return { content: [{ type: "text", text: `Double tapped at (${x}, ${y})` }] };
  }
);
615
+
616
+ // =====================================================
617
+ // TOOL: get_screen_size
618
+ // =====================================================
619
// Registers the "get_screen_size" tool: returns `{ width, height, density }`
// as JSON, using 0 for any value that cannot be parsed from `wm` output.
server.tool(
  "get_screen_size",
  "Get the screen dimensions and density of the device",
  {},
  async () => {
    const [sizeOutput, densityOutput] = await Promise.all([
      runAdbShell(["wm", "size"]),
      runAdbShell(["wm", "density"]),
    ]);

    // First "<digits>x<digits>" pair and first digit run, respectively.
    const sizeParts = sizeOutput.match(/(\d+)x(\d+)/);
    const densityParts = densityOutput.match(/(\d+)/);

    let width = 0;
    let height = 0;
    if (sizeParts) {
      width = parseInt(sizeParts[1]);
      height = parseInt(sizeParts[2]);
    }
    let density = 0;
    if (densityParts) {
      density = parseInt(densityParts[1]);
    }

    const payload = JSON.stringify({ width, height, density }, null, 2);
    return { content: [{ type: "text", text: payload }] };
  }
);
636
+
637
+ // =====================================================
638
+ // TOOL: is_element_visible
639
+ // =====================================================
640
// Registers the "is_element_visible" tool. It dumps the UI hierarchy and looks
// for a node whose `text` contains the given text (case-insensitive) or, if
// that finds nothing, whose `resource-id` equals the given id. The result is
// JSON: `{ visible, bounds }`, with bounds null when nothing matched.
server.tool(
  "is_element_visible",
  "Check if an element with specific text or resource-id is visible on screen",
  {
    text: freeTextSchema.optional().describe("Text to search for"),
    resourceId: resourceIdSchema.optional().describe("Resource ID to search for"),
  },
  async ({ text, resourceId }) => {
    if (!text && !resourceId) {
      return {
        content: [{ type: "text", text: JSON.stringify({ visible: false, error: "Must provide text or resourceId" }) }],
      };
    }

    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    const escapeRegex = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // The dump is XML, so "&", "<" and ">" in attribute values appear as entities.
    const escapeXml = (value: string): string =>
      value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

    const boundsPattern = `bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;

    // Finds the first node matching `attributePattern` and converts its bounds
    // into position, size and centre. Returns null when nothing matches.
    const locate = (attributePattern: string) => {
      const match = new RegExp(`${attributePattern}.*?${boundsPattern}`, "i").exec(xml);
      if (!match) return null;
      const left = parseInt(match[1], 10);
      const top = parseInt(match[2], 10);
      const right = parseInt(match[3], 10);
      const bottom = parseInt(match[4], 10);
      return {
        x: left, y: top,
        width: right - left, height: bottom - top,
        centerX: Math.round((left + right) / 2),
        centerY: Math.round((top + bottom) / 2),
      };
    };

    let bounds: unknown = null;

    if (text) {
      // Accept both the raw text and its entity-escaped form.
      const rawText = escapeRegex(text);
      const xmlText = escapeRegex(escapeXml(text));
      const alternatives = rawText === xmlText ? rawText : `(?:${rawText}|${xmlText})`;
      bounds = locate(`text="[^"]*${alternatives}[^"]*"`);
    }

    if (resourceId && bounds === null) {
      bounds = locate(`resource-id="${escapeRegex(resourceId)}"`);
    }

    const found = bounds !== null;
    return { content: [{ type: "text", text: JSON.stringify({ visible: found, bounds }, null, 2) }] };
  }
);
700
+
701
+ // =====================================================
702
+ // TOOL: get_element_bounds
703
+ // =====================================================
704
// Registers the "get_element_bounds" tool. It dumps the UI hierarchy, collects
// every node matching the text (preferred when both are given) or the
// resource-id, and returns the bounds and centre of the `index`-th match as JSON.
server.tool(
  "get_element_bounds",
  "Get the exact bounds and center coordinates of an element",
  {
    text: freeTextSchema.optional().describe("Text of the element"),
    resourceId: resourceIdSchema.optional().describe("Resource ID of the element"),
    index: z.number().int().min(0).max(10_000).optional().describe("Index if multiple matches (0-based, default: 0)"),
  },
  async ({ text, resourceId, index = 0 }) => {
    if (!text && !resourceId) {
      return { content: [{ type: "text", text: JSON.stringify({ error: "Must provide text or resourceId" }) }] };
    }

    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    const escapeRegex = (value: string): string => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // The dump is XML, so "&", "<" and ">" in attribute values appear as entities.
    const escapeXml = (value: string): string =>
      value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

    const boundsPattern = `bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"`;

    let pattern: string;
    if (text) {
      // Accept both the raw text and its entity-escaped form.
      const rawText = escapeRegex(text);
      const xmlText = escapeRegex(escapeXml(text));
      const alternatives = rawText === xmlText ? rawText : `(?:${rawText}|${xmlText})`;
      pattern = `text="[^"]*${alternatives}[^"]*".*?${boundsPattern}`;
    } else {
      pattern = `resource-id="${escapeRegex(resourceId!)}".*?${boundsPattern}`;
    }

    const regex = new RegExp(pattern, "gi");
    const matches: Array<{ x1: number; y1: number; x2: number; y2: number }> = [];
    let match: RegExpExecArray | null;
    while ((match = regex.exec(xml)) !== null) {
      matches.push({
        x1: parseInt(match[1], 10), y1: parseInt(match[2], 10),
        x2: parseInt(match[3], 10), y2: parseInt(match[4], 10),
      });
    }

    if (matches.length === 0) {
      return { content: [{ type: "text", text: JSON.stringify({ found: false, error: "Element not found" }) }] };
    }

    if (index >= matches.length) {
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            found: false,
            error: `Index ${index} out of range. Found ${matches.length} matches.`,
          }),
        }],
      };
    }

    const m = matches[index];
    const result = {
      found: true,
      matchCount: matches.length,
      index,
      bounds: { x: m.x1, y: m.y1, width: m.x2 - m.x1, height: m.y2 - m.y1 },
      center: { x: Math.round((m.x1 + m.x2) / 2), y: Math.round((m.y1 + m.y2) / 2) },
    };
    return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
  }
);
765
+
766
+ // =====================================================
767
+ // TOOL: scroll_to_text
768
+ // =====================================================
769
// Registers the "scroll_to_text" tool: checks the UI dump for `text`
// (case-insensitive substring of the whole dump) and, while it is absent,
// swipes between 70% and 30% of the screen height, up to `maxScrolls` times.
server.tool(
  "scroll_to_text",
  "Scroll the screen until an element with specific text is visible",
  {
    text: freeTextSchema.describe("Text to search for"),
    direction: z.enum(["up", "down"]).optional().describe("Scroll direction (default: down)"),
    maxScrolls: z.number().int().min(1).max(100).optional().describe("Maximum scroll attempts (default: 10)"),
  },
  async ({ text, direction = "down", maxScrolls = 10 }) => {
    const sizeOutput = await runAdbShell(["wm", "size"]);
    const sizeMatch = sizeOutput.match(/(\d+)x(\d+)/);
    // Fall back to 1080x2400 when the `wm size` output cannot be parsed.
    const width = sizeMatch ? parseInt(sizeMatch[1], 10) : 1080;
    const height = sizeMatch ? parseInt(sizeMatch[2], 10) : 2400;

    const centerX = Math.round(width / 2);
    const startY = direction === "down" ? Math.round(height * 0.7) : Math.round(height * 0.3);
    const endY = direction === "down" ? Math.round(height * 0.3) : Math.round(height * 0.7);

    // The dump is XML, so "&", "<" and ">" in on-screen text appear as
    // entities. Search for both the raw and the entity-escaped form.
    const needle = text.toLowerCase();
    const escapedNeedle = needle
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

    for (let i = 0; i < maxScrolls; i++) {
      await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
      const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
      const haystack = xml.toLowerCase();
      if (haystack.includes(needle) || haystack.includes(escapedNeedle)) {
        return { content: [{ type: "text", text: `Found "${text}" after ${i} scroll(s)` }] };
      }
      await runAdbShell([
        "input", "swipe",
        String(centerX), String(startY), String(centerX), String(endY), "300",
      ]);
      // Give the list time to settle before the next dump.
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    return { content: [{ type: "text", text: `Text "${text}" not found after ${maxScrolls} scrolls` }] };
  }
);
803
+
804
+ // =====================================================
805
+ // TOOL: wait_for_ui_stable
806
+ // =====================================================
807
/**
 * Builds an order-independent fingerprint of a UI hierarchy dump.
 *
 * Every tag that carries a `bounds="[x1,y1][x2,y2]"` attribute and a non-empty
 * `text` or `class` attribute contributes one `text|class|x1,y1,x2,y2` entry.
 * Entries are sorted and joined with newlines, so two dumps of the same screen
 * compare equal regardless of node order.
 *
 * Attributes are read per tag. A single regex with optional `text`/`class`
 * groups never captures them: the match starts at the tag's `<`, and the
 * greedy `[^>]*` reaches `bounds=` first, which made every fingerprint empty.
 *
 * @param xml Raw XML text of the UI dump.
 * @returns The fingerprint, or an empty string when no tag qualifies.
 */
function extractUIFingerprint(xml: string): string {
  const tagPattern = /<[^>]+>/g;
  // A leading whitespace anchors each name at the start of an attribute.
  const textAttr = /\stext="([^"]*)"/;
  const classAttr = /\sclass="([^"]*)"/;
  const boundsAttr = /\sbounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/;

  const elements: string[] = [];
  let tag: RegExpExecArray | null;
  while ((tag = tagPattern.exec(xml)) !== null) {
    const attributes = tag[0];
    const bounds = boundsAttr.exec(attributes);
    if (!bounds) continue;

    const text = textAttr.exec(attributes)?.[1] ?? "";
    const className = classAttr.exec(attributes)?.[1] ?? "";
    if (text || className) {
      elements.push(`${text}|${className}|${bounds[1]},${bounds[2]},${bounds[3]},${bounds[4]}`);
    }
  }
  return elements.sort().join("\n");
}
819
+
820
// Registers the "wait_for_ui_stable" tool: polls the UI dump and reports
// stability once the fingerprint has matched the previous one on two
// consecutive polls, or reports a timeout after `timeout` milliseconds.
server.tool(
  "wait_for_ui_stable",
  "Wait for the UI to stop changing (useful after animations)",
  {
    timeout: z.number().int().min(100).max(600_000).optional().describe("Timeout in milliseconds (default: 5000)"),
    checkInterval: z.number().int().min(50).max(10_000).optional().describe("Check interval in milliseconds (default: 500)"),
  },
  async ({ timeout = 5000, checkInterval = 500 }) => {
    const startedAt = Date.now();
    let previousFingerprint = "";
    let unchangedPolls = 0;

    while (Date.now() - startedAt < timeout) {
      await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
      const dump = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
      const fingerprint = extractUIFingerprint(dump);

      if (fingerprint !== previousFingerprint) {
        // Something changed: restart the count from this snapshot.
        unchangedPolls = 0;
        previousFingerprint = fingerprint;
      } else {
        unchangedPolls += 1;
        if (unchangedPolls >= 2) {
          const elapsed = Date.now() - startedAt;
          const pretty = elapsed < 1000 ? `${elapsed}ms` : `${Math.round(elapsed / 1000)}s`;
          return {
            content: [{
              type: "text",
              text: `UI stable after ${pretty}`,
            }],
          };
        }
      }
      await new Promise((resolve) => setTimeout(resolve, checkInterval));
    }

    return { content: [{ type: "text", text: `Timeout: UI did not stabilize within ${timeout}ms` }] };
  }
);
858
+
859
+ // =====================================================
860
+ // TOOL: wait_for_element_gone
861
+ // =====================================================
862
// Registers the "wait_for_element_gone" tool: polls the UI dump every 500 ms
// until `text` no longer occurs in it (case-insensitive substring of the whole
// dump) or `timeout` milliseconds elapse.
server.tool(
  "wait_for_element_gone",
  "Wait for an element to disappear from the screen",
  {
    text: freeTextSchema.describe("Text of the element to wait for disappearance"),
    timeout: z.number().int().min(100).max(600_000).optional().describe("Timeout in milliseconds (default: 10000)"),
  },
  async ({ text, timeout = 10_000 }) => {
    const startTime = Date.now();

    // The dump is XML, so "&", "<" and ">" in on-screen text appear as
    // entities. The element only counts as gone when neither the raw nor the
    // entity-escaped form is present. Otherwise text such as "A & B" would be
    // reported gone while still on screen.
    const needle = text.toLowerCase();
    const escapedNeedle = needle
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

    while (Date.now() - startTime < timeout) {
      await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
      const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);
      const haystack = xml.toLowerCase();
      const stillPresent = haystack.includes(needle) || haystack.includes(escapedNeedle);
      if (!stillPresent) {
        return {
          content: [{
            type: "text",
            text: `Element "${text}" disappeared after ${Math.round((Date.now() - startTime) / 1000)}s`,
          }],
        };
      }
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    return { content: [{ type: "text", text: `Timeout: Element "${text}" still visible after ${timeout}ms` }] };
  }
);
887
+
888
+ // =====================================================
889
+ // TOOL: multi_tap
890
+ // =====================================================
891
// Registers the "multi_tap" tool: `taps` sequential `input tap` commands at
// one point, pausing `interval` milliseconds between taps (not after the last).
server.tool(
  "multi_tap",
  "Perform multiple rapid taps at the same position",
  {
    x: coordinateSchema.describe("X coordinate"),
    y: coordinateSchema.describe("Y coordinate"),
    taps: z.number().int().min(1).max(100).optional().describe("Number of taps (default: 2)"),
    interval: durationMsSchema.optional().describe("Interval between taps in ms (default: 100)"),
  },
  async ({ x, y, taps = 2, interval = 100 }) => {
    let remaining = taps;
    while (remaining > 0) {
      await runAdbShell(["input", "tap", String(x), String(y)]);
      remaining -= 1;
      if (remaining > 0) {
        await new Promise((resolve) => setTimeout(resolve, interval));
      }
    }

    const summary = `Performed ${taps} taps at (${x}, ${y})`;
    return { content: [{ type: "text", text: summary }] };
  }
);
910
+
911
+ // =====================================================
912
+ // TOOL: pinch_zoom
913
+ // =====================================================
914
// Registers the "pinch_zoom" tool. The pinch is approximated by two sequential
// vertical swipes, one above and one below the centre point. They move away
// from the centre when zooming in and toward it when zooming out.
server.tool(
  "pinch_zoom",
  "Perform a pinch zoom gesture (requires Android 8+)",
  {
    x: coordinateSchema.describe("Center X coordinate"),
    y: coordinateSchema.describe("Center Y coordinate"),
    scale: z.number().min(0.1).max(10).describe("Scale factor (>1 zoom in, <1 zoom out)"),
    duration: durationMsSchema.optional().describe("Duration in milliseconds (default: 500)"),
  },
  async ({ x, y, scale, duration = 500 }) => {
    const baseSpan = 200;

    // Distances from the centre at which each swipe starts and ends.
    let startOffset: number;
    let endOffset: number;
    if (scale > 1) {
      startOffset = 50;
      endOffset = Math.round(Math.round(baseSpan * scale) / 2);
    } else {
      startOffset = Math.round(baseSpan / 2);
      endOffset = Math.round((baseSpan * scale) / 2);
    }

    // Upper finger first (offsets subtracted), then lower finger (offsets added).
    for (const side of [-1, 1]) {
      await runAdbShell([
        "input", "swipe",
        String(x), String(y + side * startOffset),
        String(x), String(y + side * endOffset),
        String(duration),
      ]);
    }

    return {
      content: [{
        type: "text",
        text: `Pinch zoom at (${x}, ${y}) with scale ${scale}. Note: True multitouch requires instrumentation.`,
      }],
    };
  }
);
958
+
959
+ // =====================================================
960
+ // TOOL: set_clipboard
961
+ // =====================================================
962
// Registers the "set_clipboard" tool. The text is written to a host temp file,
// pushed to the first writable device path in `paths`, and verified by reading
// it back.
// NOTE(review): this stores the text in a file that "get_clipboard" reads
// back. Nothing in this block touches the Android system clipboard service.
server.tool(
  "set_clipboard",
  "Set text to the device clipboard. Text is transferred via `adb push` (binary transfer, no shell involvement, full Unicode support).",
  {
    text: freeTextSchema.describe("Text to copy to clipboard"),
  },
  async ({ text }) => {
    const paths = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];

    // A private, uniquely named directory keeps other local users from
    // pre-creating or reading the staging file, which may hold sensitive text.
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mcp-clipboard-"));
    const tmpLocal = path.join(tmpDir, "clipboard.txt");

    let success = false;
    let usedPath = "";

    try {
      fs.writeFileSync(tmpLocal, text, { encoding: "utf8", mode: 0o600 });

      for (const clipPath of paths) {
        try {
          await runAdb(["push", tmpLocal, clipPath]);
          // Verify with cat (fixed path, no LLM-supplied input).
          const verify = await runAdbShell(["cat", clipPath]);
          // An empty clipboard text legitimately reads back as empty.
          if (text.length === 0 || (verify && verify.length > 0)) {
            success = true;
            usedPath = clipPath;
            break;
          }
        } catch { /* try next path */ }
      }
    } finally {
      // Always remove the staging directory, even when writing or pushing failed.
      try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ }
    }

    if (!success) {
      return {
        content: [{
          type: "text",
          text: `Error: Could not write clipboard. Tried paths: ${paths.join(", ")}`,
        }],
      };
    }

    return {
      content: [{
        type: "text",
        text: `Clipboard set to: "${text.substring(0, 50)}${text.length > 50 ? "..." : ""}" (stored at ${usedPath})`,
      }],
    };
  }
);
1011
+
1012
+ // =====================================================
1013
+ // TOOL: get_clipboard
1014
+ // =====================================================
1015
// Registers the "get_clipboard" tool: returns the contents of the first
// clipboard file that exists and is not blank. Unreadable paths are skipped.
server.tool(
  "get_clipboard",
  "Get the current device clipboard content",
  {},
  async () => {
    const candidatePaths = ["/data/local/tmp/clipboard_temp.txt", "/sdcard/clipboard_temp.txt"];

    for (const candidate of candidatePaths) {
      let stored: string;
      try {
        stored = await runAdbShell(["cat", candidate]);
      } catch {
        /* try next */
        continue;
      }
      if (stored && stored.trim()) {
        return { content: [{ type: "text", text: `Clipboard content: "${stored}"` }] };
      }
    }

    // Nothing stored anywhere: report an empty clipboard.
    return { content: [{ type: "text", text: `Clipboard content: ""` }] };
  }
);
1034
+
1035
+ // =====================================================
1036
+ // TOOL: rotate_device
1037
+ // =====================================================
1038
// Registers the "rotate_device" tool: turns off accelerometer-driven rotation,
// then sets `user_rotation` to 0 (portrait) or 1 (landscape).
server.tool(
  "rotate_device",
  "Rotate the device to portrait or landscape orientation",
  {
    orientation: z.enum(["portrait", "landscape"]).describe("Target orientation"),
  },
  async ({ orientation }) => {
    const putSystemSetting = (name: string, value: string) =>
      runAdbShell(["settings", "put", "system", name, value]);

    await putSystemSetting("accelerometer_rotation", "0");

    const rotationValue = orientation === "portrait" ? "0" : "1";
    await putSystemSetting("user_rotation", rotationValue);

    return { content: [{ type: "text", text: `Device rotated to ${orientation}` }] };
  }
);
1051
+
1052
+ // =====================================================
1053
+ // TOOL: tap_safe
1054
+ // =====================================================
1055
// Registers the "tap_safe" tool: taps at (x, y) after moving the point out of
// the assumed status-bar (top 50 px) and navigation-bar (bottom 120 px) areas
// and keeping x at least 10 px from the left and right edges.
server.tool(
  "tap_safe",
  "Tap at coordinates while avoiding system navigation bars",
  {
    x: coordinateSchema.describe("X coordinate"),
    y: coordinateSchema.describe("Y coordinate"),
    avoidStatusBar: z.boolean().optional().describe("Avoid status bar area (default: true)"),
    avoidNavBar: z.boolean().optional().describe("Avoid navigation bar area (default: true)"),
  },
  async ({ x, y, avoidStatusBar = true, avoidNavBar = true }) => {
    const sizeOutput = await runAdbShell(["wm", "size"]);
    const dims = sizeOutput.match(/(\d+)x(\d+)/);
    // Fall back to 1080x2400 when the `wm size` output cannot be parsed.
    const screenWidth = dims ? parseInt(dims[1]) : 1080;
    const screenHeight = dims ? parseInt(dims[2]) : 2400;

    // Fixed heights; they are not read from the device.
    const statusBarHeight = 50;
    const navBarHeight = 120;

    let safeY = y;
    const adjustments: string[] = [];

    const insideStatusBar = y < statusBarHeight;
    if (avoidStatusBar && insideStatusBar) {
      safeY = statusBarHeight + 10;
      adjustments.push(`status bar (${y} -> ${safeY})`);
    }

    const insideNavBar = y > screenHeight - navBarHeight;
    if (avoidNavBar && insideNavBar) {
      safeY = screenHeight - navBarHeight - 10;
      adjustments.push(`nav bar (${y} -> ${safeY})`);
    }

    const safeX = Math.max(10, Math.min(x, screenWidth - 10));
    await runAdbShell(["input", "tap", String(safeX), String(safeY)]);

    let message = `Tapped at (${safeX}, ${safeY})`;
    if (adjustments.length > 0) {
      message = `Tapped at (${safeX}, ${safeY}) [adjusted to avoid ${adjustments.join(", ")}]`;
    }
    return { content: [{ type: "text", text: message }] };
  }
);
1097
+
1098
// =====================================================
// TOOL: tap_element
// =====================================================
// Dumps the UI hierarchy, finds nodes whose `resource-id` or `text` attribute
// matches the request, and taps the centre of the `index`-th match.
// - resourceId takes precedence over text when both are given.
// - exact=false matches any text attribute containing the search string.
// - Matching is case-insensitive in every mode.
// Returns a text message describing the tap, or why nothing was tapped.
server.tool(
  "tap_element",
  "Find and tap an element by text or resource-id (more reliable than tap_text)",
  {
    text: freeTextSchema.optional().describe("Text to search for"),
    resourceId: resourceIdSchema.optional().describe("Resource ID to search for"),
    index: z.number().int().min(0).max(10_000).optional().describe("Index if multiple matches (0-based, default: 0)"),
    exact: z.boolean().optional().describe("Exact text match (default: false)"),
  },
  async ({ text, resourceId, index = 0, exact = false }) => {
    if (!text && !resourceId) {
      return { content: [{ type: "text", text: "Error: Must provide either text or resourceId" }] };
    }

    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // The dump is XML, so attribute values carry `&`, `<`, `>` and `"` as
    // entities. Encode the search string the same way before escaping regex
    // metacharacters; otherwise text such as "Save & Exit" can never match.
    const toPattern = (value: string): string =>
      value
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Shared tail: skip to the bounds attribute of the same tag and capture
    // x1, y1, x2, y2.
    const boundsTail = '[^>]*bounds="\\[(\\d+),(\\d+)\\]\\[(\\d+),(\\d+)\\]"';

    // The guard above guarantees text is set whenever resourceId is not.
    const needle = text ?? "";
    let pattern: string;
    let searchType: string;

    if (resourceId) {
      pattern = `resource-id="${toPattern(resourceId)}"${boundsTail}`;
      searchType = `resource-id="${resourceId}"`;
    } else if (exact) {
      pattern = `text="${toPattern(needle)}"${boundsTail}`;
      searchType = `text="${text}"`;
    } else {
      pattern = `text="[^"]*${toPattern(needle)}[^"]*"${boundsTail}`;
      searchType = `text containing "${text}"`;
    }

    const regex = new RegExp(pattern, "gi");
    const matches: Array<{ x1: number; y1: number; x2: number; y2: number }> = [];
    for (let hit = regex.exec(xml); hit !== null; hit = regex.exec(xml)) {
      matches.push({
        x1: parseInt(hit[1], 10),
        y1: parseInt(hit[2], 10),
        x2: parseInt(hit[3], 10),
        y2: parseInt(hit[4], 10),
      });
    }

    if (matches.length === 0) {
      return { content: [{ type: "text", text: `Element with ${searchType} not found` }] };
    }
    if (index >= matches.length) {
      return {
        content: [{ type: "text", text: `Index ${index} out of range. Found ${matches.length} matches for ${searchType}` }],
      };
    }

    const m = matches[index];
    const centerX = Math.round((m.x1 + m.x2) / 2);
    const centerY = Math.round((m.y1 + m.y2) / 2);
    await runAdbShell(["input", "tap", String(centerX), String(centerY)]);

    return {
      content: [{
        type: "text",
        text: `Tapped element with ${searchType} at (${centerX}, ${centerY})${matches.length > 1 ? ` [match ${index + 1}/${matches.length}]` : ""}`,
      }],
    };
  }
);
1167
+
1168
// =====================================================
// TOOL: get_focused_element
// =====================================================
// Dumps the UI hierarchy and reports the first node tag carrying
// focused="true". The result is JSON text:
//   { focused: false, element: null }                      when none is found
//   { focused: true, element: { text, bounds, center } }   otherwise
// where bounds is { x, y, width, height } and center is the midpoint.
server.tool(
  "get_focused_element",
  "Get information about the currently focused UI element",
  {},
  async () => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // Locate the tag first and read attributes from it individually so the
    // result does not depend on attribute order. uiautomator writes `text`
    // before `focused` and `bounds` last, which an order-dependent pattern of
    // the form focused…text…bounds can never match.
    const focusedNode = /<node\b[^>]*\sfocused="true"[^>]*>/.exec(xml)?.[0];
    const bounds = focusedNode
      ? /\sbounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/.exec(focusedNode)
      : null;

    if (!focusedNode || !bounds) {
      return { content: [{ type: "text", text: JSON.stringify({ focused: false, element: null }) }] };
    }

    const x1 = parseInt(bounds[1], 10);
    const y1 = parseInt(bounds[2], 10);
    const x2 = parseInt(bounds[3], 10);
    const y2 = parseInt(bounds[4], 10);
    // A node without a text attribute is reported with empty text.
    const text = /\stext="([^"]*)"/.exec(focusedNode)?.[1] ?? "";

    return {
      content: [{
        type: "text",
        text: JSON.stringify({
          focused: true,
          element: {
            text,
            bounds: { x: x1, y: y1, width: x2 - x1, height: y2 - y1 },
            center: { x: Math.round((x1 + x2) / 2), y: Math.round((y1 + y2) / 2) },
          },
        }, null, 2),
      }],
    };
  }
);
1220
+
1221
// =====================================================
// TOOL: assert_screen_contains
// =====================================================
// Dumps the UI hierarchy and checks the `text` attribute values of its nodes.
// - exact=true:  some text attribute equals `text` (case-sensitive).
// - exact=false: some text attribute contains `text` (case-insensitive).
// Returns JSON text { assertion: "PASS" | "FAIL", expected, found }.
server.tool(
  "assert_screen_contains",
  "Assert that specific text is visible on screen (useful for testing)",
  {
    text: freeTextSchema.describe("Text that should be visible"),
    exact: z.boolean().optional().describe("Exact match (default: false)"),
  },
  async ({ text, exact = false }) => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // Attribute values in the dump are XML-encoded; decode them in one pass
    // (so "&amp;lt;" becomes "&lt;", not "<") before comparing.
    const namedEntities: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
    const decodeEntities = (raw: string): string =>
      raw.replace(
        /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
        (entity: string, dec?: string, hex?: string, name?: string) => {
          if (name) return namedEntities[name];
          const codePoint = dec ? parseInt(dec, 10) : parseInt(hex ?? "", 16);
          // Leave out-of-range references untouched instead of throwing.
          return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        }
      );

    // Only text attribute values are considered. Searching the raw XML would
    // also hit attribute names, class names and boolean flags, so words such
    // as "true" or "node" would always pass.
    const textPattern = /\stext="([^"]*)"/g;
    const screenTexts: string[] = [];
    for (let hit = textPattern.exec(xml); hit !== null; hit = textPattern.exec(xml)) {
      screenTexts.push(decodeEntities(hit[1]));
    }

    const wanted = text.toLowerCase();
    const found = exact
      ? screenTexts.includes(text)
      : screenTexts.some((candidate) => candidate.toLowerCase().includes(wanted));

    return {
      content: [{
        type: "text",
        text: JSON.stringify({ assertion: found ? "PASS" : "FAIL", expected: text, found }, null, 2),
      }],
    };
  }
);
1245
+
1246
// =====================================================
// TOOL: get_all_text
// =====================================================
// Dumps the UI hierarchy and lists every text attribute that is followed by a
// bounds attribute in the same tag, together with the centre of those bounds.
// Entries are ordered top-to-bottom, then left-to-right. Empty texts are
// skipped unless includeEmpty is set, in which case they show as "(empty)".
server.tool(
  "get_all_text",
  "Get all visible text elements on screen (useful for debugging and verification)",
  {
    includeEmpty: z.boolean().optional().describe("Include elements with empty text (default: false)"),
  },
  async ({ includeEmpty = false }) => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    const midpoint = (low: string, high: string): number =>
      Math.round((parseInt(low) + parseInt(high)) / 2);

    const entries: Array<{ text: string; centerX: number; centerY: number }> = [];
    const textWithBounds = /text="([^"]*)"[^>]*bounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/g;
    for (let hit = textWithBounds.exec(xml); hit !== null; hit = textWithBounds.exec(xml)) {
      const label = hit[1];
      if (!label && !includeEmpty) continue;
      entries.push({
        text: label || "(empty)",
        centerX: midpoint(hit[2], hit[4]),
        centerY: midpoint(hit[3], hit[5]),
      });
    }

    // Reading order: by row first, then by column.
    entries.sort((a, b) => a.centerY - b.centerY || a.centerX - b.centerX);

    const lines = entries.map((entry) => `"${entry.text}" at (${entry.centerX}, ${entry.centerY})`);
    return {
      content: [{ type: "text", text: `Found ${entries.length} text elements:\n${lines.join("\n")}` }],
    };
  }
);
1279
+
1280
// =====================================================
// TOOL: get_clickable_elements
// =====================================================
// Dumps the UI hierarchy and lists every node whose `clickable` attribute is
// "true" and that has parseable bounds. Disabled nodes (enabled="false") are
// skipped unless includeDisabled is set. Output is a numbered text list
// ordered top-to-bottom, then left-to-right, showing text, the part of the
// resource-id after the last "/", the short class name, and the centre point.
server.tool(
  "get_clickable_elements",
  "Get all clickable elements on screen with their text, resource-id, and coordinates (useful when tap_text fails)",
  {
    includeDisabled: z.boolean().optional().describe("Include disabled elements (default: false)"),
  },
  async ({ includeDisabled = false }) => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    const elements: Array<{
      text: string; resourceId: string; className: string;
      centerX: number; centerY: number; bounds: string;
    }> = [];

    // Attribute names are anchored on the preceding whitespace. Without it,
    // `clickable="true"` also matches inside `long-clickable="true"` and nodes
    // that are only long-clickable get reported as clickable.
    const clickableNode = /<node\b[^>]*\sclickable="true"[^>]*>/g;
    const disabledAttr = /\senabled="false"/;
    const textAttr = /\stext="([^"]*)"/;
    const resourceIdAttr = /\sresource-id="([^"]*)"/;
    const classAttr = /\sclass="([^"]*)"/;
    const boundsAttr = /\sbounds="\[(\d+),(\d+)\]\[(\d+),(\d+)\]"/;

    for (let hit = clickableNode.exec(xml); hit !== null; hit = clickableNode.exec(xml)) {
      const node = hit[0];
      if (!includeDisabled && disabledAttr.test(node)) continue;

      const boundsMatch = boundsAttr.exec(node);
      if (!boundsMatch) continue;

      const [, x1, y1, x2, y2] = boundsMatch;
      const qualifiedClass = classAttr.exec(node)?.[1] ?? "";
      elements.push({
        text: textAttr.exec(node)?.[1] ?? "",
        resourceId: resourceIdAttr.exec(node)?.[1] ?? "",
        // Keep only the simple class name (text after the last ".").
        className: qualifiedClass.split(".").pop() || "",
        centerX: Math.round((parseInt(x1, 10) + parseInt(x2, 10)) / 2),
        centerY: Math.round((parseInt(y1, 10) + parseInt(y2, 10)) / 2),
        bounds: `[${x1},${y1}][${x2},${y2}]`,
      });
    }

    elements.sort((a, b) => a.centerY - b.centerY || a.centerX - b.centerX);
    const formatted = elements.map((el, i) => {
      const parts: string[] = [];
      if (el.text) parts.push(`text="${el.text}"`);
      if (el.resourceId) parts.push(`id="${el.resourceId.split("/").pop()}"`);
      if (el.className) parts.push(`[${el.className}]`);
      return `${i + 1}. ${parts.join(" ") || "(no text/id)"} at (${el.centerX}, ${el.centerY})`;
    }).join("\n");

    return {
      content: [{ type: "text", text: `Found ${elements.length} clickable elements:\n${formatted}` }],
    };
  }
);
1337
+
1338
// =====================================================
// TOOL: is_keyboard_visible
// =====================================================
// Runs three independent probes in sequence and reports the keyboard as
// visible when any of them is positive:
//   1. `dumpsys input_method` contains a line with "mInputShown=true".
//   2. `dumpsys window windows` has a single line mentioning "inputmethod"
//      (any case) together with "mHasSurface=true".
//   3. The bottom edge of the first "mVisibleFrame" line in `dumpsys window`
//      is below 80% of the height reported by `wm size`.
// A probe that throws counts as negative. The result is JSON text
// { visible, checks: { inputMethodShown, keyboardWindowVisible, heightReduced } }.
server.tool(
  "is_keyboard_visible",
  "Check if the soft keyboard is currently visible on screen",
  {},
  async () => {
    // Best-effort wrapper: a failing probe yields false instead of aborting.
    const probe = async (strategy: () => Promise<boolean>): Promise<boolean> => {
      try {
        return await strategy();
      } catch {
        return false;
      }
    };

    const inputMethodShown = await probe(async () => {
      const imeDump = await runAdbShell(["dumpsys", "input_method"]);
      return imeDump.split("\n").some((line) => /mInputShown=true/.test(line));
    });

    const keyboardWindowVisible = await probe(async () => {
      const windowDump = await runAdbShell(["dumpsys", "window", "windows"]);
      return windowDump
        .split("\n")
        .some((line) => /inputmethod/i.test(line) && /mHasSurface=true/.test(line));
    });

    const heightReduced = await probe(async () => {
      const windowState = await runAdbShell(["dumpsys", "window"]);
      const sizeOutput = await runAdbShell(["wm", "size"]);

      const screen = /(\d+)x(\d+)/.exec(sizeOutput);
      const frameLine = windowState.split("\n").find((line) => /mVisibleFrame/.test(line));
      if (!screen || !frameLine) return false;

      const frame = /mVisibleFrame=\[\d+,\d+\]\[\d+,(\d+)\]/.exec(frameLine);
      if (!frame) return false;

      return parseInt(frame[1]) < parseInt(screen[2]) * 0.8;
    });

    const report = {
      visible: inputMethodShown || keyboardWindowVisible || heightReduced,
      checks: { inputMethodShown, keyboardWindowVisible, heightReduced },
    };
    return { content: [{ type: "text", text: JSON.stringify(report, null, 2) }] };
  }
);
1395
+
1396
// =====================================================
// TOOL: get_focused_input_value
// =====================================================
// Dumps the UI hierarchy and returns the `text` attribute of the focused
// node as JSON text:
//   { found: true, value, isEmpty }            focused node whose class name
//                                              contains Edit/Input/Text
//   { found: true, value, isEmpty, note }      any other focused node
//   { found: false, value: null, error }       nothing focused
server.tool(
  "get_focused_input_value",
  "Get the current text value of the focused input field",
  {},
  async () => {
    await runAdbShell(["uiautomator", "dump", "/sdcard/ui_dump.xml"]);
    const xml = await runAdbShell(["cat", "/sdcard/ui_dump.xml"]);

    // Work per node tag and read attributes individually so the result does
    // not depend on attribute order. uiautomator writes text, then class,
    // then focused, so patterns expecting class or focused ahead of text
    // never match a real input field.
    const focusedNodes: string[] = xml.match(/<node\b[^>]*\sfocused="true"[^>]*>/g) ?? [];
    const textOf = (node: string): string | undefined => /\stext="([^"]*)"/.exec(node)?.[1];
    // Same class heuristic as before: case-insensitive Edit / Input / Text.
    const looksLikeInput = (node: string): boolean =>
      /\sclass="[^"]*(?:Edit|Input|Text)[^"]*"/i.test(node);

    const inputNode = focusedNodes.find((node) => looksLikeInput(node) && textOf(node) !== undefined);
    if (inputNode !== undefined) {
      const value = textOf(inputNode) ?? "";
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            found: true, value, isEmpty: value === "",
          }, null, 2),
        }],
      };
    }

    // Fallback: something is focused but its class does not look like an input.
    const anyFocused = focusedNodes.find((node) => textOf(node) !== undefined);
    if (anyFocused !== undefined) {
      const value = textOf(anyFocused) ?? "";
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            found: true, value, isEmpty: value === "",
            note: "Found focused element (may not be an input field)",
          }, null, 2),
        }],
      };
    }

    return {
      content: [{
        type: "text",
        text: JSON.stringify({ found: false, value: null, error: "No focused input field found" }, null, 2),
      }],
    };
  }
);
1450
+
1451
// =====================================================
// Start server
// =====================================================
// Connects the MCP server to a stdio transport. The status line goes to
// stderr via console.error rather than stdout.
async function main() {
  await server.connect(new StdioServerTransport());
  console.error("MCP Android Emulator Server running on stdio");
}

// Any startup failure is logged and ends the process with exit code 1.
main().catch((startupError) => {
  console.error(startupError);
  process.exit(1);
});