mobile-device-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +181 -0
  2. package/dist/ai/analyzer.d.ts +98 -0
  3. package/dist/ai/analyzer.d.ts.map +1 -0
  4. package/dist/ai/analyzer.js +451 -0
  5. package/dist/ai/analyzer.js.map +1 -0
  6. package/dist/ai/client.d.ts +92 -0
  7. package/dist/ai/client.d.ts.map +1 -0
  8. package/dist/ai/client.js +281 -0
  9. package/dist/ai/client.js.map +1 -0
  10. package/dist/ai/element-search.d.ts +12 -0
  11. package/dist/ai/element-search.d.ts.map +1 -0
  12. package/dist/ai/element-search.js +387 -0
  13. package/dist/ai/element-search.js.map +1 -0
  14. package/dist/ai/prompts.d.ts +27 -0
  15. package/dist/ai/prompts.d.ts.map +1 -0
  16. package/dist/ai/prompts.js +153 -0
  17. package/dist/ai/prompts.js.map +1 -0
  18. package/dist/drivers/android/adb.d.ts +21 -0
  19. package/dist/drivers/android/adb.d.ts.map +1 -0
  20. package/dist/drivers/android/adb.js +122 -0
  21. package/dist/drivers/android/adb.js.map +1 -0
  22. package/dist/drivers/android/index.d.ts +70 -0
  23. package/dist/drivers/android/index.d.ts.map +1 -0
  24. package/dist/drivers/android/index.js +529 -0
  25. package/dist/drivers/android/index.js.map +1 -0
  26. package/dist/index.d.ts +3 -0
  27. package/dist/index.d.ts.map +1 -0
  28. package/dist/index.js +131 -0
  29. package/dist/index.js.map +1 -0
  30. package/dist/server.d.ts +13 -0
  31. package/dist/server.d.ts.map +1 -0
  32. package/dist/server.js +41 -0
  33. package/dist/server.js.map +1 -0
  34. package/dist/tools/ai-tools.d.ts +11 -0
  35. package/dist/tools/ai-tools.d.ts.map +1 -0
  36. package/dist/tools/ai-tools.js +238 -0
  37. package/dist/tools/ai-tools.js.map +1 -0
  38. package/dist/tools/app-tools.d.ts +4 -0
  39. package/dist/tools/app-tools.d.ts.map +1 -0
  40. package/dist/tools/app-tools.js +222 -0
  41. package/dist/tools/app-tools.js.map +1 -0
  42. package/dist/tools/device-tools.d.ts +4 -0
  43. package/dist/tools/device-tools.d.ts.map +1 -0
  44. package/dist/tools/device-tools.js +104 -0
  45. package/dist/tools/device-tools.js.map +1 -0
  46. package/dist/tools/index.d.ts +21 -0
  47. package/dist/tools/index.d.ts.map +1 -0
  48. package/dist/tools/index.js +30 -0
  49. package/dist/tools/index.js.map +1 -0
  50. package/dist/tools/interaction-tools.d.ts +4 -0
  51. package/dist/tools/interaction-tools.d.ts.map +1 -0
  52. package/dist/tools/interaction-tools.js +304 -0
  53. package/dist/tools/interaction-tools.js.map +1 -0
  54. package/dist/tools/log-tools.d.ts +4 -0
  55. package/dist/tools/log-tools.d.ts.map +1 -0
  56. package/dist/tools/log-tools.js +60 -0
  57. package/dist/tools/log-tools.js.map +1 -0
  58. package/dist/tools/screen-tools.d.ts +4 -0
  59. package/dist/tools/screen-tools.d.ts.map +1 -0
  60. package/dist/tools/screen-tools.js +105 -0
  61. package/dist/tools/screen-tools.js.map +1 -0
  62. package/dist/types.d.ts +219 -0
  63. package/dist/types.d.ts.map +1 -0
  64. package/dist/types.js +19 -0
  65. package/dist/types.js.map +1 -0
  66. package/dist/utils/discovery.d.ts +20 -0
  67. package/dist/utils/discovery.d.ts.map +1 -0
  68. package/dist/utils/discovery.js +156 -0
  69. package/dist/utils/discovery.js.map +1 -0
  70. package/dist/utils/image.d.ts +46 -0
  71. package/dist/utils/image.d.ts.map +1 -0
  72. package/dist/utils/image.js +170 -0
  73. package/dist/utils/image.js.map +1 -0
  74. package/package.json +69 -0
package/README.md ADDED
@@ -0,0 +1,181 @@
1
+ # mobile-device-mcp
2
+
3
+ MCP server that gives AI coding assistants (Claude Code, Cursor, Windsurf) the ability to **see and interact with mobile devices**. 26 tools for screenshots, UI inspection, touch interaction, and AI-powered visual analysis.
4
+
5
+ > AI assistants can read your code but can't see your phone. This fixes that.
6
+
7
+ ## The Problem
8
+
9
+ Web developers have browser DevTools, Playwright, and Puppeteer — AI assistants can click around, take screenshots, and verify fixes. Mobile developers? They're stuck manually screenshotting, copying logs, and describing what's on screen. They're **human middleware** between the AI and the device.
10
+
11
+ ## What This Does
12
+
13
+ ```
14
+ Developer: "The login button doesn't work"
15
+
16
+ Without this tool:            With this tool:
17
+ 1. Manually screenshot        1. AI calls take_screenshot → sees the screen
18
+ 2. Paste into AI chat         2. AI calls smart_tap("login button") → taps it
19
+ 3. AI guesses what's wrong    3. AI calls verify_screen("error message shown") → sees result
20
+ 4. Apply fix, rebuild         4. AI calls visual_diff → confirms fix worked
21
+ 5. Repeat 4-5 times           5. Done.
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ### Prerequisites
27
+ - Node.js 18+
28
+ - Android device/emulator connected via ADB
29
+ - ADB installed (Android SDK Platform Tools)
30
+
31
+ ### Install & Run
32
+
33
+ ```bash
34
+ # Clone and build
35
+ git clone https://github.com/saranshbamania/mobile-device-mcp.git
36
+ cd mobile-device-mcp
37
+ npm install
38
+ npm run build
39
+
40
+ # Run (auto-discovers ADB and connected devices)
41
+ node dist/index.js
42
+ ```
43
+
44
+ ### Configure with Claude Code
45
+
46
+ Add to your Claude Code MCP settings (`~/.claude/settings.json`):
47
+
48
+ ```json
49
+ {
50
+ "mcpServers": {
51
+ "mobile-device": {
52
+ "command": "node",
53
+ "args": ["/path/to/mobile-device-mcp/dist/index.js"],
54
+ "env": {
55
+ "GOOGLE_API_KEY": "your-google-api-key",
56
+ "ANTHROPIC_API_KEY": "your-anthropic-api-key"
57
+ }
58
+ }
59
+ }
60
+ }
61
+ ```
62
+
63
+ ### Configure with Cursor
64
+
65
+ Add to `.cursor/mcp.json`:
66
+
67
+ ```json
68
+ {
69
+ "mcpServers": {
70
+ "mobile-device": {
71
+ "command": "node",
72
+ "args": ["/path/to/mobile-device-mcp/dist/index.js"],
73
+ "env": {
74
+ "GOOGLE_API_KEY": "your-google-api-key"
75
+ }
76
+ }
77
+ }
78
+ }
79
+ ```
80
+
81
+ ## Tools (26 total)
82
+
83
+ ### Phase 1 — Device Control (18 tools)
84
+
85
+ | Tool | What it does |
86
+ |------|-------------|
87
+ | `list_devices` | List all connected Android devices/emulators |
88
+ | `get_device_info` | Model, manufacturer, Android version, SDK level |
89
+ | `get_screen_size` | Screen resolution in pixels |
90
+ | `take_screenshot` | Capture PNG screenshot (returned as base64 image) |
91
+ | `get_ui_elements` | Get the accessibility/UI element tree as structured JSON |
92
+ | `tap` | Tap at coordinates |
93
+ | `double_tap` | Double tap at coordinates |
94
+ | `long_press` | Long press at coordinates |
95
+ | `swipe` | Swipe between two points |
96
+ | `type_text` | Type text into the focused field |
97
+ | `press_key` | Press a key (home, back, enter, volume, etc.) |
98
+ | `list_apps` | List installed apps |
99
+ | `get_current_app` | Get the foreground app |
100
+ | `launch_app` | Launch an app by package name |
101
+ | `stop_app` | Force stop an app |
102
+ | `install_app` | Install an APK |
103
+ | `uninstall_app` | Uninstall an app |
104
+ | `get_logs` | Get logcat entries with filtering |
105
+
106
+ ### Phase 2 — AI Visual Analysis (8 tools)
107
+
108
+ These tools use AI vision (Claude or Gemini) to understand what's on screen. They require an API key: set `ANTHROPIC_API_KEY` for Claude, or `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini.
109
+
110
+ | Tool | What it does |
111
+ |------|-------------|
112
+ | `analyze_screen` | AI describes the screen: app name, screen type, interactive elements, visible text, suggestions |
113
+ | `find_element` | Find a UI element by description: *"the login button"*, *"email input field"* |
114
+ | `smart_tap` | Find an element by description and tap it in one step |
115
+ | `smart_type` | Find an input field by description, focus it, and type text |
116
+ | `suggest_actions` | Plan actions to achieve a goal: *"log into the app"*, *"add item to cart"* |
117
+ | `visual_diff` | Compare current screen with a previous screenshot — what changed? |
118
+ | `extract_text` | Extract all visible text from the screen (AI-powered OCR) |
119
+ | `verify_screen` | Verify an assertion: *"the login was successful"*, *"error message is showing"* |
120
+
121
+ ## Environment Variables
122
+
123
+ | Variable | Description | Default |
124
+ |----------|-------------|---------|
125
+ | `ANTHROPIC_API_KEY` | Anthropic API key for Claude vision | — |
126
+ | `GOOGLE_API_KEY` or `GEMINI_API_KEY` | Google API key for Gemini vision | — |
127
+ | `MCP_AI_PROVIDER` | Force AI provider: `"anthropic"` or `"google"` | Auto-detected |
128
+ | `MCP_AI_MODEL` | Override AI model | `claude-sonnet-4-20250514` / `gemini-2.5-flash` |
129
+ | `MCP_ADB_PATH` | Custom ADB binary path | Auto-discovered |
130
+ | `MCP_DEFAULT_DEVICE` | Default device serial | Auto-discovered |
131
+ | `MCP_SCREENSHOT_FORMAT` | `"png"` or `"jpeg"` | `png` |
132
+ | `MCP_SCREENSHOT_QUALITY` | JPEG quality (1-100) | `80` |
133
+ | `MCP_AI_SCREENSHOT` | Send screenshots to AI (`"true"`/`"false"`) | `true` |
134
+ | `MCP_AI_UITREE` | Send UI tree to AI (`"true"`/`"false"`) | `true` |
135
+
136
+ ## Architecture
137
+
138
+ ```
139
+ src/
140
+ ├── index.ts                  # CLI entry point (auto-discovery, env config)
141
+ ├── server.ts                 # MCP server factory
142
+ ├── types.ts                  # Shared interfaces
143
+ ├── drivers/android/          # ADB driver (DeviceDriver implementation)
144
+ │   ├── adb.ts                # Low-level ADB command wrapper
145
+ │   └── index.ts              # AndroidDriver class
146
+ ├── tools/                    # MCP tool registrations
147
+ │   ├── device-tools.ts       # Device management
148
+ │   ├── screen-tools.ts       # Screenshots & UI inspection
149
+ │   ├── interaction-tools.ts  # Touch, type, keys
150
+ │   ├── app-tools.ts          # App management
151
+ │   ├── log-tools.ts          # Logcat
152
+ │   └── ai-tools.ts           # AI-powered tools
153
+ ├── ai/                       # AI visual analysis engine
154
+ │   ├── client.ts             # Multi-provider client (Anthropic + Google)
155
+ │   ├── prompts.ts            # System prompts & UI element summarizer
156
+ │   └── analyzer.ts           # ScreenAnalyzer orchestrator
157
+ └── utils/
158
+     ├── discovery.ts          # ADB auto-discovery
159
+     └── image.ts              # PNG parsing utilities
160
+ ```
161
+
162
+ ## Roadmap
163
+
164
+ - [x] Phase 1: Android ADB device control (18 tools)
165
+ - [x] Phase 2: AI visual analysis layer (8 tools)
166
+ - [x] Multi-provider AI (Anthropic Claude + Google Gemini)
167
+ - [ ] Phase 3: Flutter widget tree integration (Dart VM Service Protocol)
168
+ - [ ] Phase 4: iOS support (simulators via xcrun simctl, devices via idevice)
169
+ - [ ] Phase 5: Monetization (license keys, usage analytics)
170
+ - [ ] npm publish (`npx mobile-device-mcp`)
171
+ - [ ] Screenshot compression pipeline (JPEG, thumbnail mode)
172
+ - [ ] Multi-device orchestration
173
+
174
+ ## Tested On
175
+
176
+ - Pixel 8, Android 16, SDK 36 — 13/13 device tests passed
177
+ - Windows 11 + ADB over TCP
178
+
179
+ ## License
180
+
181
+ MIT
package/dist/ai/analyzer.d.ts ADDED
@@ -0,0 +1,98 @@
1
+ import type { ScreenAnalysis, ElementMatch, VisualDiff, ActionPlan, ScreenVerification, AnalyzedElement, DeviceDriver, ScreenshotOptions } from "../types.js";
2
+ import { AIClient } from "./client.js";
3
+ export declare class ScreenAnalyzer {
4
+ private client;
5
+ private driver;
6
+ private config;
7
+ private cache;
8
+ private cacheTTL;
9
+ /** Screenshot options used for AI analysis (compressed for performance). */
10
+ private screenshotOptions;
11
+ constructor(client: AIClient, driver: DeviceDriver, config: {
12
+ provider: "anthropic" | "google";
13
+ analyzeWithScreenshot: boolean;
14
+ analyzeWithUITree: boolean;
15
+ apiKey: string;
16
+ model: string;
17
+ maxTokens: number;
18
+ }, screenshotOptions?: ScreenshotOptions);
19
+ /**
20
+ * Analyze the current screen state — identifies elements, screen
21
+ * type, visible text, and actionable suggestions.
22
+ */
23
+ analyzeScreen(deviceId: string): Promise<ScreenAnalysis>;
24
+ /**
25
+ * Find an element on screen by natural language description.
26
+ *
27
+ * Fast path: searches the UI tree locally first. If a high-confidence
28
+ * match is found (>0.7), returns immediately without calling the AI.
29
+ * Falls back to AI for ambiguous or complex queries.
30
+ */
31
+ findElement(deviceId: string, query: string): Promise<ElementMatch>;
32
+ /**
33
+ * Plan a sequence of actions to achieve a goal on the current screen.
34
+ */
35
+ suggestActions(deviceId: string, goal: string): Promise<ActionPlan>;
36
+ /**
37
+ * Compare the current screen with a previous screenshot and
38
+ * describe what changed.
39
+ */
40
+ compareScreenshots(deviceId: string, beforeBase64: string): Promise<VisualDiff>;
41
+ /**
42
+ * Extract all readable text from the current screen.
43
+ */
44
+ extractText(deviceId: string): Promise<string[]>;
45
+ /**
46
+ * Verify an assertion about the current screen state.
47
+ */
48
+ verifyScreen(deviceId: string, assertion: string): Promise<ScreenVerification>;
49
+ /**
50
+ * Smart tap: find an element by natural language description and
51
+ * tap its center.
52
+ */
53
+ smartTap(deviceId: string, elementDescription: string): Promise<{
54
+ success: boolean;
55
+ tapped: AnalyzedElement | null;
56
+ message: string;
57
+ }>;
58
+ /**
59
+ * Smart type: find an input field by description, tap to focus it,
60
+ * then type the provided text.
61
+ */
62
+ smartType(deviceId: string, fieldDescription: string, text: string): Promise<{
63
+ success: boolean;
64
+ field: AnalyzedElement | null;
65
+ message: string;
66
+ }>;
67
+ /**
68
+ * Fetch UI elements for the device, using the cache when valid.
69
+ * This is a lightweight call (no screenshot) used by the local
70
+ * element search fast path.
71
+ */
72
+ private getUIElements;
73
+ /**
74
+ * Capture screenshot and/or UI elements based on config flags.
75
+ * Uses caching with TTL and parallel capture for performance.
76
+ */
77
+ private captureContext;
78
+ /**
79
+ * Check if a cached entry is still valid based on TTL.
80
+ */
81
+ private isCacheValid;
82
+ /**
83
+ * Invalidate the cache — call after actions that change the screen.
84
+ * Only clears the screenshot. UI element positions (button layout)
85
+ * rarely change after a tap, so keep the UI tree cache to avoid
86
+ * expensive uiautomator dump calls on every interaction.
87
+ */
88
+ private invalidateCache;
89
+ /**
90
+ * Throw if the AI client is not available (no API key configured).
91
+ */
92
+ private assertClientAvailable;
93
+ /**
94
+ * Promise-based delay helper.
95
+ */
96
+ private delay;
97
+ }
98
+ //# sourceMappingURL=analyzer.d.ts.map
package/dist/ai/analyzer.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../../src/ai/analyzer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EACV,cAAc,EACd,YAAY,EACZ,UAAU,EACV,UAAU,EACV,kBAAkB,EAClB,eAAe,EAEf,YAAY,EACZ,iBAAiB,EAGlB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAgLvC,qBAAa,cAAc;IAWvB,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,MAAM;IACd,OAAO,CAAC,MAAM;IAZhB,OAAO,CAAC,KAAK,CAGN;IACP,OAAO,CAAC,QAAQ,CAAgB;IAEhC,4EAA4E;IAC5E,OAAO,CAAC,iBAAiB,CAAoB;gBAGnC,MAAM,EAAE,QAAQ,EAChB,MAAM,EAAE,YAAY,EACpB,MAAM,EAAE;QACd,QAAQ,EAAE,WAAW,GAAG,QAAQ,CAAC;QACjC,qBAAqB,EAAE,OAAO,CAAC;QAC/B,iBAAiB,EAAE,OAAO,CAAC;QAC3B,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;KACnB,EACD,iBAAiB,CAAC,EAAE,iBAAiB;IAcvC;;;OAGG;IACG,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC;IAsB9D;;;;;;OAMG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,CAAC;IAkCxB;;OAEG;IACG,cAAc,CAClB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,UAAU,CAAC;IAmCtB;;;OAGG;IACG,kBAAkB,CACtB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,UAAU,CAAC;IAkCtB;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAuBtD;;OAEG;IACG,YAAY,CAChB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,kBAAkB,CAAC;IAsB9B;;;OAGG;IACG,QAAQ,CACZ,QAAQ,EAAE,MAAM,EAChB,kBAAkB,EAAE,MAAM,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,OAAO,CAAC;QACjB,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;QAC/B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;IA8BF;;;OAGG;IACG,SAAS,CACb,QAAQ,EAAE,MAAM,EAChB,gBAAgB,EAAE,MAAM,EACxB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC;QACT,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,EAAE,eAAe,GAAG,IAAI,CAAC;QAC9B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;IA0CF;;;;OAIG;YACW,aAAa;IAmB3B;;;OAGG;YACW,cAAc;IAuD5B;;OAEG;IACH,OAAO,CAAC,YAAY;IAIpB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAIvB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAQ7B;;OAEG;IACH,OAAO,CAAC,KAAK;CAGd"}