agi-android-mcp 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
# Build the package and upload it to PyPI whenever a GitHub release is published.
name: Publish to PyPI

on:
  release:
    types: [published]

# NOTE(review): no API token or password is passed to the publish step below,
# so this presumably relies on PyPI trusted publishing (OIDC), which is what
# the id-token: write permission is for -- confirm against the PyPI project
# settings.
permissions:
  id-token: write

jobs:
  publish:
    runs-on: ubuntu-latest
    # Runs in the GitHub deployment environment named "pypi".
    environment: pypi
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      # Installs the `build` frontend and builds the distributions.
      - name: Build package
        run: |
          pip install build
          python -m build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,6 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .env
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AGI Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-android-mcp
3
+ Version: 0.0.1
4
+ Summary: Control any Android phone from Claude, Cursor, or any MCP client — via ADB
5
+ Project-URL: Homepage, https://github.com/agi-inc/agi-android-mcp
6
+ Project-URL: Repository, https://github.com/agi-inc/agi-android-mcp
7
+ Project-URL: Issues, https://github.com/agi-inc/agi-android-mcp/issues
8
+ Author-email: AGI Inc <tech@agi.tech>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: adb,android,automation,claude,llm,mcp
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Topic :: Software Development :: Testing
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: mcp[cli]>=1.0.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # agi-android-mcp
27
+
28
+ Control any Android phone from Claude Code, Cursor, or any MCP client — just ADB, no app required.
29
+
30
+ ![Demo](demo_screenshot.png)
31
+
32
+ ## How it works
33
+
34
+ ```
35
+ Claude Code ──(MCP stdio)──► agi-android-mcp ──(ADB)──► Android Phone
36
+ ```
37
+
38
+ agi-android-mcp is a lightweight Python MCP server that translates tool calls into `adb` commands. It works with any Android phone that has USB debugging enabled and has no proprietary dependencies.
39
+
40
+ ## Quick Start
41
+
42
+ ### 1. Install
43
+
44
+ ```bash
45
+ pip install agi-android-mcp
46
+ ```
47
+
48
+ ### 2. Plug in your phone
49
+
50
+ Enable USB debugging (Settings > Developer options > USB debugging), connect via USB, and verify:
51
+
52
+ ```bash
53
+ adb devices
54
+ # emulator-5554 device
55
+ ```
56
+
57
+ ### 3. Add to Claude Code
58
+
59
+ Add to `~/.claude/claude_code_config.json`:
60
+
61
+ ```json
62
+ {
63
+ "mcpServers": {
64
+ "android": {
65
+ "command": "agi-android-mcp"
66
+ }
67
+ }
68
+ }
69
+ ```
70
+
71
+ Restart Claude Code. That's it. Tell Claude:
72
+
73
+ > "Take a screenshot of my phone"
74
+
75
+ > "Open Chrome and search for the weather"
76
+
77
+ > "Launch Settings and turn on dark mode"
78
+
79
+ Claude will take screenshots, reason about the UI, and tap/type/swipe to accomplish the task.
80
+
81
+ ### Add to Cursor
82
+
83
+ Add to `.cursor/mcp.json`:
84
+
85
+ ```json
86
+ {
87
+ "mcpServers": {
88
+ "android": {
89
+ "command": "agi-android-mcp"
90
+ }
91
+ }
92
+ }
93
+ ```
94
+
95
+ ### Any MCP client
96
+
97
+ The server uses stdio transport. Run `agi-android-mcp` as the command — it speaks MCP over stdin/stdout.
98
+
99
+ ## Tools (18)
100
+
101
+ | Tool | Description |
102
+ |------|-------------|
103
+ | `screenshot` | Take a screenshot, returned as PNG image |
104
+ | `get_screen_size` | Get screen dimensions in pixels |
105
+ | `tap(x, y)` | Tap at pixel coordinates |
106
+ | `double_tap(x, y)` | Double-tap at pixel coordinates |
107
+ | `long_press(x, y)` | Long-press at pixel coordinates |
108
+ | `type_text(text)` | Type text into focused input field |
109
+ | `press_key(key)` | Press a key (enter, backspace, tab, space, home, back) |
110
+ | `swipe(direction)` | Swipe up/down/left/right from screen center |
111
+ | `drag(start, end)` | Drag between two points |
112
+ | `press_home()` | Press the Home button |
113
+ | `press_back()` | Press the Back button |
114
+ | `open_notifications()` | Open the notification shade |
115
+ | `open_quick_settings()` | Open quick settings panel |
116
+ | `launch_app(package)` | Launch an app by package name |
117
+ | `get_current_app()` | Get the currently visible app/activity |
118
+ | `list_installed_apps()` | List installed packages |
119
+ | `shell(command)` | Run any ADB shell command |
120
+ | `get_device_info()` | Get device model, Android version, screen size, battery |
121
+
122
+ ## Environment Variables
123
+
124
+ | Variable | Default | Description |
125
+ |----------|---------|-------------|
126
+ | `ADB_PATH` | Auto-detected | Path to `adb` binary |
127
+ | `ADB_SERIAL` | (none) | Target a specific device by serial number |
128
+
129
+ Multiple devices? Set `ADB_SERIAL`:
130
+
131
+ ```json
132
+ {
133
+ "mcpServers": {
134
+ "android": {
135
+ "command": "agi-android-mcp",
136
+ "env": {
137
+ "ADB_SERIAL": "XXXXXXXXXXXXXX"
138
+ }
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ ## Agentic Demo
145
+
146
+ `demo.py` runs a full autonomous loop: screenshot → Claude reasons → execute action → repeat.
147
+
148
+ ```bash
149
+ pip install anthropic
150
+ ANTHROPIC_API_KEY=sk-... python demo.py "Open Chrome and search for cats"
151
+ ```
152
+
153
+ ## How It Works
154
+
155
+ 1. MCP server starts over stdio (standard MCP transport)
156
+ 2. When a tool is called, it translates to an `adb` subprocess call
157
+ 3. Screenshots: `adb exec-out screencap -p`
158
+ 4. Input: `adb shell input tap/swipe/text/keyevent`
159
+ 5. Apps: `adb shell am`, `adb shell pm`
160
+
161
+ ## Development
162
+
163
+ ```bash
164
+ pip install -e .
165
+ python -c "from agi_android_mcp.server import mcp; print(len(mcp._tool_manager._tools), 'tools')"
166
+ ```
167
+
168
+ ## License
169
+
170
+ MIT
@@ -0,0 +1,145 @@
1
+ # agi-android-mcp
2
+
3
+ Control any Android phone from Claude Code, Cursor, or any MCP client — just ADB, no app required.
4
+
5
+ ![Demo](demo_screenshot.png)
6
+
7
+ ## How it works
8
+
9
+ ```
10
+ Claude Code ──(MCP stdio)──► agi-android-mcp ──(ADB)──► Android Phone
11
+ ```
12
+
13
+ agi-android-mcp is a lightweight Python MCP server that translates tool calls into `adb` commands. It works with any Android phone that has USB debugging enabled and has no proprietary dependencies.
14
+
15
+ ## Quick Start
16
+
17
+ ### 1. Install
18
+
19
+ ```bash
20
+ pip install agi-android-mcp
21
+ ```
22
+
23
+ ### 2. Plug in your phone
24
+
25
+ Enable USB debugging (Settings > Developer options > USB debugging), connect via USB, and verify:
26
+
27
+ ```bash
28
+ adb devices
29
+ # emulator-5554 device
30
+ ```
31
+
32
+ ### 3. Add to Claude Code
33
+
34
+ Add to `~/.claude/claude_code_config.json`:
35
+
36
+ ```json
37
+ {
38
+ "mcpServers": {
39
+ "android": {
40
+ "command": "agi-android-mcp"
41
+ }
42
+ }
43
+ }
44
+ ```
45
+
46
+ Restart Claude Code. That's it. Tell Claude:
47
+
48
+ > "Take a screenshot of my phone"
49
+
50
+ > "Open Chrome and search for the weather"
51
+
52
+ > "Launch Settings and turn on dark mode"
53
+
54
+ Claude will take screenshots, reason about the UI, and tap/type/swipe to accomplish the task.
55
+
56
+ ### Add to Cursor
57
+
58
+ Add to `.cursor/mcp.json`:
59
+
60
+ ```json
61
+ {
62
+ "mcpServers": {
63
+ "android": {
64
+ "command": "agi-android-mcp"
65
+ }
66
+ }
67
+ }
68
+ ```
69
+
70
+ ### Any MCP client
71
+
72
+ The server uses stdio transport. Run `agi-android-mcp` as the command — it speaks MCP over stdin/stdout.
73
+
74
+ ## Tools (18)
75
+
76
+ | Tool | Description |
77
+ |------|-------------|
78
+ | `screenshot` | Take a screenshot, returned as PNG image |
79
+ | `get_screen_size` | Get screen dimensions in pixels |
80
+ | `tap(x, y)` | Tap at pixel coordinates |
81
+ | `double_tap(x, y)` | Double-tap at pixel coordinates |
82
+ | `long_press(x, y)` | Long-press at pixel coordinates |
83
+ | `type_text(text)` | Type text into focused input field |
84
+ | `press_key(key)` | Press a key (enter, backspace, tab, space, home, back) |
85
+ | `swipe(direction)` | Swipe up/down/left/right from screen center |
86
+ | `drag(start, end)` | Drag between two points |
87
+ | `press_home()` | Press the Home button |
88
+ | `press_back()` | Press the Back button |
89
+ | `open_notifications()` | Open the notification shade |
90
+ | `open_quick_settings()` | Open quick settings panel |
91
+ | `launch_app(package)` | Launch an app by package name |
92
+ | `get_current_app()` | Get the currently visible app/activity |
93
+ | `list_installed_apps()` | List installed packages |
94
+ | `shell(command)` | Run any ADB shell command |
95
+ | `get_device_info()` | Get device model, Android version, screen size, battery |
96
+
97
+ ## Environment Variables
98
+
99
+ | Variable | Default | Description |
100
+ |----------|---------|-------------|
101
+ | `ADB_PATH` | Auto-detected | Path to `adb` binary |
102
+ | `ADB_SERIAL` | (none) | Target a specific device by serial number |
103
+
104
+ Multiple devices? Set `ADB_SERIAL`:
105
+
106
+ ```json
107
+ {
108
+ "mcpServers": {
109
+ "android": {
110
+ "command": "agi-android-mcp",
111
+ "env": {
112
+ "ADB_SERIAL": "XXXXXXXXXXXXXX"
113
+ }
114
+ }
115
+ }
116
+ }
117
+ ```
118
+
119
+ ## Agentic Demo
120
+
121
+ `demo.py` runs a full autonomous loop: screenshot → Claude reasons → execute action → repeat.
122
+
123
+ ```bash
124
+ pip install anthropic
125
+ ANTHROPIC_API_KEY=sk-... python demo.py "Open Chrome and search for cats"
126
+ ```
127
+
128
+ ## How It Works
129
+
130
+ 1. MCP server starts over stdio (standard MCP transport)
131
+ 2. When a tool is called, it translates to an `adb` subprocess call
132
+ 3. Screenshots: `adb exec-out screencap -p`
133
+ 4. Input: `adb shell input tap/swipe/text/keyevent`
134
+ 5. Apps: `adb shell am`, `adb shell pm`
135
+
136
+ ## Development
137
+
138
+ ```bash
139
+ pip install -e .
140
+ python -c "from agi_android_mcp.server import mcp; print(len(mcp._tool_manager._tools), 'tools')"
141
+ ```
142
+
143
+ ## License
144
+
145
+ MIT
@@ -0,0 +1,460 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ AGI Android MCP Demo — Let Claude drive your phone via ADB.
4
+
5
+ An agentic loop: screenshot -> Claude decides action -> execute via ADB -> repeat.
6
+
7
+ Usage:
8
+ pip install anthropic
9
+ ANTHROPIC_API_KEY=sk-... python demo.py "Open Chrome and search for cats"
10
+
11
+ Prerequisites:
12
+ 1. ADB installed and in PATH
13
+ 2. Android device connected with USB debugging enabled
14
+ 3. Run 'adb devices' to verify connectivity
15
+ """
16
+
17
+ import argparse
18
+ import base64
19
+ import os
20
+ import shutil
21
+ import subprocess
22
+ import sys
23
+ import time
24
+
25
+ try:
26
+ import anthropic
27
+ except ImportError:
28
+ print("Error: 'anthropic' package required. Install with: pip install anthropic")
29
+ sys.exit(1)
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Colors for terminal output
33
+ # ---------------------------------------------------------------------------
34
+
35
+
36
class C:
    """ANSI escape sequences used to colour and style terminal output."""

    # Text attributes.
    RESET = "\033[0m"
    BOLD = "\033[1m"
    DIM = "\033[2m"

    # Foreground colours.
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
46
+
47
+
48
+ BANNER = f"""{C.CYAN}{C.BOLD}
49
+ ___ ____________ ___ __ _ __
50
+ / | / ____/ _/ | / / /___ ___ __/ /________ (_)___/ /
51
+ / /| |/ / __ / // /| |/ / __ / __|/ / __ / ___/ / / __ /
52
+ / ___ / /_/ // // ___ / / /_/ / / / / /_/ / / / / / /_/ /
53
+ /_/ |_\\____/___/_/ |_/_/\\__,_/_/ /_/\\____/_/ /_/_/\\__,_/
54
+ {C.RESET}{C.DIM}
55
+ Claude x ADB — Android MCP Demo
56
+ {C.RESET}"""
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # ADB helpers
60
+ # ---------------------------------------------------------------------------
61
+
62
# Path to the adb executable. A non-empty ADB_PATH wins; otherwise use the
# `adb` found on PATH, falling back to the bare command name. Using `or`
# (rather than a .get() default) means an empty ADB_PATH no longer yields an
# empty executable path, and PATH is only searched when no override is set.
ADB = os.environ.get("ADB_PATH") or shutil.which("adb") or "adb"
# Optional device serial; empty string means "no specific device".
SERIAL = os.environ.get("ADB_SERIAL", "")
64
+
65
+
66
def _adb(*args: str, timeout: float = 10.0) -> subprocess.CompletedProcess:
    """Invoke adb with ``args`` and hand back the finished process.

    When ``SERIAL`` is non-empty, ``-s <SERIAL>`` is inserted ahead of the
    arguments. Output is captured; the exit code is not checked here.
    """
    device_selector = ["-s", SERIAL] if SERIAL else []
    argv = [ADB, *device_selector, *args]
    return subprocess.run(argv, capture_output=True, timeout=timeout)
72
+
73
+
74
def _shell(*args: str, timeout: float = 10.0) -> str:
    """Run ``adb shell <args>`` and return its stdout as stripped text.

    Bytes that are not valid UTF-8 are replaced rather than raising.
    """
    completed = _adb("shell", *args, timeout=timeout)
    decoded = completed.stdout.decode("utf-8", errors="replace")
    return decoded.strip()
77
+
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Tools for Claude
81
+ # ---------------------------------------------------------------------------
82
+
83
# Screen size in pixels. Starts at 0x0 and is filled in by run() before the
# agent loop begins; exec_tool() reads it for swipes and screenshot captions.
SCREEN_W, SCREEN_H = 0, 0

# Tool definitions passed to the Anthropic Messages API as `tools=`. Each entry
# has a name, a description, and a JSON-schema `input_schema`; exec_tool()
# dispatches on the same names.
TOOLS = [
    # No arguments: captures the current screen.
    {
        "name": "screenshot",
        "description": "Take a screenshot of the Android screen. Call this to see what's on screen.",
        "input_schema": {"type": "object", "properties": {}, "required": []},
    },
    # Coordinates are declared as JSON numbers; exec_tool() converts them with int().
    {
        "name": "tap",
        "description": "Tap at (x, y) pixel coordinates on the screen.",
        "input_schema": {
            "type": "object",
            "properties": {
                "x": {"type": "number", "description": "X coordinate in pixels"},
                "y": {"type": "number", "description": "Y coordinate in pixels"},
            },
            "required": ["x", "y"],
        },
    },
    {
        "name": "type_text",
        "description": "Type text into the currently focused input field.",
        "input_schema": {
            "type": "object",
            "properties": {
                "text": {"type": "string", "description": "Text to type"},
            },
            "required": ["text"],
        },
    },
    {
        "name": "swipe",
        "description": "Swipe the screen in a direction (up/down/left/right).",
        "input_schema": {
            "type": "object",
            "properties": {
                "direction": {
                    "type": "string",
                    "enum": ["up", "down", "left", "right"],
                },
            },
            "required": ["direction"],
        },
    },
    # The enum matches the keys of the keymap in exec_tool().
    {
        "name": "press_key",
        "description": "Press a key: enter, back, home, backspace.",
        "input_schema": {
            "type": "object",
            "properties": {
                "key": {
                    "type": "string",
                    "enum": ["enter", "back", "home", "backspace"],
                },
            },
            "required": ["key"],
        },
    },
    {
        "name": "launch_app",
        "description": "Launch an Android app by package name.",
        "input_schema": {
            "type": "object",
            "properties": {
                "package": {"type": "string", "description": "e.g. com.android.chrome"},
            },
            "required": ["package"],
        },
    },
    {
        "name": "long_press",
        "description": "Long-press at (x, y) pixel coordinates.",
        "input_schema": {
            "type": "object",
            "properties": {
                "x": {"type": "number"},
                "y": {"type": "number"},
            },
            "required": ["x", "y"],
        },
    },
    # Not a device action: run() treats a call to this tool as the signal to stop.
    {
        "name": "done",
        "description": "Call this when the task is complete. Include a summary of what you did.",
        "input_schema": {
            "type": "object",
            "properties": {
                "summary": {"type": "string", "description": "What was accomplished"},
            },
            "required": ["summary"],
        },
    },
]
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # Pretty output helpers
181
+ # ---------------------------------------------------------------------------
182
+
183
+
184
def log_step(step: int, max_steps: int):
    """Print a 30-character progress bar followed by ``Step <step>/<max_steps>``."""
    width = 30
    done = int(width * step / max_steps)
    progress = f"{'=' * done}{'-' * (width - done)}"
    print(f"\n{C.BLUE}{C.BOLD}[{progress}] Step {step}/{max_steps}{C.RESET}")
189
+
190
+
191
def log_thinking(text: str):
    """Echo ``text`` to stdout one line at a time, indented and dimmed."""
    dimmed_lines = [f" {C.DIM}{line}{C.RESET}" for line in text.split("\n")]
    print("\n".join(dimmed_lines))
194
+
195
+
196
def log_action(name: str, args: dict):
    """Print the tool name followed by a short, dimmed summary of its arguments.

    Unknown tool names are printed with an empty summary. A known tool whose
    expected argument is missing raises ``KeyError`` (except ``done``, whose
    summary defaults to an empty string and is cut to 60 characters).
    """

    def point() -> str:
        return f"({args['x']}, {args['y']})"

    # One formatter per tool; evaluated lazily so only the selected tool's
    # arguments are looked up.
    formatters = {
        "tap": point,
        "type_text": lambda: f'"{args["text"]}"',
        "swipe": lambda: args["direction"],
        "press_key": lambda: args["key"],
        "launch_app": lambda: args["package"],
        "long_press": point,
        "screenshot": lambda: "capturing...",
        "done": lambda: args.get("summary", "")[:60],
    }
    formatter = formatters.get(name)
    args_str = formatter() if formatter is not None else ""
    print(f" {C.YELLOW}{C.BOLD}{name}{C.RESET} {C.DIM}{args_str}{C.RESET}")
215
+
216
+
217
def log_result(name: str, elapsed_ms: int):
    """Print a green ``done`` marker with the elapsed time; ``name`` is not used."""
    status = f"{C.GREEN}done{C.RESET}"
    timing = f"{C.DIM}({elapsed_ms}ms){C.RESET}"
    print(f" {status} {timing}")
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # Tool execution via ADB
223
+ # ---------------------------------------------------------------------------
224
+
225
+
226
def _escape_for_input_text(text: str) -> str:
    """Encode ``text`` as one shell-safe argument for ``adb shell input text``."""
    import shlex  # local import so this block does not depend on file-level imports

    # Spaces are sent as "%s", as the original escaping did; a raw space would
    # split the argument on the device side.
    encoded = text.replace(" ", "%s")
    # adb hands the joined arguments to the device shell as one command line,
    # so quote the whole value instead of backslash-escaping a hand-picked set
    # of metacharacters (the old list missed `\`, `$`, backticks, `<`, `>` ...).
    return shlex.quote(encoded)


def exec_tool(name: str, args: dict) -> list:
    """Execute a tool call via ADB and return Anthropic content blocks.

    Args:
        name: Tool name, one of the names declared in ``TOOLS``.
        args: Tool input as produced by the model (a dict matching the tool's
            ``input_schema``).

    Returns:
        A list of content blocks: a single text block for most tools, or a
        text block plus a base64 PNG image block for ``screenshot``. Unknown
        tool names and invalid swipe directions yield a text block describing
        the problem rather than raising.

    Raises:
        KeyError: if a required argument is missing from ``args``.
    """
    import shlex  # local import so this block does not depend on file-level imports

    def text_block(message: str) -> list:
        return [{"type": "text", "text": message}]

    if name == "screenshot":
        r = _adb("exec-out", "screencap", "-p", timeout=15.0)
        if r.returncode != 0 or not r.stdout:
            # stderr is arbitrary bytes; never let decoding mask the real failure.
            detail = r.stderr.decode("utf-8", errors="replace")
            return text_block(f"Screenshot failed: {detail}")
        b64 = base64.standard_b64encode(r.stdout).decode("ascii")
        return [
            {"type": "text", "text": f"Here is the current screen ({SCREEN_W}x{SCREEN_H}):"},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": b64,
                },
            },
        ]

    if name == "tap":
        x, y = int(args["x"]), int(args["y"])
        _shell("input", "tap", str(x), str(y))
        return text_block(f"Tapped ({x}, {y})")

    if name == "type_text":
        text = args["text"]
        _shell("input", "text", _escape_for_input_text(text))
        return text_block(f"Typed: {text}")

    if name == "swipe":
        direction = args["direction"]
        cx, cy = SCREEN_W // 2, SCREEN_H // 2
        # Swipe distance is 35% of the screen height in every direction.
        dist = int(0.35 * SCREEN_H)
        offsets = {
            "up": (0, -dist),
            "down": (0, dist),
            "left": (-dist, 0),
            "right": (dist, 0),
        }
        if direction not in offsets:
            return text_block(f"Invalid direction: {direction}")
        dx, dy = offsets[direction]
        # Trailing "300" is the swipe duration argument passed to `input swipe`.
        _shell("input", "swipe", str(cx), str(cy), str(cx + dx), str(cy + dy), "300")
        return text_block(f"Swiped {direction}")

    if name == "press_key":
        key = args["key"]
        # Android keycodes for the supported names; anything else is passed
        # through to `input keyevent` unchanged.
        keymap = {"enter": "66", "backspace": "67", "back": "4", "home": "3"}
        _shell("input", "keyevent", keymap.get(key, key))
        return text_block(f"Pressed {key}")

    if name == "launch_app":
        package = args["package"]
        # Quote the model-supplied package so it cannot inject device-shell
        # syntax; ordinary package names are left unchanged by shlex.quote.
        _shell(
            "monkey", "-p", shlex.quote(package),
            "-c", "android.intent.category.LAUNCHER", "1",
        )
        return text_block(f"Launched {package}")

    if name == "long_press":
        x, y = int(args["x"]), int(args["y"])
        # A swipe that starts and ends on the same point, held for 1000.
        _shell("input", "swipe", str(x), str(y), str(x), str(y), "1000")
        return text_block(f"Long-pressed ({x}, {y})")

    if name == "done":
        return text_block(args["summary"])

    return text_block(f"Unknown tool: {name}")
302
+
303
+
304
+ # ---------------------------------------------------------------------------
305
+ # Agent loop
306
+ # ---------------------------------------------------------------------------
307
+
308
# System prompt template for the agent loop. The {w} and {h} placeholders are
# filled with the screen width and height via SYSTEM.format() in run().
# Trailing backslashes join the wrapped source lines into single prompt lines.
SYSTEM = """\
You are an Android phone operator. You can see the screen via screenshots and \
interact using tap, type_text, swipe, press_key, launch_app, and long_press.

Screen coordinates are in pixels. The screen is {w}x{h}.

Strategy:
1. Always start by taking a screenshot to see the current state.
2. Decide on one action at a time.
3. After each action, take another screenshot to verify the result.
4. When the task is complete, call the `done` tool with a summary.

Be precise with coordinates — look carefully at the screenshot to identify \
where UI elements are before tapping.\
"""
323
+
324
+
325
def _detect_screen_size(fallback: tuple = (1080, 2400)) -> tuple:
    """Return ``(width, height)`` parsed from ``wm size``, or ``fallback`` on failure."""
    try:
        for line in _shell("wm", "size").splitlines():
            if "Physical size" in line:
                width, height = line.split(":")[-1].strip().split("x")
                width, height = int(width), int(height)
                if width and height:
                    return width, height
                break
    except (subprocess.SubprocessError, OSError, ValueError):
        # Best effort only: an adb timeout, a missing adb binary, or output we
        # cannot parse all fall through to the default size below.
        pass
    return fallback


def run(task: str, max_steps: int = 25, model: str = "claude-sonnet-4-5-20250929"):
    """Drive the phone with Claude until the task is done or steps run out.

    Each step sends the conversation to the Anthropic Messages API, prints any
    text the model returned, executes every requested tool through
    ``exec_tool`` and feeds the results back as the next user message.

    Args:
        task: Natural-language description of what to do on the phone.
        max_steps: Maximum number of API round-trips before giving up.
        model: Anthropic model identifier.

    The loop ends when the model calls the ``done`` tool, stops with
    ``end_turn``, returns no tool calls at all, or ``max_steps`` is reached.
    Exits the process with status 1 if ``adb devices`` lists no devices.
    Updates the module-level ``SCREEN_W`` / ``SCREEN_H``.
    """
    global SCREEN_W, SCREEN_H

    client = anthropic.Anthropic()

    # Check ADB connectivity
    print(f" {C.DIM}Checking ADB connection...{C.RESET}")
    r = _adb("devices", timeout=5.0)
    output = r.stdout.decode("utf-8", errors="replace")
    # The first line of `adb devices` is a header; any non-blank line after it
    # is counted as a device.
    device_lines = [row for row in output.strip().splitlines()[1:] if row.strip()]
    if not device_lines:
        print(f" {C.RED}{C.BOLD}Error:{C.RESET} {C.RED}No ADB devices found.{C.RESET}")
        print(" Connect a device with USB debugging enabled and run 'adb devices'.")
        sys.exit(1)

    # Get screen size (falls back to 1080x2400 when it cannot be determined)
    SCREEN_W, SCREEN_H = _detect_screen_size()

    print(f" {C.GREEN}Connected{C.RESET} | Screen: {SCREEN_W}x{SCREEN_H}")
    print(f" {C.DIM}Model: {model}{C.RESET}")
    print(f" {C.MAGENTA}{C.BOLD}Task:{C.RESET} {task}")

    system_prompt = SYSTEM.format(w=SCREEN_W, h=SCREEN_H)
    messages = [{"role": "user", "content": f"Task: {task}"}]
    total_tokens = 0
    start_time = time.time()

    for step in range(1, max_steps + 1):
        log_step(step, max_steps)

        t0 = time.time()
        response = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt,
            tools=TOOLS,
            messages=messages,
        )
        api_ms = int((time.time() - t0) * 1000)
        step_tokens = response.usage.input_tokens + response.usage.output_tokens
        total_tokens += step_tokens

        print(f" {C.DIM}API: {api_ms}ms | tokens: +{step_tokens}{C.RESET}")

        # Process response
        assistant_content = response.content
        messages.append({"role": "assistant", "content": assistant_content})

        # Show Claude's thinking
        for block in assistant_content:
            if block.type == "text" and block.text:
                log_thinking(block.text)

        # Check stop
        if response.stop_reason == "end_turn":
            print(f"\n {C.DIM}Claude finished (no more tool calls).{C.RESET}")
            break

        # Execute tool calls
        tool_results = []
        finished = False
        for block in assistant_content:
            if block.type != "tool_use":
                continue

            name = block.name
            args = block.input
            log_action(name, args)

            t0 = time.time()
            result_content = exec_tool(name, args)
            exec_ms = int((time.time() - t0) * 1000)
            log_result(name, exec_ms)

            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result_content,
                }
            )

            if name == "done":
                finished = True
                print(f"\n{C.GREEN}{C.BOLD}Task Complete{C.RESET}")
                print(f" {args['summary']}")

        if not tool_results:
            # The model stopped for some reason other than end_turn (for
            # example it hit max_tokens) without requesting a tool. There is
            # nothing to send back, and an empty user message would only make
            # the next request invalid, so stop here.
            print(
                f"\n {C.YELLOW}Claude stopped without a tool call "
                f"(stop_reason={response.stop_reason}).{C.RESET}"
            )
            break

        messages.append({"role": "user", "content": tool_results})

        if finished:
            break
    else:
        # Only reached when the loop ran out of steps without a break.
        print(f"\n {C.YELLOW}Reached max steps ({max_steps}).{C.RESET}")

    elapsed = time.time() - start_time
    print(f"\n{C.DIM} {elapsed:.1f}s total | {total_tokens:,} tokens{C.RESET}")
434
+
435
+
436
+ # ---------------------------------------------------------------------------
437
+ # CLI
438
+ # ---------------------------------------------------------------------------
439
+
440
+
441
def main():
    """Parse the command line, print the banner, and start the agent loop."""
    cli = argparse.ArgumentParser(
        description="Let Claude drive your Android phone via ADB.",
        epilog='Example: python demo.py "Open Settings and enable dark mode"',
    )
    # Positional: the natural-language task handed to run().
    cli.add_argument("task", help="What you want Claude to do on the phone")
    cli.add_argument(
        "--steps",
        type=int,
        default=25,
        help="Max steps (default 25)",
    )
    cli.add_argument(
        "--model",
        default="claude-sonnet-4-5-20250929",
        help="Anthropic model (default: claude-sonnet-4-5-20250929)",
    )
    options = cli.parse_args()

    print(BANNER)
    run(options.task, max_steps=options.steps, model=options.model)
457
+
458
+
459
+ if __name__ == "__main__":
460
+ main()
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agi-android-mcp"
7
+ version = "0.0.1"
8
+ description = "Control any Android phone from Claude, Cursor, or any MCP client — via ADB"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "AGI Inc", email = "tech@agi.tech" },
14
+ ]
15
+ keywords = ["android", "mcp", "adb", "llm", "claude", "automation"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Software Development :: Libraries",
26
+ "Topic :: Software Development :: Testing",
27
+ ]
28
+ dependencies = [
29
+ "mcp[cli]>=1.0.0",
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/agi-inc/agi-android-mcp"
34
+ Repository = "https://github.com/agi-inc/agi-android-mcp"
35
+ Issues = "https://github.com/agi-inc/agi-android-mcp/issues"
36
+
37
+ [project.scripts]
38
+ agi-android-mcp = "agi_android_mcp.server:main"
@@ -0,0 +1 @@
1
+ """AGI Android MCP Server — ADB transport."""
@@ -0,0 +1,393 @@
1
+ """
2
+ AGI Android MCP Server — control any Android device via ADB.
3
+
4
+ All device interaction goes through `adb` subprocess calls.
5
+ No proprietary dependencies, works with any Android phone that has USB debugging enabled.
6
+ """
7
+
8
+ import base64
9
+ import os
10
+ import shutil
11
+ import subprocess
12
+ import time
13
+
14
+ from mcp.server.fastmcp import FastMCP, Image
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # ADB helpers
18
+ # ---------------------------------------------------------------------------
19
+
20
# Executable used for every adb invocation. A non-empty ADB_PATH wins; an unset
# or blank ADB_PATH falls back to the `adb` found on PATH, and finally to the
# bare name so subprocess can still attempt to resolve it.
ADB = os.environ.get("ADB_PATH", "").strip() or shutil.which("adb") or "adb"
# Optional device serial. When non-empty, _adb() passes it as `-s <serial>`;
# surrounding whitespace is removed so it matches the serial adb prints.
SERIAL = os.environ.get("ADB_SERIAL", "").strip()
22
+
23
+
24
def _adb(*args: str, timeout: float = 10.0) -> subprocess.CompletedProcess:
    """Invoke adb with ``args`` and hand back the finished process.

    When ``SERIAL`` is non-empty, ``-s <SERIAL>`` is placed ahead of ``args``.
    stdout and stderr are captured (as bytes) and the call is bounded by
    ``timeout`` seconds.
    """
    device_selector = ["-s", SERIAL] if SERIAL else []
    command = [ADB, *device_selector, *args]
    return subprocess.run(command, capture_output=True, timeout=timeout)
31
+
32
+
33
def _shell(*args: str, timeout: float = 10.0) -> str:
    """Execute ``adb shell <args>`` and return its stdout as stripped text.

    Output is decoded as UTF-8 with undecodable bytes replaced. stderr and the
    exit status are not inspected here.
    """
    completed = _adb("shell", *args, timeout=timeout)
    decoded = completed.stdout.decode("utf-8", errors="replace")
    return decoded.strip()
37
+
38
+
39
def _check_connection() -> str:
    """Verify an ADB device is connected and reachable.

    Parses the tab-separated ``serial<TAB>state`` rows printed by
    ``adb devices``. When ``SERIAL`` is set, rows for other serials are ignored.

    Returns:
        Serial of the first considered device whose state is ``device``.

    Raises:
        RuntimeError: If no device rows are printed, a considered device is
            offline, the only candidates are unauthorized, the configured
            ``SERIAL`` is not listed at all, or no listed device is usable.
    """
    result = _adb("devices", timeout=5.0)
    output = result.stdout.decode("utf-8", errors="replace")
    # Skip the first line (the listing header) and drop blank lines.
    rows = [row for row in output.strip().splitlines()[1:] if row.strip()]

    if not rows:
        raise RuntimeError(
            "No ADB devices found. Connect a device with USB debugging enabled "
            "and run 'adb devices' to verify."
        )

    serial_listed = False  # did any row match the requested SERIAL (or any row, if unset)?
    unauthorized = []
    for row in rows:
        fields = row.split("\t")
        if len(fields) < 2:
            # Not a "<serial>\t<state>" row (e.g. an informational line).
            continue
        serial, state = fields[0], fields[1]
        if SERIAL and serial != SERIAL:
            continue
        serial_listed = True
        if state == "offline":
            raise RuntimeError(f"Device {serial} is offline.")
        if state == "device":
            return serial
        if state == "unauthorized":
            # Keep looking: another listed device may still be usable.
            unauthorized.append(serial)

    if unauthorized:
        raise RuntimeError(
            f"Device {unauthorized[0]} is unauthorized. Unlock the device and "
            "accept the USB debugging prompt, then try again."
        )
    # Only claim "not found" when the serial truly never appeared; a listed
    # device in some other state falls through to the generic message below.
    if SERIAL and not serial_listed:
        raise RuntimeError(
            f"Device with serial '{SERIAL}' not found. "
            f"Available devices:\n{output}"
        )
    raise RuntimeError(f"No usable ADB device found. Output:\n{output}")
68
+
69
+
70
+ # ---------------------------------------------------------------------------
71
+ # MCP Server
72
+ # ---------------------------------------------------------------------------
73
+
74
# Shared FastMCP server instance: the @mcp.tool() functions in this module
# register themselves on it, and main() starts it via mcp.run().
mcp = FastMCP(
    "AGI Android MCP",
    instructions="Control any Android phone via ADB — tap, swipe, type, screenshot, and more.",
)
78
+
79
+
80
+ # ---------------------------------------------------------------------------
81
+ # Tools
82
+ # ---------------------------------------------------------------------------
83
+
84
+
85
@mcp.tool()
def screenshot() -> Image:
    """Take a screenshot of the Android screen. Returns the current screen as a PNG image."""
    _check_connection()
    # exec-out streams the raw screencap bytes back over adb's stdout.
    capture = _adb("exec-out", "screencap", "-p", timeout=15.0)
    if capture.returncode != 0:
        raise RuntimeError(
            f"screencap failed: {capture.stderr.decode('utf-8', errors='replace')}"
        )
    raw_png = capture.stdout
    # Fewer than 8 bytes is treated as "no image came back".
    if len(raw_png or b"") < 8:
        raise RuntimeError("screencap returned empty data")
    return Image(data=raw_png, format="png")
98
+
99
+
100
@mcp.tool()
def get_screen_size() -> dict:
    """Get the physical screen size of the Android device in pixels."""
    import re

    _check_connection()
    output = _shell("wm", "size")
    rows = output.splitlines()

    # Expected shape: "Physical size: 1080x2400". Lines mentioning the physical
    # size are tried first; after that every line is tried from last to first,
    # which covers an override-only or bare "WxH" answer.
    preferred = [row for row in rows if "Physical size" in row]
    for row in preferred + rows[::-1]:
        # Only the text after the last colon can hold the dimensions.
        candidate = row.split(":")[-1]
        found = re.fullmatch(r"\s*(\d+)\s*x\s*(\d+)\s*", candidate)
        if found:
            return {"width": int(found.group(1)), "height": int(found.group(2))}

    # Anything that does not look like "<digits>x<digits>" (including error
    # text that merely contains an "x") ends up here as a RuntimeError rather
    # than a stray ValueError from unpacking or int().
    raise RuntimeError(f"Could not parse screen size from: {output}")
119
+
120
+
121
@mcp.tool()
def tap(x: int, y: int) -> str:
    """Tap at (x, y) pixel coordinates on the screen."""
    _check_connection()
    # `input tap` receives the coordinates as two separate string arguments.
    point = (str(x), str(y))
    _shell("input", "tap", *point)
    return f"Tapped ({x}, {y})"
127
+
128
+
129
@mcp.tool()
def double_tap(x: int, y: int) -> str:
    """Double-tap at (x, y) pixel coordinates on the screen."""
    _check_connection()
    tap_command = ("input", "tap", str(x), str(y))
    _shell(*tap_command)
    # Short pause (0.1 s) between the two taps.
    time.sleep(0.1)
    _shell(*tap_command)
    return f"Double-tapped ({x}, {y})"
137
+
138
+
139
@mcp.tool()
def long_press(x: int, y: int) -> str:
    """Long-press at (x, y) pixel coordinates (holds for 1 second)."""
    _check_connection()
    hold_ms = "1000"
    px, py = str(x), str(y)
    # A swipe whose start and end points coincide, lasting hold_ms, is used as
    # the press-and-hold gesture.
    _shell("input", "swipe", px, py, px, py, hold_ms)
    return f"Long-pressed ({x}, {y})"
145
+
146
+
147
@mcp.tool()
def type_text(text: str) -> str:
    """Type text into the currently focused input field.

    Handles spaces and special characters automatically.
    """
    import shlex

    _check_connection()
    # ADB `input text` doesn't handle spaces well — it expects "%s" in their place.
    encoded = text.replace(" ", "%s")
    # The argument is interpreted by the device shell, so quote the whole
    # payload as a single shell word. This keeps every metacharacter literal
    # ($, *, ?, #, ~, !, quotes, brackets, ...) instead of relying on a
    # hand-maintained list of characters to backslash-escape.
    _shell("input", "text", shlex.quote(encoded))
    return f"Typed: {text}"
170
+
171
+
172
@mcp.tool()
def press_key(key: str) -> str:
    """Press a key on the Android device.

    Supported keys: enter, backspace, delete, tab, space, home, back,
    menu, search, volume_up, volume_down, power, escape.
    """
    _check_connection()
    # Friendly name -> numeric keycode handed to `input keyevent`.
    named_keycodes = {
        "enter": "66",
        "backspace": "67",
        "delete": "112",
        "tab": "61",
        "space": "62",
        "home": "3",
        "back": "4",
        "menu": "82",
        "search": "84",
        "volume_up": "24",
        "volume_down": "25",
        "power": "26",
        "escape": "111",
    }
    resolved = named_keycodes.get(key.lower())
    if resolved is not None:
        _shell("input", "keyevent", resolved)
        return f"Pressed {key}"
    # Unknown name: forward it unchanged so raw KEYCODE_ values can be used.
    _shell("input", "keyevent", key)
    return f"Pressed key: {key}"
202
+
203
+
204
@mcp.tool()
def swipe(direction: str, distance: int = 500, x: int = -1, y: int = -1) -> str:
    """Swipe the screen in a direction.

    Args:
        direction: One of "up", "down", "left", "right".
        distance: Swipe distance in pixels (default 500).
        x: Starting X coordinate. Defaults to screen center.
        y: Starting Y coordinate. Defaults to screen center.
    """
    _check_connection()
    # A negative coordinate means "not provided": fill it from the screen centre.
    if x < 0 or y < 0:
        screen = get_screen_size()
        x = screen["width"] // 2 if x < 0 else x
        y = screen["height"] // 2 if y < 0 else y

    direction = direction.lower()
    # Unit step per direction; screen Y grows downwards, so "up" is negative Y.
    unit_steps = {
        "up": (0, -1),
        "down": (0, 1),
        "left": (-1, 0),
        "right": (1, 0),
    }
    if direction not in unit_steps:
        raise ValueError(f"Invalid direction: {direction}. Use up/down/left/right.")
    step_x, step_y = unit_steps[direction]
    ex = x + step_x * distance
    ey = y + step_y * distance

    # Fixed 300 ms gesture duration.
    _shell("input", "swipe", str(x), str(y), str(ex), str(ey), "300")
    return f"Swiped {direction} from ({x}, {y}) to ({ex}, {ey})"
237
+
238
+
239
@mcp.tool()
def drag(start_x: int, start_y: int, end_x: int, end_y: int) -> str:
    """Drag from (start_x, start_y) to (end_x, end_y) with a 300ms duration."""
    _check_connection()
    # Start point followed by end point, in the order `input swipe` expects.
    path = (start_x, start_y, end_x, end_y)
    _shell("input", "swipe", *(str(value) for value in path), "300")
    return f"Dragged from ({start_x}, {start_y}) to ({end_x}, {end_y})"
250
+
251
+
252
@mcp.tool()
def press_home() -> str:
    """Press the Home button."""
    _check_connection()
    # Sent by symbolic keycode name rather than its numeric value.
    home_keycode = "KEYCODE_HOME"
    _shell("input", "keyevent", home_keycode)
    return "Pressed Home"
258
+
259
+
260
@mcp.tool()
def press_back() -> str:
    """Press the Back button."""
    _check_connection()
    # Sent by symbolic keycode name rather than its numeric value.
    back_keycode = "KEYCODE_BACK"
    _shell("input", "keyevent", back_keycode)
    return "Pressed Back"
266
+
267
+
268
@mcp.tool()
def open_notifications() -> str:
    """Open the notification shade."""
    _check_connection()
    # Issued through the device's `cmd statusbar` interface.
    statusbar_action = "expand-notifications"
    _shell("cmd", "statusbar", statusbar_action)
    return "Opened notifications"
274
+
275
+
276
@mcp.tool()
def open_quick_settings() -> str:
    """Open the quick settings panel."""
    _check_connection()
    # Issued through the device's `cmd statusbar` interface.
    statusbar_action = "expand-settings"
    _shell("cmd", "statusbar", statusbar_action)
    return "Opened quick settings"
282
+
283
+
284
@mcp.tool()
def launch_app(package: str) -> str:
    """Launch an Android app by its package name (e.g. com.android.chrome)."""
    _check_connection()
    # `monkey` restricted to the package's LAUNCHER category with an event
    # count of 1 is used here as the way to start the app.
    monkey_args = (
        "monkey",
        "-p", package,
        "-c", "android.intent.category.LAUNCHER",
        "1",
    )
    monkey_output = _shell(*monkey_args)
    if "No activities found" not in monkey_output:
        return f"Launched {package}"
    raise RuntimeError(
        f"Could not launch '{package}': no launcher activity found. "
        f"Check the package name with list_installed_apps()."
    )
298
+
299
+
300
@mcp.tool()
def get_current_app() -> str:
    """Get the currently visible app (package name and activity)."""
    _check_connection()
    dump = _shell("dumpsys", "activity", "activities", timeout=5.0)
    # "mResumedActivity" itself contains "ResumedActivity", so a single
    # substring test matches both spellings. The first hit wins.
    resumed_rows = (
        row.strip() for row in dump.splitlines() if "ResumedActivity" in row
    )
    return next(resumed_rows, "Could not determine current activity")
309
+
310
+
311
@mcp.tool()
def list_installed_apps() -> list[str]:
    """List third-party installed apps (package names)."""
    _check_connection()
    listing = _shell("pm", "list", "packages", "-3", timeout=10.0)
    marker = "package:"
    # Keep only "package:<name>" rows and drop the marker from each.
    trimmed_rows = (row.strip() for row in listing.splitlines())
    names = [row.removeprefix(marker) for row in trimmed_rows if row.startswith(marker)]
    names.sort()
    return names
322
+
323
+
324
@mcp.tool()
def shell(command: str) -> str:
    """Run an arbitrary ADB shell command and return its output.

    Use this for anything not covered by the other tools.
    """
    _check_connection()
    # Call _adb directly (instead of _shell) so stderr and the exit status are
    # available; otherwise a failing command would come back as an empty string.
    proc = _adb("shell", command, timeout=15.0)
    stdout_text = proc.stdout.decode("utf-8", errors="replace").strip()
    stderr_text = proc.stderr.decode("utf-8", errors="replace").strip()

    # Clean success: identical to returning the stripped stdout.
    if proc.returncode == 0 and not stderr_text:
        return stdout_text

    # Otherwise append whatever diagnostic information exists.
    sections = [stdout_text] if stdout_text else []
    if stderr_text:
        sections.append(f"[stderr]\n{stderr_text}")
    if proc.returncode != 0:
        sections.append(f"[exit code {proc.returncode}]")
    return "\n".join(sections)
333
+
334
+
335
@mcp.tool()
def get_device_info() -> dict:
    """Get device information: screen size, Android version, model, and battery level."""
    _check_connection()

    # A failed size lookup is reported inline under "screen" instead of
    # aborting the whole call.
    try:
        screen = get_screen_size()
    except Exception as e:
        screen = {"error": str(e)}

    # System properties read through getprop.
    android_version = _shell("getprop", "ro.build.version.release")
    model = _shell("getprop", "ro.product.model")
    manufacturer = _shell("getprop", "ro.product.manufacturer")

    # Numeric battery status -> readable label; unknown codes are kept as text.
    status_labels = {
        1: "unknown",
        2: "charging",
        3: "discharging",
        4: "not_charging",
        5: "full",
    }
    battery = {}
    for raw_row in _shell("dumpsys", "battery").splitlines():
        row = raw_row.strip()
        if row.startswith("level:"):
            battery["level"] = int(row.split(":")[1].strip())
        elif row.startswith("status:"):
            status_code = int(row.split(":")[1].strip())
            battery["status"] = status_labels.get(status_code, str(status_code))

    return {
        "screen": screen,
        "android_version": android_version,
        "model": model,
        "manufacturer": manufacturer,
        "battery": battery,
    }
380
+
381
+
382
+ # ---------------------------------------------------------------------------
383
+ # Entry point
384
+ # ---------------------------------------------------------------------------
385
+
386
+
387
def main() -> None:
    """Run the MCP server over stdio.

    Calls ``mcp.run()`` with its default arguments. This is the target of the
    ``agi-android-mcp`` console script declared in pyproject.toml.
    """
    mcp.run()
390
+
391
+
392
# Allow running the module directly as a script; importing it only defines
# the server and its tools without starting it.
if __name__ == "__main__":
    main()