minitap-mobile-use 0.0.1.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of minitap-mobile-use might be problematic. Click here for more details.

Files changed (95) hide show
  1. minitap_mobile_use-0.0.1.dev0/LICENSE +21 -0
  2. minitap_mobile_use-0.0.1.dev0/PKG-INFO +274 -0
  3. minitap_mobile_use-0.0.1.dev0/README.md +221 -0
  4. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/__init__.py +0 -0
  5. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/contextor/contextor.py +42 -0
  6. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/cortex/cortex.md +93 -0
  7. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/cortex/cortex.py +107 -0
  8. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/cortex/types.py +11 -0
  9. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/executor/executor.md +73 -0
  10. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/executor/executor.py +84 -0
  11. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/executor/executor_context_cleaner.py +27 -0
  12. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/executor/utils.py +11 -0
  13. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/hopper/hopper.md +13 -0
  14. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/hopper/hopper.py +45 -0
  15. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/orchestrator/human.md +13 -0
  16. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/orchestrator/orchestrator.md +18 -0
  17. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/orchestrator/orchestrator.py +114 -0
  18. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/orchestrator/types.py +14 -0
  19. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/outputter/human.md +25 -0
  20. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/outputter/outputter.py +75 -0
  21. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/outputter/test_outputter.py +107 -0
  22. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/planner/human.md +12 -0
  23. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/planner/planner.md +64 -0
  24. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/planner/planner.py +64 -0
  25. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/planner/types.py +44 -0
  26. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/planner/utils.py +45 -0
  27. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/agents/summarizer/summarizer.py +34 -0
  28. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/clients/device_hardware_client.py +23 -0
  29. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/clients/ios_client.py +44 -0
  30. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/clients/screen_api_client.py +53 -0
  31. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/config.py +285 -0
  32. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/constants.py +2 -0
  33. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/context.py +65 -0
  34. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/controllers/__init__.py +0 -0
  35. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/controllers/mobile_command_controller.py +379 -0
  36. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/controllers/platform_specific_commands_controller.py +74 -0
  37. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/graph/graph.py +149 -0
  38. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/graph/state.py +73 -0
  39. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/main.py +122 -0
  40. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/__init__.py +12 -0
  41. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/agent.py +524 -0
  42. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/builders/__init__.py +10 -0
  43. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/builders/agent_config_builder.py +213 -0
  44. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/builders/index.py +15 -0
  45. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/builders/task_request_builder.py +218 -0
  46. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/constants.py +14 -0
  47. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/examples/README.md +45 -0
  48. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/examples/__init__.py +1 -0
  49. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/examples/simple_photo_organizer.py +76 -0
  50. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/examples/smart_notification_assistant.py +177 -0
  51. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/types/__init__.py +49 -0
  52. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/types/agent.py +73 -0
  53. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/types/exceptions.py +74 -0
  54. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/types/task.py +191 -0
  55. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/sdk/utils.py +28 -0
  56. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/config.py +19 -0
  57. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/device_hardware_bridge.py +212 -0
  58. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/device_screen_api.py +143 -0
  59. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/start_servers.py +151 -0
  60. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/stop_servers.py +215 -0
  61. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/servers/utils.py +11 -0
  62. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/services/accessibility.py +100 -0
  63. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/services/llm.py +143 -0
  64. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/index.py +54 -0
  65. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/back.py +52 -0
  66. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/copy_text_from.py +77 -0
  67. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/erase_text.py +124 -0
  68. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/input_text.py +74 -0
  69. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/launch_app.py +59 -0
  70. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/list_packages.py +78 -0
  71. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/long_press_on.py +62 -0
  72. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/open_link.py +59 -0
  73. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/paste_text.py +66 -0
  74. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/press_key.py +58 -0
  75. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/run_flow.py +57 -0
  76. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/stop_app.py +58 -0
  77. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/swipe.py +56 -0
  78. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/take_screenshot.py +70 -0
  79. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/tap.py +66 -0
  80. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/mobile/wait_for_animation_to_end.py +68 -0
  81. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/tools/tool_wrapper.py +33 -0
  82. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/cli_helpers.py +40 -0
  83. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/cli_selection.py +144 -0
  84. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/conversations.py +31 -0
  85. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/decorators.py +123 -0
  86. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/errors.py +6 -0
  87. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/file.py +13 -0
  88. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/logger.py +184 -0
  89. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/media.py +73 -0
  90. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/recorder.py +55 -0
  91. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/requests_utils.py +37 -0
  92. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/shell_utils.py +20 -0
  93. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/time.py +6 -0
  94. minitap_mobile_use-0.0.1.dev0/minitap/mobile_use/utils/ui_hierarchy.py +30 -0
  95. minitap_mobile_use-0.0.1.dev0/pyproject.toml +124 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Minitap, Inc
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,274 @@
1
+ Metadata-Version: 2.3
2
+ Name: minitap-mobile-use
3
+ Version: 0.0.1.dev0
4
+ Summary: AI-powered multi-agent system that automates real Android and iOS devices through low-level control using LangGraph.
5
+ Author: Pierre-Louis Favreau, Jean-Pierre Lo, Nicolas Dehandschoewercker
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Minitap, Inc
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ Requires-Dist: langgraph==0.5.0
28
+ Requires-Dist: adbutils==2.9.3
29
+ Requires-Dist: langchain-google-genai==2.1.5
30
+ Requires-Dist: langchain==0.3.26
31
+ Requires-Dist: langchain-core==0.3.66
32
+ Requires-Dist: jinja2==3.1.6
33
+ Requires-Dist: python-dotenv==1.1.1
34
+ Requires-Dist: pydantic-settings==2.10.1
35
+ Requires-Dist: langchain-mcp-adapters==0.1.7
36
+ Requires-Dist: langchain-openai==0.3.27
37
+ Requires-Dist: typer==0.16.0
38
+ Requires-Dist: langchain-cerebras>=0.5.0
39
+ Requires-Dist: inquirer>=3.4.0
40
+ Requires-Dist: sseclient-py==1.8.0
41
+ Requires-Dist: fastapi==0.111.0
42
+ Requires-Dist: uvicorn[standard]==0.30.1
43
+ Requires-Dist: colorama>=0.4.6
44
+ Requires-Dist: psutil>=5.9.0
45
+ Requires-Dist: ruff==0.5.3 ; extra == 'dev'
46
+ Requires-Dist: pytest==8.4.1 ; extra == 'dev'
47
+ Requires-Dist: pytest-cov==5.0.0 ; extra == 'dev'
48
+ Requires-Python: >=3.10
49
+ Project-URL: Homepage, https://minitap.ai/
50
+ Project-URL: Source, https://github.com/minitap-ai/mobile-use
51
+ Provides-Extra: dev
52
+ Description-Content-Type: text/markdown
53
+
54
+ # mobile-use: automate your phone with natural language
55
+
56
+ <div align="center">
57
+
58
+ ![mobile-use in Action](./doc/linkedin-demo-with-text.gif)
59
+
60
+ </div>
61
+
62
+ <div align="center">
63
+
64
+ [![Discord](https://img.shields.io/discord/1403058278342201394?color=7289DA&label=Discord&logo=discord&logoColor=white&style=for-the-badge)](https://discord.gg/6nSqmQ9pQs)
65
+ [![GitHub stars](https://img.shields.io/github/stars/minitap-ai/mobile-use?style=for-the-badge&color=e0a8dd)](https://github.com/minitap-ai/mobile-use/stargazers)
66
+
67
+ <p align="center">
68
+ <a href="https://discord.gg/6nSqmQ9pQs"><b>Discord</b></a> •
69
+ <a href="https://x.com/minitap_ai?t=iRWtI497UhRGLeCKYQekig&s=09"><b>Twitter / X</b></a>
70
+ </p>
71
+
72
+ </div>
73
+
74
+ Mobile-use is a powerful, open-source AI agent that controls your Android or iOS device using natural language. It understands your commands and interacts with the UI to perform tasks, from sending messages to navigating complex apps.
75
+
76
+ > Mobile-use is quickly evolving. Your suggestions, ideas, and reported bugs will shape this project. Do not hesitate to join in the conversation on [Discord](https://discord.gg/6nSqmQ9pQs) or contribute directly, we will reply to everyone! ❤️
77
+
78
+ ## ✨ Features
79
+
80
+ - 🗣️ **Natural Language Control**: Interact with your phone using your native language.
81
+ - 📱 **UI-Aware Automation**: Intelligently navigates through app interfaces.
82
+ - 📊 **Data Scraping**: Extract information from any app and structure it into your desired format (e.g., JSON) using a natural language description.
83
+ - 🔧 **Extensible & Customizable**: Easily configure different LLMs to power the agents that power mobile-use.
84
+
85
+ ## Benchmarks
86
+
87
+ <p align="center">
88
+ <img src="./doc/benchmark.jpg" alt="Project banner" />
89
+ </p>
90
+
91
+ We are the global number 1 open-source agent (pass@1) on the AndroidWorld benchmark.
92
+
93
+ More info here: https://minitap.ai/research/mobile-ai-agents-benchmark
94
+
95
+ The official leaderboard is available [here](https://docs.google.com/spreadsheets/d/1cchzP9dlTZ3WXQTfYNhh3avxoLipqHN75v1Tb86uhHo/edit?pli=1&gid=0#gid=0)
96
+
97
+ ## 🚀 Getting Started
98
+
99
+ Ready to automate your mobile experience? Follow these steps to get mobile-use up and running.
100
+
101
+ 1. **Set up Environment Variables:**
102
+ Copy the example `.env.example` file to `.env` and add your API keys.
103
+
104
+ ```bash
105
+ cp .env.example .env
106
+ ```
107
+
108
+ 2. **(Optional) Customize LLM Configuration:**
109
+ To use different models or providers, create your own LLM configuration file.
110
+ ```bash
111
+ cp llm-config.override.template.jsonc llm-config.override.jsonc
112
+ ```
113
+ Then, edit `llm-config.override.jsonc` to fit your needs.
114
+
115
+ ### Quick Launch (Docker)
116
+
117
+ > [!NOTE]
118
+ > This quickstart is only available for Android devices/emulators as of now, and you must have Docker installed.
119
+
120
+ First:
121
+
122
+ - Either plug in your Android device and enable USB debugging via the Developer Options
123
+ - Or launch an Android emulator
124
+
125
+ > [!IMPORTANT]
126
+ > At some point, the terminal will HANG, and Maestro will ask you `Maestro CLI would like to collect anonymous usage data to improve the product.`
127
+ > It's up to you whether you accept (i.e. enter 'Y') or not (i.e. enter 'n').
128
+
129
+ Then run in your terminal:
130
+
131
+ 1. For Linux/macOS:
132
+
133
+ ```bash
134
+ chmod +x mobile-use.sh
135
+ ./mobile-use.sh \
136
+ "Open Gmail, find first 3 unread emails, and list their sender and subject line" \
137
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
138
+ ```
139
+
140
+ 2. For Windows (inside a PowerShell terminal):
141
+
142
+ ```powershell
143
+ powershell.exe -ExecutionPolicy Bypass -File mobile-use.ps1 `
144
+ "Open Gmail, find first 3 unread emails, and list their sender and subject line" `
145
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
146
+ ```
147
+
148
+ > [!NOTE]
149
+ > If using your own device, make sure to accept the ADB-related connection requests that will pop up on your device.
150
+ > Similarly, Maestro will need to install its APK on your device, which will also require you to accept the installation request.
151
+
152
+ #### 🧰 Troubleshooting
153
+
154
+ The script will try to connect to your device via IP.
155
+ Therefore, your device **must be connected to the same Wi-Fi network as your computer**.
156
+
157
+ ##### 1. No device IP found
158
+
159
+ If the script fails with the following message:
160
+
161
+ ```
162
+ Could not get device IP. Is a device connected via USB and on the same Wi-Fi network?
163
+ ```
164
+
165
+ Then it couldn't find one of the common Wi-Fi interfaces on your device.
166
+ Therefore, you must determine what WLAN interface your phone is using via `adb shell ip addr show up`.
167
+ Then add the `--interface <YOUR_INTERFACE_NAME>` option to the script.
168
+
169
+ ##### 2. Failed to connect to <DEVICE_IP>:5555 inside Docker
170
+
171
+ This is most probably an issue with your firewall blocking the connection. Therefore there is no clear fix for this.
172
+
173
+ ##### 3. Failed to pull GHCR docker images (unauthorized)
174
+
175
+ Since the UV Docker images rely on public `ghcr.io` repositories, you may have an expired token if you previously used `ghcr.io` for private repositories.
176
+ Try running `docker logout ghcr.io` and then run the script again.
177
+
178
+ ### Manual Launch (Development Mode)
179
+
180
+ For developers who want to set up the environment manually:
181
+
182
+ #### 1. Device Support
183
+
184
+ Mobile-use currently supports the following devices:
185
+
186
+ - **Physical Android Phones**: Connect via USB with USB debugging enabled.
187
+ - **Android Simulators**: Set up through Android Studio.
188
+ - **iOS Simulators**: Supported for macOS users.
189
+
190
+ > [!NOTE]
191
+ > Physical iOS devices are not yet supported.
192
+
193
+ #### 2. Prerequisites
194
+
195
+ For Android:
196
+
197
+ - **[Android Debug Bridge (ADB)](https://developer.android.com/studio/releases/platform-tools)**: A tool to connect to your device.
198
+
199
+ For iOS:
200
+
201
+ - **[Xcode](https://developer.apple.com/xcode/)**: Apple's IDE for iOS development.
202
+
203
+ Before you begin, ensure you have the following installed:
204
+
205
+ - **[uv](https://github.com/astral-sh/uv)**: A lightning-fast Python package manager.
206
+ - **[Maestro](https://maestro.mobile.dev/getting-started/installing-maestro)**: The framework we use to interact with your device.
207
+
208
+ #### 3. Installation
209
+
210
+ 1. **Clone the repository:**
211
+
212
+ ```bash
213
+ git clone https://github.com/minitap-ai/mobile-use.git && cd mobile-use
214
+ ```
215
+
216
+ 2. [**Setup environment variables**](#-getting-started)
217
+
218
+ 3. **Create & activate the virtual environment:**
219
+
220
+ ```bash
221
+ # This will create a .venv directory using the Python version in .python-version
222
+ uv venv
223
+
224
+ # Activate the environment
225
+ # On macOS/Linux:
226
+ source .venv/bin/activate
227
+ # On Windows:
228
+ .venv\Scripts\activate
229
+ ```
230
+
231
+ 4. **Install dependencies:**
232
+ ```bash
233
+ # Sync with the locked dependencies for a consistent setup
234
+ uv sync
235
+ ```
236
+
237
+ ## 👨‍💻 Usage
238
+
239
+ To run mobile-use, simply pass your command as an argument.
240
+
241
+ **Example 1: Basic Command**
242
+
243
+ ```bash
244
+ python ./src/mobile_use/main.py "Go to settings and tell me my current battery level"
245
+ ```
246
+
247
+ **Example 2: Data Scraping**
248
+
249
+ Extract specific information and get it back in a structured format. For instance, to get a list of your unread emails:
250
+
251
+ ```bash
252
+ python ./src/mobile_use/main.py \
253
+ "Open Gmail, find all unread emails, and list their sender and subject line" \
254
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
255
+ ```
256
+
257
+ > [!NOTE]
258
+ > If you haven't configured a specific model, mobile-use will prompt you to choose one from the available options.
259
+
260
+ ## ❤️ Contributing
261
+
262
+ We love contributions! Whether you're fixing a bug, adding a feature, or improving documentation, your help is welcome. Please read our **[Contributing Guidelines](CONTRIBUTING.md)** to get started.
263
+
264
+ ## ⭐ Star History
265
+
266
+ <p align="center">
267
+ <a href="https://star-history.com/#minitap-ai/mobile-use&Date">
268
+ <img src="https://api.star-history.com/svg?repos=minitap-ai/mobile-use&type=Date" alt="Star History Chart" />
269
+ </a>
270
+ </p>
271
+
272
+ ## 📜 License
273
+
274
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,221 @@
1
+ # mobile-use: automate your phone with natural language
2
+
3
+ <div align="center">
4
+
5
+ ![mobile-use in Action](./doc/linkedin-demo-with-text.gif)
6
+
7
+ </div>
8
+
9
+ <div align="center">
10
+
11
+ [![Discord](https://img.shields.io/discord/1403058278342201394?color=7289DA&label=Discord&logo=discord&logoColor=white&style=for-the-badge)](https://discord.gg/6nSqmQ9pQs)
12
+ [![GitHub stars](https://img.shields.io/github/stars/minitap-ai/mobile-use?style=for-the-badge&color=e0a8dd)](https://github.com/minitap-ai/mobile-use/stargazers)
13
+
14
+ <p align="center">
15
+ <a href="https://discord.gg/6nSqmQ9pQs"><b>Discord</b></a> •
16
+ <a href="https://x.com/minitap_ai?t=iRWtI497UhRGLeCKYQekig&s=09"><b>Twitter / X</b></a>
17
+ </p>
18
+
19
+ </div>
20
+
21
+ Mobile-use is a powerful, open-source AI agent that controls your Android or iOS device using natural language. It understands your commands and interacts with the UI to perform tasks, from sending messages to navigating complex apps.
22
+
23
+ > Mobile-use is quickly evolving. Your suggestions, ideas, and reported bugs will shape this project. Do not hesitate to join in the conversation on [Discord](https://discord.gg/6nSqmQ9pQs) or contribute directly, we will reply to everyone! ❤️
24
+
25
+ ## ✨ Features
26
+
27
+ - 🗣️ **Natural Language Control**: Interact with your phone using your native language.
28
+ - 📱 **UI-Aware Automation**: Intelligently navigates through app interfaces.
29
+ - 📊 **Data Scraping**: Extract information from any app and structure it into your desired format (e.g., JSON) using a natural language description.
30
+ - 🔧 **Extensible & Customizable**: Easily configure different LLMs to power the agents that power mobile-use.
31
+
32
+ ## Benchmarks
33
+
34
+ <p align="center">
35
+ <img src="./doc/benchmark.jpg" alt="Project banner" />
36
+ </p>
37
+
38
+ We are the global number 1 open-source agent (pass@1) on the AndroidWorld benchmark.
39
+
40
+ More info here: https://minitap.ai/research/mobile-ai-agents-benchmark
41
+
42
+ The official leaderboard is available [here](https://docs.google.com/spreadsheets/d/1cchzP9dlTZ3WXQTfYNhh3avxoLipqHN75v1Tb86uhHo/edit?pli=1&gid=0#gid=0)
43
+
44
+ ## 🚀 Getting Started
45
+
46
+ Ready to automate your mobile experience? Follow these steps to get mobile-use up and running.
47
+
48
+ 1. **Set up Environment Variables:**
49
+ Copy the example `.env.example` file to `.env` and add your API keys.
50
+
51
+ ```bash
52
+ cp .env.example .env
53
+ ```
54
+
55
+ 2. **(Optional) Customize LLM Configuration:**
56
+ To use different models or providers, create your own LLM configuration file.
57
+ ```bash
58
+ cp llm-config.override.template.jsonc llm-config.override.jsonc
59
+ ```
60
+ Then, edit `llm-config.override.jsonc` to fit your needs.
61
+
62
+ ### Quick Launch (Docker)
63
+
64
+ > [!NOTE]
65
+ > This quickstart is only available for Android devices/emulators as of now, and you must have Docker installed.
66
+
67
+ First:
68
+
69
+ - Either plug in your Android device and enable USB debugging via the Developer Options
70
+ - Or launch an Android emulator
71
+
72
+ > [!IMPORTANT]
73
+ > At some point, the terminal will HANG, and Maestro will ask you `Maestro CLI would like to collect anonymous usage data to improve the product.`
74
+ > It's up to you whether you accept (i.e. enter 'Y') or not (i.e. enter 'n').
75
+
76
+ Then run in your terminal:
77
+
78
+ 1. For Linux/macOS:
79
+
80
+ ```bash
81
+ chmod +x mobile-use.sh
82
+ ./mobile-use.sh \
83
+ "Open Gmail, find first 3 unread emails, and list their sender and subject line" \
84
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
85
+ ```
86
+
87
+ 2. For Windows (inside a PowerShell terminal):
88
+
89
+ ```powershell
90
+ powershell.exe -ExecutionPolicy Bypass -File mobile-use.ps1 `
91
+ "Open Gmail, find first 3 unread emails, and list their sender and subject line" `
92
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
93
+ ```
94
+
95
+ > [!NOTE]
96
+ > If using your own device, make sure to accept the ADB-related connection requests that will pop up on your device.
97
+ > Similarly, Maestro will need to install its APK on your device, which will also require you to accept the installation request.
98
+
99
+ #### 🧰 Troubleshooting
100
+
101
+ The script will try to connect to your device via IP.
102
+ Therefore, your device **must be connected to the same Wi-Fi network as your computer**.
103
+
104
+ ##### 1. No device IP found
105
+
106
+ If the script fails with the following message:
107
+
108
+ ```
109
+ Could not get device IP. Is a device connected via USB and on the same Wi-Fi network?
110
+ ```
111
+
112
+ Then it couldn't find one of the common Wi-Fi interfaces on your device.
113
+ Therefore, you must determine what WLAN interface your phone is using via `adb shell ip addr show up`.
114
+ Then add the `--interface <YOUR_INTERFACE_NAME>` option to the script.
115
+
116
+ ##### 2. Failed to connect to <DEVICE_IP>:5555 inside Docker
117
+
118
+ This is most probably an issue with your firewall blocking the connection. Therefore there is no clear fix for this.
119
+
120
+ ##### 3. Failed to pull GHCR docker images (unauthorized)
121
+
122
+ Since the UV Docker images rely on public `ghcr.io` repositories, you may have an expired token if you previously used `ghcr.io` for private repositories.
123
+ Try running `docker logout ghcr.io` and then run the script again.
124
+
125
+ ### Manual Launch (Development Mode)
126
+
127
+ For developers who want to set up the environment manually:
128
+
129
+ #### 1. Device Support
130
+
131
+ Mobile-use currently supports the following devices:
132
+
133
+ - **Physical Android Phones**: Connect via USB with USB debugging enabled.
134
+ - **Android Simulators**: Set up through Android Studio.
135
+ - **iOS Simulators**: Supported for macOS users.
136
+
137
+ > [!NOTE]
138
+ > Physical iOS devices are not yet supported.
139
+
140
+ #### 2. Prerequisites
141
+
142
+ For Android:
143
+
144
+ - **[Android Debug Bridge (ADB)](https://developer.android.com/studio/releases/platform-tools)**: A tool to connect to your device.
145
+
146
+ For iOS:
147
+
148
+ - **[Xcode](https://developer.apple.com/xcode/)**: Apple's IDE for iOS development.
149
+
150
+ Before you begin, ensure you have the following installed:
151
+
152
+ - **[uv](https://github.com/astral-sh/uv)**: A lightning-fast Python package manager.
153
+ - **[Maestro](https://maestro.mobile.dev/getting-started/installing-maestro)**: The framework we use to interact with your device.
154
+
155
+ #### 3. Installation
156
+
157
+ 1. **Clone the repository:**
158
+
159
+ ```bash
160
+ git clone https://github.com/minitap-ai/mobile-use.git && cd mobile-use
161
+ ```
162
+
163
+ 2. [**Setup environment variables**](#-getting-started)
164
+
165
+ 3. **Create & activate the virtual environment:**
166
+
167
+ ```bash
168
+ # This will create a .venv directory using the Python version in .python-version
169
+ uv venv
170
+
171
+ # Activate the environment
172
+ # On macOS/Linux:
173
+ source .venv/bin/activate
174
+ # On Windows:
175
+ .venv\Scripts\activate
176
+ ```
177
+
178
+ 4. **Install dependencies:**
179
+ ```bash
180
+ # Sync with the locked dependencies for a consistent setup
181
+ uv sync
182
+ ```
183
+
184
+ ## 👨‍💻 Usage
185
+
186
+ To run mobile-use, simply pass your command as an argument.
187
+
188
+ **Example 1: Basic Command**
189
+
190
+ ```bash
191
+ python ./src/mobile_use/main.py "Go to settings and tell me my current battery level"
192
+ ```
193
+
194
+ **Example 2: Data Scraping**
195
+
196
+ Extract specific information and get it back in a structured format. For instance, to get a list of your unread emails:
197
+
198
+ ```bash
199
+ python ./src/mobile_use/main.py \
200
+ "Open Gmail, find all unread emails, and list their sender and subject line" \
201
+ --output-description "A JSON list of objects, each with 'sender' and 'subject' keys"
202
+ ```
203
+
204
+ > [!NOTE]
205
+ > If you haven't configured a specific model, mobile-use will prompt you to choose one from the available options.
206
+
207
+ ## ❤️ Contributing
208
+
209
+ We love contributions! Whether you're fixing a bug, adding a feature, or improving documentation, your help is welcome. Please read our **[Contributing Guidelines](CONTRIBUTING.md)** to get started.
210
+
211
+ ## ⭐ Star History
212
+
213
+ <p align="center">
214
+ <a href="https://star-history.com/#minitap-ai/mobile-use&Date">
215
+ <img src="https://api.star-history.com/svg?repos=minitap-ai/mobile-use&type=Date" alt="Star History Chart" />
216
+ </a>
217
+ </p>
218
+
219
+ ## 📜 License
220
+
221
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,42 @@
1
+ from minitap.mobile_use.agents.executor.utils import is_last_tool_message_take_screenshot
2
+ from minitap.mobile_use.controllers.mobile_command_controller import get_screen_data
3
+ from minitap.mobile_use.controllers.platform_specific_commands_controller import (
4
+ get_device_date,
5
+ get_focused_app_info,
6
+ )
7
+ from minitap.mobile_use.graph.state import State
8
+ from minitap.mobile_use.utils.decorators import wrap_with_callbacks
9
+ from minitap.mobile_use.utils.logger import get_logger
10
+ from minitap.mobile_use.context import MobileUseContext
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
class ContextorNode:
    """Callable node that collects device context and writes it into the state.

    On each call it queries the screen data, the focused-app info and the
    device date, then hands the collected values to ``state.sanitize_update``.
    """

    def __init__(self, ctx: MobileUseContext):
        """Keep a reference to the context used for every device query."""
        self.ctx = ctx

    @wrap_with_callbacks(
        before=lambda: logger.info("Starting Contextor Agent"),
        on_success=lambda _: logger.success("Contextor Agent"),
        on_failure=lambda _: logger.error("Contextor Agent"),
    )
    def __call__(self, state: State):
        """Gather the current device context and return the state update.

        Args:
            state: Current graph state; its ``messages`` are inspected to
                decide whether the screenshot is included in the update.

        Returns:
            Whatever ``state.sanitize_update`` returns for the update mapping
            built here.
        """
        # Query the device in a fixed order: screen, focused app, then date.
        screen = get_screen_data(self.ctx.screen_api_client)
        app_info = get_focused_app_info(self.ctx)
        current_date = get_device_date(self.ctx)

        # The screenshot is only forwarded when the helper reports that the
        # last tool message was a take_screenshot; otherwise it is set to None.
        if is_last_tool_message_take_screenshot(list(state.messages)):
            screenshot = screen.base64
        else:
            screenshot = None

        refreshed = {
            "latest_screenshot_base64": screenshot,
            "latest_ui_hierarchy": screen.elements,
            "focused_app_info": app_info,
            "screen_size": (screen.width, screen.height),
            "device_date": current_date,
        }
        return state.sanitize_update(ctx=self.ctx, update=refreshed)
@@ -0,0 +1,93 @@
1
+ ## You are the **Cortex**
2
+
3
+ Your job is to **analyze the current {{ platform }} mobile device state** and produce **structured decisions** to achieve the current subgoal.
4
+
5
+ You must act like a human brain, responsible for giving instructions to your hands (the **Executor** agent). Therefore, you must act with the same imprecision and uncertainty as a human when performing swipe actions: humans don't know exactly where they are swiping (always prefer percentages of width and height over absolute coordinates); they just know they are swiping up or down, left or right, and with how much force (usually more than is truly needed - overshoot sliders, for instance).
6
+
7
+ ### Context You Receive:
8
+
9
+ You are provided with:
10
+
11
+ - 📱 **Device state**:
12
+
13
+ - Latest **UI hierarchy**
14
+ - (Optional) Latest **screenshot (base64)**. You can query one if you need it by calling the take_screenshot tool. Often, the UI hierarchy is enough to understand what is happening on the screen.
15
+ - Current **focused app info**
16
+ - **Screen size** and **device date**
17
+
18
+ - 🧭 **Task context**:
19
+
20
+ - The user's **initial goal**
21
+ - The **subgoal plan** with their statuses
22
+ - The **current subgoal** to act on (the one in `PENDING` in the plan)
23
+ - A list of **agent thoughts** (previous reasoning, observations about the environment)
24
+ - **Executor agent feedback** on the latest UI decisions
25
+
26
+ ### Your Mission:
27
+
28
+ Focus on the **current subgoal**.
29
+
30
+ 1. **Analyze the UI** and environment to understand what action is required.
31
+ 2.1. If the **subgoal is completed**, set the `complete_subgoal` field to `True`. To justify your conclusion, you will fill in the `agent_thought` field based on:
32
+
33
+ - The current UI state
34
+ - Past agent thoughts
35
+ - Recent tool effects
36
+ 2.2. Otherwise, output a **stringified structured set of instructions** that an **Executor agent** can perform on a real mobile device:
37
+
38
+ - These must be **concrete low-level actions**: back, tap, swipe, launch app, list packages, close app, input text, paste, erase text, copy, etc.
39
+ - If you refer to a UI element or coordinates, specify it clearly (e.g., `resource-id: com.whatsapp:id/search`, `text: "Alice"`, `x: 100, y: 200`).
40
+ - **The structure is up to you**, but it must be valid **JSON stringified output**. You will accompany this output with a **natural-language summary** of your reasoning and approach in your agent thought.
41
+ - When you want to launch/stop an app, prefer using its package name.
42
+ - **Only reference UI element IDs or visible texts that are explicitly present in the provided UI hierarchy or screenshot. Do not invent, infer, or guess any IDs or texts that are not directly observed**.
43
+ - **For text clearing**: When you need to completely clear text from an input field, always use **LONG PRESS** first to select the text field, then erase. Do NOT use tap + erase, as this only clears text from the cursor position.
44
+
45
+ ### Output
46
+
47
+ - **Structured Decisions**:
48
+ A **valid stringified JSON** describing what should be executed **right now** to advance the current subgoal **IF THE SUBGOAL IS NOT COMPLETED**.
49
+
50
+ - **Agent Thought** _(1-2 sentences)_:
51
+ If there is any information you need to remember for later steps, you must include it here, because only the agent thoughts will be used to produce the final structured output.
52
+
53
+ This also helps other agents understand your decision and learn from future failures.
54
+ You must also use this field to mention checkpoints when you perform actions without a definite end: for instance, "Swiping up to reveal more recipes - last seen recipe was <ID or NAME>, stop when no more".
55
+
56
+ - **Subgoal Completion** _(boolean)_:
57
+ Set to true if the current subgoal has been successfully completed - you **cannot set it to true and provide structured decisions at the same time**. You must base your decision ONLY on what you have as input (device state, agent thoughts, executor feedback, etc) - NEVER based on the decisions you have produced.
58
+
59
+ ---
60
+
61
+ ### Example
62
+
63
+ #### Current Subgoal:
64
+
65
+ > "Search for Alice in WhatsApp"
66
+
67
+ #### Structured Decisions:
68
+
69
+ ```text
70
+ "{\"action\": \"tap\", \"target\": {\"resource_id\": \"com.whatsapp:id/menuitem_search\", \"text\": \"Search\"}}"
71
+ ```
72
+
73
+ #### Agent Thought:
74
+
75
+ > I will tap the search icon at the top of the WhatsApp interface to begin searching for Alice.
76
+
77
+ ### Input
78
+
79
+ **Initial Goal:**
80
+ {{ initial_goal }}
81
+
82
+ **Subgoal Plan:**
83
+ {{ subgoal_plan }}
84
+
85
+ **Current Subgoal (what needs to be done right now):**
86
+ {{ current_subgoal }}
87
+
88
+ **Agent thoughts (previous reasoning, observations about the environment):**
89
+ {{ agents_thoughts }}
90
+
91
+ **Executor agent feedback on latest UI decisions:**
92
+
93
+ {{ executor_feedback }}