droidrun 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. droidrun-0.2.0/CHANGELOG.md +54 -0
  2. droidrun-0.2.0/CONTRIBUTING.md +95 -0
  3. {droidrun-0.1.0 → droidrun-0.2.0}/PKG-INFO +134 -37
  4. {droidrun-0.1.0 → droidrun-0.2.0}/README.md +125 -36
  5. droidrun-0.2.0/docs/docs.json +79 -0
  6. droidrun-0.2.0/docs/quickstart.mdx +293 -0
  7. droidrun-0.2.0/docs/v1/concepts/portal-app.mdx +59 -0
  8. droidrun-0.2.0/docs/v1/overview.mdx +98 -0
  9. droidrun-0.2.0/docs/v1/quickstart.mdx +293 -0
  10. droidrun-0.2.0/docs/v2/concepts/agent.mdx +231 -0
  11. droidrun-0.2.0/docs/v2/concepts/android-control.mdx +235 -0
  12. droidrun-0.2.0/docs/v2/concepts/planning.mdx +142 -0
  13. droidrun-0.2.0/docs/v2/concepts/portal-app.mdx +59 -0
  14. droidrun-0.2.0/docs/v2/concepts/tracing.mdx +163 -0
  15. droidrun-0.2.0/docs/v2/overview.mdx +116 -0
  16. droidrun-0.2.0/docs/v2/quickstart.mdx +371 -0
  17. droidrun-0.2.0/droidrun/__init__.py +26 -0
  18. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/__main__.py +2 -3
  19. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/adb/device.py +1 -1
  20. droidrun-0.2.0/droidrun/agent/codeact/__init__.py +13 -0
  21. droidrun-0.2.0/droidrun/agent/codeact/codeact_agent.py +334 -0
  22. droidrun-0.2.0/droidrun/agent/codeact/events.py +36 -0
  23. droidrun-0.2.0/droidrun/agent/codeact/prompts.py +78 -0
  24. droidrun-0.2.0/droidrun/agent/droid/__init__.py +13 -0
  25. droidrun-0.2.0/droidrun/agent/droid/droid_agent.py +418 -0
  26. droidrun-0.2.0/droidrun/agent/planner/__init__.py +15 -0
  27. droidrun-0.2.0/droidrun/agent/planner/events.py +20 -0
  28. droidrun-0.2.0/droidrun/agent/planner/prompts.py +144 -0
  29. droidrun-0.2.0/droidrun/agent/planner/task_manager.py +355 -0
  30. droidrun-0.2.0/droidrun/agent/planner/workflow.py +371 -0
  31. droidrun-0.2.0/droidrun/agent/utils/async_utils.py +56 -0
  32. droidrun-0.2.0/droidrun/agent/utils/chat_utils.py +92 -0
  33. droidrun-0.2.0/droidrun/agent/utils/executer.py +97 -0
  34. droidrun-0.2.0/droidrun/agent/utils/llm_picker.py +143 -0
  35. droidrun-0.2.0/droidrun/cli/main.py +580 -0
  36. droidrun-0.2.0/droidrun/tools/__init__.py +14 -0
  37. droidrun-0.2.0/droidrun/tools/actions.py +838 -0
  38. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/tools/device.py +1 -1
  39. droidrun-0.2.0/droidrun/tools/loader.py +60 -0
  40. {droidrun-0.1.0 → droidrun-0.2.0}/pyproject.toml +10 -2
  41. droidrun-0.2.0/static/droidrun-dark.png +0 -0
  42. droidrun-0.2.0/static/droidrun.png +0 -0
  43. droidrun-0.1.0/docs/installation.mdx +0 -167
  44. droidrun-0.1.0/docs/mint.json +0 -48
  45. droidrun-0.1.0/docs/quickstart.mdx +0 -155
  46. droidrun-0.1.0/droidrun/__init__.py +0 -19
  47. droidrun-0.1.0/droidrun/agent/__init__.py +0 -16
  48. droidrun-0.1.0/droidrun/agent/llm_reasoning.py +0 -567
  49. droidrun-0.1.0/droidrun/agent/react_agent.py +0 -556
  50. droidrun-0.1.0/droidrun/cli/main.py +0 -265
  51. droidrun-0.1.0/droidrun/llm/__init__.py +0 -24
  52. droidrun-0.1.0/droidrun/tools/__init__.py +0 -35
  53. droidrun-0.1.0/droidrun/tools/actions.py +0 -854
  54. {droidrun-0.1.0 → droidrun-0.2.0}/.gitignore +0 -0
  55. {droidrun-0.1.0 → droidrun-0.2.0}/LICENSE +0 -0
  56. {droidrun-0.1.0 → droidrun-0.2.0}/MANIFEST.in +0 -0
  57. {droidrun-0.1.0 → droidrun-0.2.0}/docs/conf.py +0 -0
  58. {droidrun-0.1.0 → droidrun-0.2.0}/docs/favicon.png +0 -0
  59. {droidrun-0.1.0 → droidrun-0.2.0}/docs/introduction.mdx +0 -0
  60. {droidrun-0.1.0 → droidrun-0.2.0}/docs/logo/dark.svg +0 -0
  61. {droidrun-0.1.0 → droidrun-0.2.0}/docs/logo/light.svg +0 -0
  62. {droidrun-0.1.0/docs → droidrun-0.2.0/docs/v1}/concepts/agent.mdx +0 -0
  63. {droidrun-0.1.0/docs → droidrun-0.2.0/docs/v1}/concepts/android-control.mdx +0 -0
  64. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/adb/__init__.py +0 -0
  65. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/adb/manager.py +0 -0
  66. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/adb/wrapper.py +0 -0
  67. {droidrun-0.1.0 → droidrun-0.2.0}/droidrun/cli/__init__.py +0 -0
  68. {droidrun-0.1.0 → droidrun-0.2.0}/setup.py +0 -0
@@ -0,0 +1,54 @@
1
+ # Changelog
2
+
3
+ All notable changes to the DroidRun project will be documented in this file.
4
+
5
+ ## [0.2.0] - 2025-05-21
6
+
7
+ ### Added
8
+ - **New LLM Providers**
9
+ - Added support for Ollama (local LLM models)
10
+ - Added support for DeepSeek models
11
+ - Case-sensitive provider names: OpenAI, Anthropic, Gemini, Ollama, DeepSeek
12
+
13
+ - **Planning System**
14
+ - Added DroidAgent with planning capabilities for complex tasks
15
+ - Introduced task decomposition for multi-step operations
16
+
17
+ - **LlamaIndex Integration**
18
+ - Replaced custom LLM wrapper with LlamaIndex integration
19
+ - Added direct support for LlamaIndex LLM classes
20
+
21
+ - **Tracing and Debugging**
22
+ - Added integration with Arize Phoenix for execution tracing
23
+ - Added token usage analysis
24
+ - Added execution time metrics
25
+
26
+ - **CLI Enhancements**
27
+ - Added `--reasoning` flag to enable planning capabilities
28
+ - Added `--tracing` flag for execution tracing with Phoenix
29
+
30
+ - **Documentation**
31
+ - Added comprehensive documentation for new features
32
+ - Created dedicated pages for planning and tracing
33
+ - Updated all examples to reflect new API patterns
34
+
35
+ ### Changed
36
+ - **Agent Architecture**
37
+ - Replaced ReActAgent with the new DroidAgent system
38
+ - Refactored agent initialization to use tools_instance and tool_list
39
+ - Changed API from `task` parameter to `goal` parameter
40
+
41
+ ### Deprecated
42
+ - Old agent initialization pattern with `device_serial` parameter
43
+ - Direct LLM provider initialization (replaced by LlamaIndex)
44
+ - Case-insensitive provider names (use the exact casing: OpenAI, Anthropic, Gemini, Ollama, DeepSeek)
45
+
46
+ ### Removed
47
+ - ReActAgent class (replaced by DroidAgent)
48
+ - LLMReasoner class (replaced by LlamaIndex)
49
+ - Some previously documented tools that were not fully implemented
50
+
51
+ ### Fixed
52
+ - Various UI interaction issues
53
+ - Improved error handling in device connections
54
+ - More reliable Android element detection
@@ -0,0 +1,95 @@
1
+ # Contributing to DroidRun
2
+
3
+ Thank you for your interest in contributing to DroidRun! This document provides guidelines and instructions for contributing to the project.
4
+
5
+ ## Getting Started
6
+
7
+ 1. Fork the repository on GitHub
8
+ 2. Clone your fork:
9
+ ```bash
10
+ git clone https://github.com/YOUR_USERNAME/droidrun.git
11
+ cd droidrun
12
+ ```
13
+ 3. Set up your development environment as described below
14
+
15
+ ## Development Setup
16
+
17
+ 1. Create and activate a virtual environment:
18
+ ```bash
19
+ python -m venv .venv
20
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
21
+ ```
22
+
23
+ 2. Install development dependencies:
24
+ ```bash
25
+ pip install -e ".[dev]"
26
+ ```
27
+
28
+ ## Making Contributions
29
+
30
+ 1. Create a new branch for your feature:
31
+ ```bash
32
+ git checkout -b feature/your-feature-name
33
+ ```
34
+
35
+ 2. Make your changes following our coding standards:
36
+ - Use type hints for Python functions
37
+ - Follow PEP 8 style guidelines
38
+ - Write descriptive commit messages
39
+ - Update documentation as needed
40
+
41
+ 3. Commit your changes:
42
+ ```bash
43
+ git add .
44
+ git commit -m "feat: add your feature description"
45
+ ```
46
+
47
+ 4. Push to your fork:
48
+ ```bash
49
+ git push origin feature/your-feature-name
50
+ ```
51
+
52
+ 5. Open a Pull Request
53
+
54
+ ## Documentation
55
+
56
+ - Update the README.md if you change functionality
57
+ - Add docstrings to new functions and classes
58
+ - Update the documentation in the `docs/` directory
59
+
60
+ ## Community
61
+
62
+ - Join our [Discord server](https://discord.gg/ZZbKEZZkwK) for discussions
63
+ - Follow us on [Twitter/X](https://x.com/droid_run)
64
+ - Check our [Documentation](https://docs.droidrun.ai)
65
+ - Report bugs and request features through [GitHub Issues](https://github.com/droidrun/droidrun/issues)
66
+
67
+ ## Pull Request Process
68
+
69
+ 1. Update documentation for any modified functionality
70
+ 2. Update the changelog if applicable
71
+ 3. Get at least one code review from a maintainer
72
+ 4. Once approved, a maintainer will merge your PR
73
+
74
+ ## Release Process
75
+
76
+ Releases are handled by the maintainers. Version numbers follow [Semantic Versioning](https://semver.org/).
77
+
78
+ ## Questions?
79
+
80
+ If you have questions about contributing:
81
+ 1. Check existing GitHub issues
82
+ 2. Ask in our Discord server
83
+ 3. Open a new GitHub issue for complex questions
84
+
85
+ Thank you for contributing to DroidRun! 🚀
86
+
87
+ ## Language
88
+
89
+ English is the preferred language for all contributions, including:
90
+ - Code comments
91
+ - Documentation
92
+ - Commit messages
93
+ - Pull requests
94
+ - Issue reports
95
+ - Community discussions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: droidrun
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A framework for controlling Android devices through LLM agents
5
5
  Project-URL: Homepage, https://github.com/droidrun/droidrun
6
6
  Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
@@ -28,7 +28,15 @@ Classifier: Topic :: Utilities
28
28
  Requires-Python: >=3.10
29
29
  Requires-Dist: aiofiles>=23.0.0
30
30
  Requires-Dist: anthropic>=0.7.0
31
+ Requires-Dist: arize-phoenix
31
32
  Requires-Dist: click>=8.1.0
33
+ Requires-Dist: llama-index
34
+ Requires-Dist: llama-index-callbacks-arize-phoenix
35
+ Requires-Dist: llama-index-llms-anthropic
36
+ Requires-Dist: llama-index-llms-deepseek
37
+ Requires-Dist: llama-index-llms-gemini
38
+ Requires-Dist: llama-index-llms-ollama
39
+ Requires-Dist: llama-index-llms-openai
32
40
  Requires-Dist: openai>=1.0.0
33
41
  Requires-Dist: pillow>=10.0.0
34
42
  Requires-Dist: pydantic>=2.0.0
@@ -40,17 +48,31 @@ Requires-Dist: mypy>=1.0.0; extra == 'dev'
40
48
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
41
49
  Description-Content-Type: text/markdown
42
50
 
43
- # 🤖 DroidRun
51
+
52
+ <picture>
53
+ <source media="(prefers-color-scheme: dark)" srcset="./static/droidrun-dark.png">
54
+ <source media="(prefers-color-scheme: light)" srcset="./static/droidrun.png">
55
+ <img src="./static/droidrun.png" width="100%">
56
+ </picture>
57
+
58
+ [![GitHub stars](https://img.shields.io/github/stars/droidrun/droidrun?style=social)](https://github.com/droidrun/droidrun/stargazers)
59
+ [![Discord](https://img.shields.io/discord/1360219330318696488?color=7289DA&label=Discord&logo=discord&logoColor=white)](https://discord.gg/ZZbKEZZkwK)
60
+ [![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.droidrun.ai)
61
+ [![Twitter Follow](https://img.shields.io/twitter/follow/droid_run?style=social)](https://x.com/droid_run)
62
+
44
63
 
45
64
  DroidRun is a powerful framework for controlling Android devices through LLM agents. It allows you to automate Android device interactions using natural language commands.
46
65
 
47
66
  ## ✨ Features
48
67
 
49
68
  - Control Android devices with natural language commands
50
- - Supports multiple LLM providers (OpenAI, Anthropic, Gemini)
51
- - Easy to use CLI
69
+ - Supports multiple LLM providers (OpenAI, Anthropic, Gemini, Ollama, DeepSeek)
70
+ - Planning capabilities for complex multi-step tasks
71
+ - LlamaIndex integration for flexible LLM interactions
72
+ - Easy to use CLI with enhanced debugging features
52
73
  - Extendable Python API for custom automations
53
74
  - Screenshot analysis for visual understanding of the device
75
+ - Execution tracing with Arize Phoenix
54
76
 
55
77
  ## 📦 Installation
56
78
 
@@ -63,7 +85,7 @@ pip install droidrun
63
85
  ### 🔧 Option 2: Install from Source
64
86
 
65
87
  ```bash
66
- git clone https://github.com/yourusername/droidrun.git
88
+ git clone https://github.com/droidrun/droidrun.git
67
89
  cd droidrun
68
90
  pip install -e .
69
91
  ```
@@ -131,6 +153,8 @@ Create a `.env` file in your working directory or set environment variables:
131
153
  export OPENAI_API_KEY="your_openai_api_key_here"
132
154
  export ANTHROPIC_API_KEY="your_anthropic_api_key_here"
133
155
  export GEMINI_API_KEY="your_gemini_api_key_here"
156
+ export DEEPSEEK_API_KEY="your_deepseek_api_key_here"
157
+ # For Ollama, no API key is needed
134
158
  ```
135
159
 
136
160
  To load the environment variables from the `.env` file:
@@ -151,15 +175,6 @@ droidrun devices
151
175
  droidrun connect 192.168.1.100
152
176
  ```
153
177
 
154
- ### 🔄 4. Verify the setup
155
-
156
- Verify that everything is set up correctly:
157
-
158
- ```bash
159
- # Should list your connected device and show portal status
160
- droidrun status
161
- ```
162
-
163
178
  ## 💻 Using the CLI
164
179
 
165
180
  DroidRun's CLI is designed to be simple and intuitive. You can use it in two ways:
@@ -175,13 +190,16 @@ droidrun "Open the settings app"
175
190
 
176
191
  ```bash
177
192
  # Using OpenAI
178
- droidrun "Open the calculator app" --provider openai --model gpt-4o-mini
193
+ droidrun "Open the calculator app" --provider OpenAI --model gpt-4o-mini
179
194
 
180
195
  # Using Anthropic
181
- droidrun "Check the battery level" --provider anthropic --model claude-3-sonnet-20240229
196
+ droidrun "Check the battery level" --provider Anthropic --model claude-3-sonnet-20240229
182
197
 
183
198
  # Using Gemini
184
- droidrun "Install and open Instagram" --provider gemini --model gemini-2.0-flash
199
+ droidrun "Install and open Instagram" --provider Gemini --model models/gemini-2.5-pro-preview-05-06
200
+
201
+ # Using Ollama (local)
202
+ droidrun "Check battery level" --provider Ollama --model llama2
185
203
  ```
186
204
 
187
205
  ### ⚙️ Additional Options
@@ -190,6 +208,15 @@ droidrun "Install and open Instagram" --provider gemini --model gemini-2.0-flash
190
208
  # Specify a particular device
191
209
  droidrun "Open Chrome and search for weather" --device abc123
192
210
 
211
+ # Enable vision capabilities
212
+ droidrun "Analyze what's on the screen" --vision
213
+
214
+ # Enable planning for complex tasks
215
+ droidrun "Find and download a specific app" --reasoning
216
+
217
+ # Enable execution tracing (requires Phoenix server running)
218
+ droidrun "Debug this complex workflow" --tracing
219
+
193
220
  # Set maximum number of steps
194
221
  droidrun "Open settings and enable dark mode" --steps 20
195
222
  ```
@@ -201,40 +228,73 @@ If you want to use DroidRun in your Python code rather than via the CLI, you can
201
228
  ```python
202
229
  #!/usr/bin/env python3
203
230
  import asyncio
204
- import os
205
- from droidrun.agent.react_agent import ReActAgent
206
- from droidrun.agent.llm_reasoning import LLMReasoner
207
- from dotenv import load_dotenv
208
-
209
- # Load environment variables from .env file
210
- load_dotenv()
231
+ from droidrun.agent.droid import DroidAgent
232
+ from droidrun.agent.utils.llm_picker import load_llm
233
+ from droidrun.tools import load_tools
211
234
 
212
235
  async def main():
213
- # Create an LLM instance (choose your preferred provider)
214
- llm = LLMReasoner(
215
- llm_provider="gemini", # Can be "openai", "anthropic", or "gemini"
216
- model_name="gemini-2.0-flash", # Choose appropriate model for your provider
217
- api_key=os.environ.get("GEMINI_API_KEY"), # Get API key from environment
236
+ # Load tools
237
+ tool_list, tools_instance = await load_tools()
238
+
239
+ # Load LLM
240
+ llm = load_llm(
241
+ provider_name="Gemini", # Case sensitive: OpenAI, Ollama, Anthropic, Gemini, DeepSeek
242
+ model="models/gemini-2.5-pro-preview-05-06",
218
243
  temperature=0.2
219
244
  )
220
245
 
221
246
  # Create and run the agent
222
- agent = ReActAgent(
223
- task="Open the Settings app and check the Android version",
224
- llm=llm
247
+ agent = DroidAgent(
248
+ goal="Open the Settings app and check the Android version",
249
+ llm=llm,
250
+ tools_instance=tools_instance,
251
+ tool_list=tool_list,
252
+ vision=True, # Enable vision for screen analysis
253
+ reasoning=True # Enable planning for complex tasks
225
254
  )
226
255
 
227
- steps = await agent.run()
228
- print(f"Execution completed with {len(steps)} steps")
256
+ # Run the agent
257
+ result = await agent.run()
258
+ print(f"Success: {result['success']}")
259
+ if result.get('reason'):
260
+ print(f"Reason: {result['reason']}")
229
261
 
230
262
  if __name__ == "__main__":
231
263
  asyncio.run(main())
232
264
  ```
233
265
 
234
- Save this as `test_droidrun.py`, ensure your `.env` file has the appropriate API key, and run:
266
+ You can also use LlamaIndex directly:
235
267
 
236
- ```bash
237
- python test_droidrun.py
268
+ ```python
269
+ import asyncio
270
+ from llama_index.llms.gemini import Gemini
271
+ from droidrun.agent.droid import DroidAgent
272
+ from droidrun.tools import load_tools
273
+
274
+ async def main():
275
+ # Load tools
276
+ tool_list, tools_instance = await load_tools()
277
+
278
+ # Create LlamaIndex LLM directly
279
+ llm = Gemini(
280
+ model="models/gemini-2.5-pro-preview-05-06",
281
+ temperature=0.2
282
+ )
283
+
284
+ # Create and run the agent
285
+ agent = DroidAgent(
286
+ goal="Open the Settings app and check the Android version",
287
+ llm=llm,
288
+ tools_instance=tools_instance,
289
+ tool_list=tool_list
290
+ )
291
+
292
+ # Run the agent
293
+ result = await agent.run()
294
+ print(f"Success: {result['success']}")
295
+
296
+ if __name__ == "__main__":
297
+ asyncio.run(main())
238
298
  ```
239
299
 
240
300
  ## ❓ Troubleshooting
@@ -259,6 +319,27 @@ If DroidRun is using the wrong LLM provider:
259
319
  1. Explicitly specify the provider with `--provider` (in CLI) or `provider_name=` (in code, when calling `load_llm`)
260
320
  2. When using Gemini, ensure you have set `GEMINI_API_KEY` and specified `--provider Gemini` (provider names are case-sensitive)
261
321
 
322
+ ### 📊 Tracing Issues
323
+
324
+ If you're using the tracing feature:
325
+ 1. Make sure to install Arize Phoenix: `pip install "arize-phoenix[llama-index]"`
326
+ 2. Start the Phoenix server before running your command: `phoenix serve`
327
+ 3. Access the tracing UI at http://localhost:6006 after execution
328
+
329
+ ### 🎬 Demo Videos
330
+
331
+ 1. **Shopping Assistant**: Watch how DroidRun searches Amazon for headphones and sends the top 3 products to a colleague on WhatsApp.
332
+
333
+ Prompt: "Go to Amazon, search for headphones and write the top 3 products to my colleague on WhatsApp."
334
+
335
+ [![Shopping Assistant Demo](https://img.youtube.com/vi/VQK3JcifgwU/0.jpg)](https://www.youtube.com/watch?v=VQK3JcifgwU)
336
+
337
+ 2. **Social Media Automation**: See DroidRun open X (Twitter) and post "Hello World".
338
+
339
+ Prompt: "Open up X and post Hello World."
340
+
341
+ [![Social Media Automation Demo](https://img.youtube.com/vi/i4-sDQhzt_M/0.jpg)](https://www.youtube.com/watch?v=i4-sDQhzt_M)
342
+
262
343
  ## 💡 Example Use Cases
263
344
 
264
345
  - Automated UI testing of Android applications
@@ -267,6 +348,22 @@ If DroidRun is using the wrong LLM provider:
267
348
  - Remote assistance for less technical users
268
349
  - Exploring Android UI with natural language commands
269
350
 
351
+ ## 🗺️ Roadmap
352
+
353
+ ### 🤖 Agent:
354
+ - **Improve memory**: Enhance context retention for complex multi-step tasks
355
+ - **Expand planning capabilities**: Add support for more complex reasoning strategies
356
+ - **Add Integrations**: Support more LLM providers and agent frameworks (LangChain, Agno, etc.)
357
+
358
+ ### ⚙️ Automations:
359
+ - **Create Automation Scripts**: Generate reusable scripts from agent actions that can be scheduled or shared
360
+
361
+ ### ☁️ Cloud:
362
+ - **Hosted version**: Remote device control via web interface without local setup
363
+ - **Add-Ons**: Marketplace for extensions serving specific use cases
364
+ - **Proxy Hours**: Cloud compute time with tiered pricing for running automations
365
+ - **Droidrun AppStore**: Simple installation of Apps on your hosted devices
366
+
270
367
  ## 👥 Contributing
271
368
 
272
369
  Contributions are welcome! Please feel free to submit a Pull Request.
@@ -1,14 +1,28 @@
1
- # 🤖 DroidRun
1
+
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="./static/droidrun-dark.png">
4
+ <source media="(prefers-color-scheme: light)" srcset="./static/droidrun.png">
5
+ <img src="./static/droidrun.png" width="100%">
6
+ </picture>
7
+
8
+ [![GitHub stars](https://img.shields.io/github/stars/droidrun/droidrun?style=social)](https://github.com/droidrun/droidrun/stargazers)
9
+ [![Discord](https://img.shields.io/discord/1360219330318696488?color=7289DA&label=Discord&logo=discord&logoColor=white)](https://discord.gg/ZZbKEZZkwK)
10
+ [![Documentation](https://img.shields.io/badge/Documentation-📕-blue)](https://docs.droidrun.ai)
11
+ [![Twitter Follow](https://img.shields.io/twitter/follow/droid_run?style=social)](https://x.com/droid_run)
12
+
2
13
 
3
14
  DroidRun is a powerful framework for controlling Android devices through LLM agents. It allows you to automate Android device interactions using natural language commands.
4
15
 
5
16
  ## ✨ Features
6
17
 
7
18
  - Control Android devices with natural language commands
8
- - Supports multiple LLM providers (OpenAI, Anthropic, Gemini)
9
- - Easy to use CLI
19
+ - Supports multiple LLM providers (OpenAI, Anthropic, Gemini, Ollama, DeepSeek)
20
+ - Planning capabilities for complex multi-step tasks
21
+ - LlamaIndex integration for flexible LLM interactions
22
+ - Easy to use CLI with enhanced debugging features
10
23
  - Extendable Python API for custom automations
11
24
  - Screenshot analysis for visual understanding of the device
25
+ - Execution tracing with Arize Phoenix
12
26
 
13
27
  ## 📦 Installation
14
28
 
@@ -21,7 +35,7 @@ pip install droidrun
21
35
  ### 🔧 Option 2: Install from Source
22
36
 
23
37
  ```bash
24
- git clone https://github.com/yourusername/droidrun.git
38
+ git clone https://github.com/droidrun/droidrun.git
25
39
  cd droidrun
26
40
  pip install -e .
27
41
  ```
@@ -89,6 +103,8 @@ Create a `.env` file in your working directory or set environment variables:
89
103
  export OPENAI_API_KEY="your_openai_api_key_here"
90
104
  export ANTHROPIC_API_KEY="your_anthropic_api_key_here"
91
105
  export GEMINI_API_KEY="your_gemini_api_key_here"
106
+ export DEEPSEEK_API_KEY="your_deepseek_api_key_here"
107
+ # For Ollama, no API key is needed
92
108
  ```
93
109
 
94
110
  To load the environment variables from the `.env` file:
@@ -109,15 +125,6 @@ droidrun devices
109
125
  droidrun connect 192.168.1.100
110
126
  ```
111
127
 
112
- ### 🔄 4. Verify the setup
113
-
114
- Verify that everything is set up correctly:
115
-
116
- ```bash
117
- # Should list your connected device and show portal status
118
- droidrun status
119
- ```
120
-
121
128
  ## 💻 Using the CLI
122
129
 
123
130
  DroidRun's CLI is designed to be simple and intuitive. You can use it in two ways:
@@ -133,13 +140,16 @@ droidrun "Open the settings app"
133
140
 
134
141
  ```bash
135
142
  # Using OpenAI
136
- droidrun "Open the calculator app" --provider openai --model gpt-4o-mini
143
+ droidrun "Open the calculator app" --provider OpenAI --model gpt-4o-mini
137
144
 
138
145
  # Using Anthropic
139
- droidrun "Check the battery level" --provider anthropic --model claude-3-sonnet-20240229
146
+ droidrun "Check the battery level" --provider Anthropic --model claude-3-sonnet-20240229
140
147
 
141
148
  # Using Gemini
142
- droidrun "Install and open Instagram" --provider gemini --model gemini-2.0-flash
149
+ droidrun "Install and open Instagram" --provider Gemini --model models/gemini-2.5-pro-preview-05-06
150
+
151
+ # Using Ollama (local)
152
+ droidrun "Check battery level" --provider Ollama --model llama2
143
153
  ```
144
154
 
145
155
  ### ⚙️ Additional Options
@@ -148,6 +158,15 @@ droidrun "Install and open Instagram" --provider gemini --model gemini-2.0-flash
148
158
  # Specify a particular device
149
159
  droidrun "Open Chrome and search for weather" --device abc123
150
160
 
161
+ # Enable vision capabilities
162
+ droidrun "Analyze what's on the screen" --vision
163
+
164
+ # Enable planning for complex tasks
165
+ droidrun "Find and download a specific app" --reasoning
166
+
167
+ # Enable execution tracing (requires Phoenix server running)
168
+ droidrun "Debug this complex workflow" --tracing
169
+
151
170
  # Set maximum number of steps
152
171
  droidrun "Open settings and enable dark mode" --steps 20
153
172
  ```
@@ -159,40 +178,73 @@ If you want to use DroidRun in your Python code rather than via the CLI, you can
159
178
  ```python
160
179
  #!/usr/bin/env python3
161
180
  import asyncio
162
- import os
163
- from droidrun.agent.react_agent import ReActAgent
164
- from droidrun.agent.llm_reasoning import LLMReasoner
165
- from dotenv import load_dotenv
166
-
167
- # Load environment variables from .env file
168
- load_dotenv()
181
+ from droidrun.agent.droid import DroidAgent
182
+ from droidrun.agent.utils.llm_picker import load_llm
183
+ from droidrun.tools import load_tools
169
184
 
170
185
  async def main():
171
- # Create an LLM instance (choose your preferred provider)
172
- llm = LLMReasoner(
173
- llm_provider="gemini", # Can be "openai", "anthropic", or "gemini"
174
- model_name="gemini-2.0-flash", # Choose appropriate model for your provider
175
- api_key=os.environ.get("GEMINI_API_KEY"), # Get API key from environment
186
+ # Load tools
187
+ tool_list, tools_instance = await load_tools()
188
+
189
+ # Load LLM
190
+ llm = load_llm(
191
+ provider_name="Gemini", # Case sensitive: OpenAI, Ollama, Anthropic, Gemini, DeepSeek
192
+ model="models/gemini-2.5-pro-preview-05-06",
176
193
  temperature=0.2
177
194
  )
178
195
 
179
196
  # Create and run the agent
180
- agent = ReActAgent(
181
- task="Open the Settings app and check the Android version",
182
- llm=llm
197
+ agent = DroidAgent(
198
+ goal="Open the Settings app and check the Android version",
199
+ llm=llm,
200
+ tools_instance=tools_instance,
201
+ tool_list=tool_list,
202
+ vision=True, # Enable vision for screen analysis
203
+ reasoning=True # Enable planning for complex tasks
183
204
  )
184
205
 
185
- steps = await agent.run()
186
- print(f"Execution completed with {len(steps)} steps")
206
+ # Run the agent
207
+ result = await agent.run()
208
+ print(f"Success: {result['success']}")
209
+ if result.get('reason'):
210
+ print(f"Reason: {result['reason']}")
187
211
 
188
212
  if __name__ == "__main__":
189
213
  asyncio.run(main())
190
214
  ```
191
215
 
192
- Save this as `test_droidrun.py`, ensure your `.env` file has the appropriate API key, and run:
216
+ You can also use LlamaIndex directly:
193
217
 
194
- ```bash
195
- python test_droidrun.py
218
+ ```python
219
+ import asyncio
220
+ from llama_index.llms.gemini import Gemini
221
+ from droidrun.agent.droid import DroidAgent
222
+ from droidrun.tools import load_tools
223
+
224
+ async def main():
225
+ # Load tools
226
+ tool_list, tools_instance = await load_tools()
227
+
228
+ # Create LlamaIndex LLM directly
229
+ llm = Gemini(
230
+ model="models/gemini-2.5-pro-preview-05-06",
231
+ temperature=0.2
232
+ )
233
+
234
+ # Create and run the agent
235
+ agent = DroidAgent(
236
+ goal="Open the Settings app and check the Android version",
237
+ llm=llm,
238
+ tools_instance=tools_instance,
239
+ tool_list=tool_list
240
+ )
241
+
242
+ # Run the agent
243
+ result = await agent.run()
244
+ print(f"Success: {result['success']}")
245
+
246
+ if __name__ == "__main__":
247
+ asyncio.run(main())
196
248
  ```
197
249
 
198
250
  ## ❓ Troubleshooting
@@ -217,6 +269,27 @@ If DroidRun is using the wrong LLM provider:
217
269
  1. Explicitly specify the provider with `--provider` (in CLI) or `provider_name=` (in code, when calling `load_llm`)
218
270
  2. When using Gemini, ensure you have set `GEMINI_API_KEY` and specified `--provider Gemini` (provider names are case-sensitive)
219
271
 
272
+ ### 📊 Tracing Issues
273
+
274
+ If you're using the tracing feature:
275
+ 1. Make sure to install Arize Phoenix: `pip install "arize-phoenix[llama-index]"`
276
+ 2. Start the Phoenix server before running your command: `phoenix serve`
277
+ 3. Access the tracing UI at http://localhost:6006 after execution
278
+
279
+ ### 🎬 Demo Videos
280
+
281
+ 1. **Shopping Assistant**: Watch how DroidRun searches Amazon for headphones and sends the top 3 products to a colleague on WhatsApp.
282
+
283
+ Prompt: "Go to Amazon, search for headphones and write the top 3 products to my colleague on WhatsApp."
284
+
285
+ [![Shopping Assistant Demo](https://img.youtube.com/vi/VQK3JcifgwU/0.jpg)](https://www.youtube.com/watch?v=VQK3JcifgwU)
286
+
287
+ 2. **Social Media Automation**: See DroidRun open X (Twitter) and post "Hello World".
288
+
289
+ Prompt: "Open up X and post Hello World."
290
+
291
+ [![Social Media Automation Demo](https://img.youtube.com/vi/i4-sDQhzt_M/0.jpg)](https://www.youtube.com/watch?v=i4-sDQhzt_M)
292
+
220
293
  ## 💡 Example Use Cases
221
294
 
222
295
  - Automated UI testing of Android applications
@@ -225,6 +298,22 @@ If DroidRun is using the wrong LLM provider:
225
298
  - Remote assistance for less technical users
226
299
  - Exploring Android UI with natural language commands
227
300
 
301
+ ## 🗺️ Roadmap
302
+
303
+ ### 🤖 Agent:
304
+ - **Improve memory**: Enhance context retention for complex multi-step tasks
305
+ - **Expand planning capabilities**: Add support for more complex reasoning strategies
306
+ - **Add Integrations**: Support more LLM providers and agent frameworks (LangChain, Agno, etc.)
307
+
308
+ ### ⚙️ Automations:
309
+ - **Create Automation Scripts**: Generate reusable scripts from agent actions that can be scheduled or shared
310
+
311
+ ### ☁️ Cloud:
312
+ - **Hosted version**: Remote device control via web interface without local setup
313
+ - **Add-Ons**: Marketplace for extensions serving specific use cases
314
+ - **Proxy Hours**: Cloud compute time with tiered pricing for running automations
315
+ - **Droidrun AppStore**: Simple installation of Apps on your hosted devices
316
+
228
317
  ## 👥 Contributing
229
318
 
230
319
  Contributions are welcome! Please feel free to submit a Pull Request.