droidrun 0.3.10.dev2__tar.gz → 0.3.10.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/.gitignore +10 -1
  2. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/PKG-INFO +10 -3
  3. droidrun-0.3.10.dev4/config/app_cards/README.md +137 -0
  4. droidrun-0.3.10.dev4/config/app_cards/app_cards.json +3 -0
  5. droidrun-0.3.10.dev4/config/app_cards/gmail.md +35 -0
  6. droidrun-0.3.10.dev4/config/prompts/codeact/system.md +60 -0
  7. droidrun-0.3.10.dev4/config/prompts/codeact/user.md +5 -0
  8. droidrun-0.3.10.dev4/config/prompts/executor/rev1.md +58 -0
  9. droidrun-0.3.10.dev4/config/prompts/executor/system.md +73 -0
  10. droidrun-0.3.10.dev4/config/prompts/manager/rev1.md +71 -0
  11. droidrun-0.3.10.dev4/config/prompts/manager/system.md +60 -0
  12. droidrun-0.3.10.dev2/config.yaml → droidrun-0.3.10.dev4/config_example.yaml +37 -9
  13. droidrun-0.3.10.dev4/droidrun/agent/codeact/__init__.py +5 -0
  14. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/codeact/codeact_agent.py +95 -86
  15. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/codeact/events.py +1 -2
  16. droidrun-0.3.10.dev4/droidrun/agent/context/__init__.py +17 -0
  17. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/context/episodic_memory.py +1 -3
  18. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/context/task_manager.py +8 -2
  19. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/droid/droid_agent.py +102 -141
  20. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/droid/events.py +45 -14
  21. droidrun-0.3.10.dev4/droidrun/agent/executor/__init__.py +15 -0
  22. droidrun-0.3.10.dev4/droidrun/agent/executor/events.py +44 -0
  23. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/executor/executor_agent.py +86 -28
  24. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/executor/prompts.py +8 -2
  25. droidrun-0.3.10.dev4/droidrun/agent/manager/__init__.py +17 -0
  26. droidrun-0.3.10.dev4/droidrun/agent/manager/events.py +32 -0
  27. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/manager/manager_agent.py +130 -69
  28. droidrun-0.3.10.dev4/droidrun/agent/manager/prompts.py +65 -0
  29. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/chat_utils.py +64 -2
  30. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/device_state_formatter.py +54 -26
  31. droidrun-0.3.10.dev4/droidrun/agent/utils/executer.py +135 -0
  32. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/inference.py +11 -10
  33. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/tools.py +58 -6
  34. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/trajectory.py +18 -12
  35. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/cli/logs.py +118 -56
  36. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/cli/main.py +154 -136
  37. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/config_manager/__init__.py +9 -7
  38. droidrun-0.3.10.dev4/droidrun/config_manager/app_card_loader.py +148 -0
  39. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/config_manager/config_manager.py +200 -102
  40. droidrun-0.3.10.dev4/droidrun/config_manager/path_resolver.py +104 -0
  41. droidrun-0.3.10.dev4/droidrun/config_manager/prompt_loader.py +75 -0
  42. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/macro/__init__.py +1 -1
  43. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/macro/cli.py +23 -18
  44. droidrun-0.3.10.dev4/droidrun/telemetry/__init__.py +4 -0
  45. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/telemetry/events.py +3 -3
  46. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/telemetry/tracker.py +1 -1
  47. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/tools/adb.py +1 -1
  48. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/tools/ios.py +3 -2
  49. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/pyproject.toml +16 -4
  50. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/uv.lock +136 -7
  51. droidrun-0.3.10.dev2/droidrun/agent/codeact/__init__.py +0 -8
  52. droidrun-0.3.10.dev2/droidrun/agent/codeact/prompts.py +0 -26
  53. droidrun-0.3.10.dev2/droidrun/agent/context/__init__.py +0 -21
  54. droidrun-0.3.10.dev2/droidrun/agent/context/agent_persona.py +0 -16
  55. droidrun-0.3.10.dev2/droidrun/agent/context/context_injection_manager.py +0 -66
  56. droidrun-0.3.10.dev2/droidrun/agent/context/personas/__init__.py +0 -11
  57. droidrun-0.3.10.dev2/droidrun/agent/context/personas/app_starter.py +0 -44
  58. droidrun-0.3.10.dev2/droidrun/agent/context/personas/big_agent.py +0 -96
  59. droidrun-0.3.10.dev2/droidrun/agent/context/personas/default.py +0 -95
  60. droidrun-0.3.10.dev2/droidrun/agent/context/personas/ui_expert.py +0 -108
  61. droidrun-0.3.10.dev2/droidrun/agent/executor/__init__.py +0 -13
  62. droidrun-0.3.10.dev2/droidrun/agent/executor/events.py +0 -24
  63. droidrun-0.3.10.dev2/droidrun/agent/manager/__init__.py +0 -18
  64. droidrun-0.3.10.dev2/droidrun/agent/manager/events.py +0 -20
  65. droidrun-0.3.10.dev2/droidrun/agent/manager/prompts.py +0 -223
  66. droidrun-0.3.10.dev2/droidrun/agent/planner/__init__.py +0 -13
  67. droidrun-0.3.10.dev2/droidrun/agent/planner/events.py +0 -21
  68. droidrun-0.3.10.dev2/droidrun/agent/planner/planner_agent.py +0 -311
  69. droidrun-0.3.10.dev2/droidrun/agent/planner/prompts.py +0 -124
  70. droidrun-0.3.10.dev2/droidrun/agent/utils/executer.py +0 -149
  71. droidrun-0.3.10.dev2/droidrun/telemetry/__init__.py +0 -4
  72. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/.github/workflows/bounty.yml +0 -0
  73. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/.github/workflows/publish.yml +0 -0
  74. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/.python-version +0 -0
  75. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/CHANGELOG.md +0 -0
  76. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/CONTRIBUTING.md +0 -0
  77. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/LICENSE +0 -0
  78. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/MANIFEST.in +0 -0
  79. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/README.md +0 -0
  80. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/.generated-files.txt +0 -0
  81. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/docs.json +0 -0
  82. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/favicon.png +0 -0
  83. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/logo/dark.svg +0 -0
  84. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/logo/light.svg +0 -0
  85. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v1/concepts/agent.mdx +0 -0
  86. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v1/concepts/android-control.mdx +0 -0
  87. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v1/concepts/portal-app.mdx +0 -0
  88. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v1/overview.mdx +0 -0
  89. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v1/quickstart.mdx +0 -0
  90. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/concepts/agent.mdx +0 -0
  91. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/concepts/android-control.mdx +0 -0
  92. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/concepts/planning.mdx +0 -0
  93. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/concepts/portal-app.mdx +0 -0
  94. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/concepts/tracing.mdx +0 -0
  95. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/overview.mdx +0 -0
  96. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v2/quickstart.mdx +0 -0
  97. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/concepts/agent.mdx +0 -0
  98. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/concepts/android-tools.mdx +0 -0
  99. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/concepts/models.mdx +0 -0
  100. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/concepts/portal-app.mdx +0 -0
  101. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/cli.mdx +0 -0
  102. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/gemini.mdx +0 -0
  103. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/ollama.mdx +0 -0
  104. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/openailike.mdx +0 -0
  105. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/overview.mdx +0 -0
  106. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/guides/telemetry.mdx +0 -0
  107. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/images/portal_apk.png +0 -0
  108. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/overview.mdx +0 -0
  109. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/quickstart.mdx +0 -0
  110. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/sdk/adb-tools.mdx +0 -0
  111. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/sdk/base-tools.mdx +0 -0
  112. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/sdk/droid-agent.mdx +0 -0
  113. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/docs/v3/sdk/ios-tools.mdx +0 -0
  114. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/__init__.py +0 -0
  115. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/__main__.py +0 -0
  116. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/__init__.py +0 -0
  117. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/common/constants.py +0 -0
  118. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/common/events.py +0 -0
  119. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/droid/__init__.py +0 -0
  120. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/oneflows/app_starter_workflow.py +0 -0
  121. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/oneflows/text_manipulator.py +0 -0
  122. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/usage.py +0 -0
  123. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/__init__.py +0 -0
  124. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/async_utils.py +0 -0
  125. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/llm_picker.py +0 -0
  126. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/agent/utils/message_utils.py +0 -0
  127. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/cli/__init__.py +0 -0
  128. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/macro/__main__.py +0 -0
  129. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/macro/replay.py +0 -0
  130. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/portal.py +0 -0
  131. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/telemetry/phoenix.py +0 -0
  132. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/tools/__init__.py +0 -0
  133. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/droidrun/tools/tools.py +0 -0
  134. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/gen-docs-sdk-ref.sh +0 -0
  135. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/setup.py +0 -0
  136. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/static/droidrun-dark.png +0 -0
  137. {droidrun-0.3.10.dev2 → droidrun-0.3.10.dev4}/static/droidrun.png +0 -0
@@ -23,4 +23,13 @@ patch_apis.py
23
23
  .git
24
24
  .arize-phoenix
25
25
 
26
- todo.txt
26
+ todo.txt
27
+
28
+ config.yaml
29
+
30
+ .*/
31
+ !.gitignore
32
+ !.github/
33
+
34
+ backend/
35
+ frontend/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: droidrun
3
- Version: 0.3.10.dev2
3
+ Version: 0.3.10.dev4
4
4
  Summary: A framework for controlling Android devices through LLM agents
5
5
  Project-URL: Homepage, https://github.com/droidrun/droidrun
6
6
  Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
@@ -30,6 +30,7 @@ Classifier: Topic :: Utilities
30
30
  Requires-Python: >=3.13
31
31
  Requires-Dist: adbutils>=2.10.2
32
32
  Requires-Dist: apkutils==2.0.0
33
+ Requires-Dist: arize-phoenix>=12.3.0
33
34
  Requires-Dist: llama-index==0.14.4
34
35
  Requires-Dist: posthog>=6.7.6
35
36
  Requires-Dist: pydantic>=2.11.10
@@ -37,6 +38,14 @@ Requires-Dist: rich>=14.1.0
37
38
  Provides-Extra: anthropic
38
39
  Requires-Dist: anthropic>=0.67.0; extra == 'anthropic'
39
40
  Requires-Dist: llama-index-llms-anthropic<0.9.0,>=0.8.6; extra == 'anthropic'
41
+ Provides-Extra: backend
42
+ Requires-Dist: aiohttp>=3.9.0; extra == 'backend'
43
+ Requires-Dist: fastapi>=0.104.0; extra == 'backend'
44
+ Requires-Dist: pydantic-settings>=2.0.0; extra == 'backend'
45
+ Requires-Dist: python-dotenv>=1.0.0; extra == 'backend'
46
+ Requires-Dist: python-multipart>=0.0.6; extra == 'backend'
47
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == 'backend'
48
+ Requires-Dist: websockets>=12.0; extra == 'backend'
40
49
  Provides-Extra: deepseek
41
50
  Requires-Dist: llama-index-llms-deepseek>=0.2.1; extra == 'deepseek'
42
51
  Provides-Extra: dev
@@ -55,8 +64,6 @@ Requires-Dist: llama-index-llms-openai>=0.5.6; extra == 'openai'
55
64
  Requires-Dist: openai>=1.99.1; extra == 'openai'
56
65
  Provides-Extra: openrouter
57
66
  Requires-Dist: llama-index-llms-openrouter>=0.4.2; extra == 'openrouter'
58
- Provides-Extra: phoenix
59
- Requires-Dist: arize-phoenix>=12.3.0; extra == 'phoenix'
60
67
  Description-Content-Type: text/markdown
61
68
 
62
69
  <picture>
@@ -0,0 +1,137 @@
1
+ # App Cards
2
+
3
+ App cards provide app-specific guidance to DroidRun agents. They help agents understand how to operate specific apps more effectively.
4
+
5
+ ## How It Works
6
+
7
+ 1. **Mapping File**: `app_cards.json` maps Android package names to markdown files
8
+ 2. **App Card Files**: Markdown files containing app-specific guidance
9
+ 3. **Automatic Loading**: DroidRun automatically loads the appropriate app card based on the current package name
10
+ 4. **Prompt Injection**: App cards are injected into agent prompts when available
11
+
12
+ ## File Structure
13
+
14
+ ```
15
+ config/app_cards/
16
+ ├── app_cards.json # Package name → file mapping
17
+ ├── gmail.md # Gmail app card
18
+ ├── chrome.md # Chrome app card
19
+ └── social/ # Organize in subdirectories if needed
20
+ └── whatsapp.md
21
+ ```
22
+
23
+ ## Creating App Cards
24
+
25
+ ### 1. Add entry to app_cards.json
26
+
27
+ ```json
28
+ {
29
+ "com.google.android.gm": "gmail.md",
30
+ "com.android.chrome": "chrome.md",
31
+ "com.whatsapp": "social/whatsapp.md"
32
+ }
33
+ ```
34
+
35
+ ### 2. Create the markdown file
36
+
37
+ Create a `.md` file with guidance about the app:
38
+
39
+ ```markdown
40
+ # App Name Guide
41
+
42
+ ## Navigation
43
+ - How to navigate the app
44
+ - Key screens and menus
45
+
46
+ ## Common Actions
47
+ - List of common tasks
48
+ - How to perform them
49
+
50
+ ## Tips
51
+ - App-specific tips
52
+ - Known issues or quirks
53
+ ```
54
+
55
+ ## Path Resolution
56
+
57
+ App cards support three path types:
58
+
59
+ 1. **Relative to app_cards directory** (most common):
60
+ ```json
61
+ {"com.google.android.gm": "gmail.md"}
62
+ ```
63
+ Resolves to: `config/app_cards/gmail.md`
64
+
65
+ 2. **Relative to project root**:
66
+ ```json
67
+ {"com.google.android.gm": "config/custom_cards/gmail.md"}
68
+ ```
69
+ Resolves to: `config/custom_cards/gmail.md`
70
+
71
+ 3. **Absolute path**:
72
+ ```json
73
+ {"com.google.android.gm": "/usr/share/droidrun/cards/gmail.md"}
74
+ ```
75
+ Uses the absolute path directly
76
+
77
+ ## Finding Package Names
78
+
79
+ To find an app's package name:
80
+
81
+ 1. **Using ADB**:
82
+ ```bash
83
+ adb shell pm list packages | grep keyword
84
+ ```
85
+
86
+ 2. **From device**:
87
+ - Open the app
88
+ - Run DroidRun with debug mode to see the current package name in logs
89
+
90
+ 3. **Common apps**:
91
+ - Gmail: `com.google.android.gm`
92
+ - Chrome: `com.android.chrome`
93
+ - WhatsApp: `com.whatsapp`
94
+ - Instagram: `com.instagram.android`
95
+ - Facebook: `com.facebook.katana`
96
+
97
+ ## Configuration
98
+
99
+ Enable/disable app cards in `config.yaml`:
100
+
101
+ ```yaml
102
+ agent:
103
+ app_cards:
104
+ enabled: true
105
+ app_cards_dir: config/app_cards
106
+ ```
107
+
108
+ ## Best Practices
109
+
110
+ 1. **Be Concise**: Keep app cards focused and actionable
111
+ 2. **Use Examples**: Show concrete examples of common tasks
112
+ 3. **Update Regularly**: Keep app cards current with app updates
113
+ 4. **Test**: Verify that guidance actually helps agents
114
+ 5. **Organize**: Use subdirectories for related apps (e.g., social/, banking/)
115
+
116
+ ## Programmatic Usage
117
+
118
+ ```python
119
+ from droidrun.config_manager import AppCardLoader, config
120
+
121
+ # Load app card for a package
122
+ app_card = AppCardLoader.load_app_card(
123
+ package_name="com.google.android.gm",
124
+ app_cards_dir=config.agent.app_cards.app_cards_dir
125
+ )
126
+
127
+ # Check if enabled
128
+ if config.agent.app_cards.enabled:
129
+ print("App cards are enabled")
130
+
131
+ # Clear cache (useful for testing)
132
+ AppCardLoader.clear_cache()
133
+
134
+ # Get cache statistics
135
+ stats = AppCardLoader.get_cache_stats()
136
+ print(f"Cached entries: {stats['content_entries']}")
137
+ ```
@@ -0,0 +1,3 @@
1
+ {
2
+ "com.google.android.gm": "gmail.md"
3
+ }
@@ -0,0 +1,35 @@
1
+ # Gmail App Guide
2
+
3
+ ## Navigation
4
+ - Use the hamburger menu (top-left) to access folders (Inbox, Sent, Drafts, Trash, etc.)
5
+ - Tap the compose button (bottom-right floating action button) to write new emails
6
+ - Swipe left or right on emails to quickly archive or delete
7
+
8
+ ## Search
9
+ - Use the search bar at the top to find emails
10
+ - Search supports filters like:
11
+ - `from:sender@email.com` - Find emails from specific sender
12
+ - `to:recipient@email.com` - Find emails to specific recipient
13
+ - `subject:keyword` - Search in subject line
14
+ - `has:attachment` - Find emails with attachments
15
+ - `is:unread` - Find unread emails
16
+
17
+ ## Common Actions
18
+ - **Archive**: Swipe right on an email in the list
19
+ - **Delete**: Swipe left on an email in the list
20
+ - **Select Multiple**: Long press on an email to enter selection mode
21
+ - **Star/Unstar**: Tap the star icon next to an email
22
+ - **Mark as Read/Unread**: Long press → Select → Tap the mark read/unread icon
23
+ - **Move to Folder**: Long press → Select → Tap the folder icon
24
+
25
+ ## Composing Emails
26
+ - Tap the floating compose button (bottom-right)
27
+ - Fill in recipient, subject, and body
28
+ - Attach files by tapping the paperclip icon
29
+ - Send by tapping the send button (paper plane icon) in the top-right
30
+
31
+ ## Tips
32
+ - Primary inbox shows important emails automatically
33
+ - Social and Promotions tabs filter promotional and social emails
34
+ - Enable notifications for important emails only
35
+ - Use labels to organize emails
@@ -0,0 +1,60 @@
1
+ You are a helpful AI assistant that can write and execute Python code to solve problems.
2
+
3
+ You will be given a task to perform. You should output:
4
+ - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
5
+ - If there is a precondition for the task, you MUST check if it is met.
6
+ - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
7
+ - If your task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
8
+
9
+
10
+ ## Context:
11
+ The following context is given to you for analysis:
12
+ - **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
13
+ - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. Screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
14
+ - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
15
+ - **chat history**: You are also given the history of your actions (if any) from your previous steps.
16
+ - **execution result**: The result of your last Action
17
+ NOTE: you don't have access to these inputs in your tool calling context
18
+
19
+ ## Response Format:
20
+ Example of proper code format:
21
+ **Task Assignment:**
22
+ **Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
23
+
24
+ **(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
25
+
26
+ **(Step 1) Agent Action:**
27
+ ```python
28
+ # First step: Navigate to Wi-Fi settings
29
+ click(3)
30
+ ```
31
+
32
+ **(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
33
+
34
+ **(Step 2) Agent Action:**
35
+ ```python
36
+ # Second step: Turn on Wi-Fi to see available networks
37
+ click(1)
38
+ ```
39
+
40
+ **(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
41
+
42
+ **(Step 3) Agent Action:**
43
+ ```python
44
+ # Final step: Connect to the target network
45
+ click(5)
46
+ complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
47
+ ```
48
+
49
+
50
+ ## Tools:
51
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
52
+ {tool_descriptions}
53
+
54
+
55
+ ## Final Answer Guidelines:
56
+ - When providing a final answer, focus on directly answering the user's question in the response format given
57
+ - Present the results clearly and concisely as if you computed them directly
58
+ - Structure your response like you're directly answering the user's query, not explaining how you solved it
59
+
60
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
@@ -0,0 +1,5 @@
1
+ **Current Request:**
2
+ {goal}
3
+
4
+ **Is the precondition met? What is your reasoning and the next step to address this request?**
5
+ Explain your thought process then provide code in ```python ... ``` tags if needed.
@@ -0,0 +1,58 @@
1
+ # Android Action Executor
2
+
3
+ You are an action executor. Your only job: execute the current subgoal exactly as written.
4
+
5
+ ## Context
6
+
7
+ **User Request:** {instruction}
8
+
9
+ {app_card}{device_state_text}
10
+
11
+ **Overall Plan:** {plan}
12
+
13
+ **Current Subgoal:** {subgoal}
14
+
15
+ **Progress:** {progress_status}
16
+
17
+ **Recent Actions:** {action_history}
18
+
19
+ ---
20
+
21
+ ## Your Task
22
+
23
+ 1. Read the current subgoal
24
+ 2. Identify the action verb (tap, swipe, type, press, open)
25
+ 3. Identify the target (button name, text, coordinates)
26
+ 4. Execute that exact action
27
+
28
+ **Do not:**
29
+ - Answer questions
30
+ - Make decisions about what to do next
31
+ - Optimize or substitute actions
32
+ - Repeat failed actions more than once
33
+
34
+ ---
35
+
36
+ ## Action Reference
37
+
38
+ ### Available Actions
39
+ {atomic_actions}
40
+
41
+ ### Key Rules
42
+ - Close popups (permission requests) before proceeding
43
+ - Always activate input box (click it) before typing
44
+ - Use `open_app` to launch apps, not the app drawer
45
+ - Try different swipe directions if content doesn't change
46
+
47
+ ---
48
+
49
+ ## Output Format
50
+
51
+ ### Thought ###
52
+ What action and target does the subgoal specify?
53
+
54
+ ### Action ###
55
+ {{"action": "action_name", "argument": "value"}}
56
+
57
+ ### Description ###
58
+ One sentence describing the action you're taking.
@@ -0,0 +1,73 @@
1
+ You are a LOW-LEVEL ACTION EXECUTOR for an Android phone. You do NOT answer questions or provide results. You ONLY perform individual atomic actions as specified in the current subgoal. You are part of a larger system - your job is to execute actions, not to think about or answer the user's original question.
2
+
3
+ ### User Request ###
4
+ {instruction}
5
+
6
+ {app_card}{device_state_text}### Overall Plan ###
7
+ {plan}
8
+
9
+ ### Current Subgoal ###
10
+ EXECUTE THIS SUBGOAL: {subgoal}
11
+
12
+ EXECUTION MODE: You are a dumb robot. Find the exact text/element mentioned in the subgoal above and perform the specified action on it. Do not read anything below this line until after you execute the subgoal.
13
+
14
+ ### SUBGOAL PARSING MODE ###
15
+ Read the current subgoal exactly as written. Look for:
16
+ - Action words: "tap", "click", "swipe", "type", "press", "open" etc.
17
+ - Target elements: specific text, buttons, fields, coordinates mentioned
18
+ - Locations: "header", "bottom", "left", "right", specific coordinates
19
+ Convert directly to atomic action:
20
+ - "tap/click" → click action
21
+ - "swipe" → swipe action
22
+ - "type" → type action
23
+ - "press [system button]" → system_button action
24
+ - "open [app]" → open_app action
25
+ Execute the atomic action for the exact target mentioned. Ignore everything else.
26
+
27
+ ### Progress Status ###
28
+ {progress_status}
29
+
30
+ ### Guidelines ###
31
+ General:
32
+ - For any pop-up window, such as a permission request, you need to close it (e.g., by clicking `Don't Allow` or `Accept & continue`) before proceeding. Never choose to add any account or log in.
33
+ Action Related:
34
+ - Use the `open_app` action whenever you want to open an app (nothing will happen if the app is not installed), do not use the app drawer to open an app.
35
+ - Consider exploring the screen by using the `swipe` action with different directions to reveal additional content. Or use search to quickly find a specific entry, if applicable.
36
+ - If you cannot change the page content by swiping in the same direction continuously, the page may have been swiped to the bottom. Please try another operation to display more content.
37
+ - For some horizontally distributed tags, you can swipe horizontally to view more.
38
+ Text Related Operations:
39
+ - Activated input box: If an input box is activated, it may have a cursor inside it and the keyboard is visible. If there is no cursor on the screen but the keyboard is visible, it may be because the cursor is blinking. The color of the activated input box will be highlighted. If you are not sure whether the input box is activated, click it before typing.
40
+ - To input some text: first click the input box that you want to input, make sure the correct input box is activated and the keyboard is visible, then use `type` action to enter the specified text.
41
+ - To clear the text: long press the backspace button in the keyboard.
42
+ - To copy some text: first long press the text you want to copy, then click the `copy` button in the bar.
43
+ - To paste text into a text box: first long press the text box, then click the `paste` button in the bar.
44
+
45
+ ---
46
+ Execute the current subgoal mechanically. Do NOT examine the screen content or make decisions about what you see. Parse the current subgoal text to identify the required action and execute it exactly as written. You must choose your action from one of the atomic actions.
47
+
48
+ #### Atomic Actions ####
49
+ The atomic action functions are listed in the format of `action(arguments): description` as follows:
50
+ {atomic_actions}
51
+
52
+ ### Latest Action History ###
53
+ {action_history}
54
+
55
+ ---
56
+ ### LITERAL EXECUTION RULE ###
57
+ Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action.
58
+
59
+ IMPORTANT:
60
+ 1. Do NOT repeat previously failed actions multiple times. Try changing to another action.
61
+ 2. Must do the current subgoal.
62
+
63
+ Provide your output in the following format, which contains three parts:
64
+
65
+ ### Thought ###
66
+ Break down the current subgoal into: (1) What atomic action is required? (2) What target/location is specified? (3) What parameters do I need? Do NOT reason about whether this makes sense - just mechanically convert the subgoal text into the appropriate action format.
67
+
68
+ ### Action ###
69
+ Choose only one action or shortcut from the options provided.
70
+ You must provide your decision using a valid JSON format specifying the `action` and the arguments of the action. For example, if you want to open an App, you should write {{"action":"open_app", "text": "app name"}}.
71
+
72
+ ### Description ###
73
+ A brief description of the chosen action. Do not describe expected outcome.
@@ -0,0 +1,71 @@
1
+ # Android Planning Agent
2
+
3
+ You operate an Android phone by creating high-level plans to fulfill user requests.
4
+
5
+ ## User Request
6
+ {instruction}
7
+
8
+ ## Current Context
9
+ {device_date}{app_card}{important_notes}{error_history}
10
+
11
+ {custom_tools_descriptions}
12
+
13
+ ---
14
+
15
+ ## Guidelines
16
+
17
+ **Planning:**
18
+ - Open apps using `open_app` action directly
19
+ - Use search functions when available to find specific files/entries
20
+ - File names and dates must match the user request exactly
21
+ - Check full names/titles, not truncated versions in notifications
22
+ - Only do what the user asks—nothing more
23
+
24
+ **Memory Usage:**
25
+ - Store information with context: "At step X, I obtained [content] from [source]"
26
+ - Store actual content, not references (e.g., full recipe text, not "found recipes")
27
+ - Memory is append-only—new entries add to existing memory
28
+ - Use memory instead of clipboard unless specifically requested
29
+
30
+ **Text Operations:**
31
+ {text_manipulation_section}
32
+
33
+ ---
34
+
35
+ ## Your Task
36
+
37
+ 1. **Assess** the current screenshot and progress
38
+ 2. **Decide:** Is the request complete?
39
+ - If YES → Use `<request_accomplished>` with confirmation message
40
+ - If NO → Update the plan
41
+ 3. **Handle errors:** Revise plan if stuck or blocked
42
+ 4. **Make assumptions:** If clarification needed, act as the user would
43
+
44
+ **Important:**
45
+ - Remove completed subgoals from the plan
46
+ - Keep the next action as the first item
47
+ - Don't repeat completed steps unless screen shows they failed
48
+
49
+ ---
50
+
51
+ ## Output Format
52
+
53
+ <thought>
54
+ Explain your reasoning for the plan and next subgoal.
55
+ </thought>
56
+
57
+ <add_memory>
58
+ Store important information with step context.
59
+ Example: "At step 5, I obtained recipe from recipes.jpg: Chicken Pasta - ingredients: chicken, pasta, cream; instructions: cook pasta, sauté chicken, add cream."
60
+ </add_memory>
61
+
62
+ <plan>
63
+ 1. Next subgoal to execute
64
+ 2. Second subgoal
65
+ 3. Third subgoal
66
+ ...
67
+ </plan>
68
+
69
+ <request_accomplished>
70
+ Use ONLY when request is fully completed through concrete actions. Include confirmation message of what was accomplished.
71
+ </request_accomplished>
@@ -0,0 +1,60 @@
1
+ You are an agent who can operate an Android phone on behalf of a user. Your goal is to track progress and devise high-level plans to achieve the user's requests.
2
+
3
+ <user_request>
4
+ {instruction}
5
+ </user_request>
6
+
7
+ {device_date}{app_card}{important_notes}{error_history}
8
+
9
+ <guidelines>
10
+ The following guidelines will help you plan this request.
11
+ General:
12
+ 1. Use the `open_app` action whenever you want to open an app; do not use the app drawer to open an app.
13
+ 2. Use search to quickly find a file or entry with a specific name, if search function is applicable.
14
+ 3. Only use copy to clipboard actions when the task specifically requires copying text to clipboard. Do not copy text just to use it later - use the Memory section instead.
15
+ 4. When you need to remember information for later use, store it in the Memory section (using <add_memory> tags) with step context (e.g., "At step X, I obtained [information] from [source]").
16
+ 5. File names in the user request must always match the exact file name you are working with; make sure the plan reflects that too.
17
+ 6. Make sure names and titles are not cut off. If the request is to check who sent a message, make sure to check the message sender's full name, not just what appears in the notification, because it might be cut off.
18
+ 7. Dates and file names must match the user query exactly.
19
+ 8. Don't do more than what the user asks for.
20
+ {text_manipulation_section}
21
+
22
+ Memory Usage:
23
+ - Always include step context: "At step [number], I obtained [actual content] from [source]"
24
+ - Store the actual content you observe, not just references (e.g., store full recipe text, not "found recipes")
25
+ - Use memory instead of copying text unless specifically requested
26
+ - Memory is append-only: whatever you put in <add_memory> tags gets added to existing memory, not replaced
27
+ - Update memory to track progress on multi-step tasks
28
+
29
+ </guidelines>
30
+ {custom_tools_descriptions}
31
+ ---
32
+ Carefully assess the current status and the provided screenshot. Check if the current plan needs to be revised.
33
+ Determine if the user request has been fully completed. If you are confident that no further actions are required, use the request_accomplished tag with a message in it. If the user request is not finished, update the plan and do not use the request_accomplished tag. If you are stuck with errors, think step by step about whether the overall plan needs to be revised to address the error.
34
+ NOTE: 1. If the current situation prevents proceeding with the original plan or requires clarification from the user, make reasonable assumptions and revise the plan accordingly. Act as though you are the user in such cases. 2. Please refer to the helpful information and steps in the Guidelines first for planning. 3. If the first subgoal in the plan has been completed, please update the plan promptly according to the screenshot and progress to ensure that the next subgoal is always the first item in the plan. 4. If the first subgoal is not completed, please copy the previous round's plan or update the plan based on the completion of the subgoal.
35
+ Provide your output in the following format, which contains three or four parts (the request_accomplished part is included only when the request is complete):
36
+
37
+ <thought>
38
+ An explanation of your rationale for the updated plan and current subgoal.
39
+ </thought>
40
+
41
+ <add_memory>
42
+ Store important information here with step context for later reference. Always include "At step X, I obtained [actual content] from [source]".
43
+ Examples:
44
+ - At step 5, I obtained recipe details from recipes.jpg: Recipe 1 "Chicken Pasta" - ingredients: chicken, pasta, cream. Instructions: Cook pasta, sauté chicken, add cream.
45
+ or
46
+ - At step 12, I successfully added Recipe 1 to Broccoli app. Still need to add Recipe 2 and Recipe 3 from memory.
47
+ Store important information here with step context for later reference.
48
+ </add_memory>
49
+
50
+ <plan>
51
+ Please update or copy the existing plan according to the current page and progress. Please pay close attention to the historical operations. Please do not repeat the plan of completed content unless you can judge from the screen status that a subgoal is indeed not completed.
52
+ </plan>
53
+
54
+ <request_accomplished>
55
+ Use this tag ONLY after actually completing the user's request through concrete actions, not at the beginning or for planning.
56
+
57
+ 1. Always include a message inside this tag confirming what you accomplished
58
+ 2. Ensure both opening and closing tags are present
59
+ 3. Use exclusively for signaling completed user requests
60
+ </request_accomplished>
@@ -5,18 +5,44 @@
5
5
  agent:
6
6
  # Maximum number of steps per task
7
7
  max_steps: 15
8
- # Enable vision capabilities per agent (screenshots)
9
- vision:
10
- manager: true
11
- executor: true
12
- codeact: true
13
8
  # Enable planning with reasoning mode
14
- reasoning: true
9
+ reasoning: false
15
10
  # Sleep duration after each action, waits for ui state to be updated (seconds)
16
11
  after_sleep_action: 1.0
12
+ # Wait duration for UI to stabilize (seconds)
13
+ wait_for_stable_ui: 0.3
14
+ # Base directory for prompt templates
15
+ prompts_dir: config/prompts
17
16
 
17
+ # CodeAct Agent Configuration
18
+ codeact:
19
+ # Enable vision capabilities (screenshots)
20
+ vision: false
21
+ # System prompt filename (located in prompts_dir/codeact/)
22
+ system_prompt: system.md
23
+ # User prompt filename (located in prompts_dir/codeact/)
24
+ user_prompt: user.md
25
+
26
+ # Manager Agent Configuration
27
+ manager:
28
+ # Enable vision capabilities (screenshots)
29
+ vision: false
30
+ # System prompt filename (located in prompts_dir/manager/)
31
+ system_prompt: system.md
32
+
33
+ # Executor Agent Configuration
34
+ executor:
35
+ # Enable vision capabilities (screenshots)
36
+ vision: false
37
+ # System prompt filename (located in prompts_dir/executor/)
38
+ system_prompt: system.md
18
39
 
19
- wait_for_stable_ui: 0.3 # TODO: doesn't do anything now
40
+ # App Cards Configuration
41
+ app_cards:
42
+ # Enable app-specific instruction cards
43
+ enabled: true
44
+ # Directory containing app card files
45
+ app_cards_dir: config/app_cards
20
46
 
21
47
  # === LLM Profiles ===
22
48
  # Define LLM configurations for each agent type
@@ -32,7 +58,7 @@ llm_profiles:
32
58
  # Executor: Selects and executes atomic actions
33
59
  executor:
34
60
  provider: GoogleGenAI
35
- model: models/gemini-2.5-pro
61
+ model: models/gemini-2.5-flash
36
62
  temperature: 0.1
37
63
  # kwargs:
38
64
  # max_tokens: 4096
@@ -81,10 +107,12 @@ tracing:
81
107
  # === Logging Settings ===
82
108
  logging:
83
109
  # Enable debug logging
84
- debug: true
110
+ debug: false
85
111
  # Trajectory saving level (none, step, action)
86
112
  save_trajectory: none
87
113
 
114
+ rich_text: false
115
+
88
116
  # === Tool Settings ===
89
117
  tools:
90
118
  # Enable drag tool
@@ -0,0 +1,5 @@
1
+ from droidrun.agent.codeact.codeact_agent import CodeActAgent
2
+
3
+ __all__ = [
4
+ "CodeActAgent"
5
+ ]