droidrun 0.3.10.dev4__tar.gz → 0.3.10.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/PKG-INFO +3 -12
  2. droidrun-0.3.10.dev4/config/prompts/codeact/system.md → droidrun-0.3.10.dev6/config/prompts/codeact/system.jinja2 +1 -1
  3. droidrun-0.3.10.dev4/config/prompts/codeact/user.md → droidrun-0.3.10.dev6/config/prompts/codeact/user.jinja2 +1 -1
  4. droidrun-0.3.10.dev6/config/prompts/executor/rev1.jinja2 +78 -0
  5. droidrun-0.3.10.dev4/config/prompts/executor/system.md → droidrun-0.3.10.dev6/config/prompts/executor/system.jinja2 +33 -8
  6. droidrun-0.3.10.dev4/config/prompts/manager/rev1.md → droidrun-0.3.10.dev6/config/prompts/manager/rev1.jinja2 +50 -6
  7. droidrun-0.3.10.dev4/config/prompts/manager/system.md → droidrun-0.3.10.dev6/config/prompts/manager/system.jinja2 +49 -4
  8. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/config_example.yaml +13 -5
  9. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/codeact/codeact_agent.py +18 -25
  10. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/droid/events.py +4 -1
  11. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/executor/executor_agent.py +24 -38
  12. droidrun-0.3.10.dev6/droidrun/agent/executor/prompts.py +34 -0
  13. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/manager/manager_agent.py +104 -87
  14. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/llm_picker.py +63 -1
  15. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/tools.py +29 -0
  16. droidrun-0.3.10.dev6/droidrun/app_cards/app_card_provider.py +27 -0
  17. droidrun-0.3.10.dev6/droidrun/app_cards/providers/__init__.py +7 -0
  18. droidrun-0.3.10.dev6/droidrun/app_cards/providers/composite_provider.py +97 -0
  19. droidrun-0.3.10.dev6/droidrun/app_cards/providers/local_provider.py +116 -0
  20. droidrun-0.3.10.dev6/droidrun/app_cards/providers/server_provider.py +126 -0
  21. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/cli/main.py +241 -30
  22. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/config_manager/__init__.py +0 -2
  23. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/config_manager/config_manager.py +45 -101
  24. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/config_manager/path_resolver.py +1 -1
  25. droidrun-0.3.10.dev6/droidrun/config_manager/prompt_loader.py +72 -0
  26. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/portal.py +17 -0
  27. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/tools/adb.py +13 -34
  28. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/pyproject.toml +3 -18
  29. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/uv.lock +765 -4
  30. droidrun-0.3.10.dev4/config/prompts/executor/rev1.md +0 -58
  31. droidrun-0.3.10.dev4/droidrun/agent/executor/prompts.py +0 -142
  32. droidrun-0.3.10.dev4/droidrun/config_manager/app_card_loader.py +0 -148
  33. droidrun-0.3.10.dev4/droidrun/config_manager/prompt_loader.py +0 -75
  34. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/.github/workflows/bounty.yml +0 -0
  35. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/.github/workflows/publish.yml +0 -0
  36. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/.gitignore +0 -0
  37. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/.python-version +0 -0
  38. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/CHANGELOG.md +0 -0
  39. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/CONTRIBUTING.md +0 -0
  40. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/LICENSE +0 -0
  41. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/MANIFEST.in +0 -0
  42. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/README.md +0 -0
  43. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/config/app_cards/README.md +0 -0
  44. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/config/app_cards/app_cards.json +0 -0
  45. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/config/app_cards/gmail.md +0 -0
  46. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/.generated-files.txt +0 -0
  47. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/docs.json +0 -0
  48. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/favicon.png +0 -0
  49. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/logo/dark.svg +0 -0
  50. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/logo/light.svg +0 -0
  51. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v1/concepts/agent.mdx +0 -0
  52. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v1/concepts/android-control.mdx +0 -0
  53. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v1/concepts/portal-app.mdx +0 -0
  54. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v1/overview.mdx +0 -0
  55. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v1/quickstart.mdx +0 -0
  56. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/concepts/agent.mdx +0 -0
  57. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/concepts/android-control.mdx +0 -0
  58. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/concepts/planning.mdx +0 -0
  59. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/concepts/portal-app.mdx +0 -0
  60. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/concepts/tracing.mdx +0 -0
  61. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/overview.mdx +0 -0
  62. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v2/quickstart.mdx +0 -0
  63. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/concepts/agent.mdx +0 -0
  64. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/concepts/android-tools.mdx +0 -0
  65. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/concepts/models.mdx +0 -0
  66. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/concepts/portal-app.mdx +0 -0
  67. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/cli.mdx +0 -0
  68. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/gemini.mdx +0 -0
  69. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/ollama.mdx +0 -0
  70. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/openailike.mdx +0 -0
  71. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/overview.mdx +0 -0
  72. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/guides/telemetry.mdx +0 -0
  73. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/images/portal_apk.png +0 -0
  74. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/overview.mdx +0 -0
  75. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/quickstart.mdx +0 -0
  76. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/sdk/adb-tools.mdx +0 -0
  77. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/sdk/base-tools.mdx +0 -0
  78. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/sdk/droid-agent.mdx +0 -0
  79. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/docs/v3/sdk/ios-tools.mdx +0 -0
  80. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/__init__.py +0 -0
  81. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/__main__.py +0 -0
  82. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/__init__.py +0 -0
  83. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/codeact/__init__.py +0 -0
  84. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/codeact/events.py +0 -0
  85. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/common/constants.py +0 -0
  86. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/common/events.py +0 -0
  87. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/context/__init__.py +0 -0
  88. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/context/episodic_memory.py +0 -0
  89. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/context/task_manager.py +0 -0
  90. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/droid/__init__.py +0 -0
  91. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/droid/droid_agent.py +0 -0
  92. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/executor/__init__.py +0 -0
  93. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/executor/events.py +0 -0
  94. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/manager/__init__.py +0 -0
  95. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/manager/events.py +0 -0
  96. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/manager/prompts.py +0 -0
  97. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/oneflows/app_starter_workflow.py +0 -0
  98. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/oneflows/text_manipulator.py +0 -0
  99. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/usage.py +0 -0
  100. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/__init__.py +0 -0
  101. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/async_utils.py +0 -0
  102. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/chat_utils.py +0 -0
  103. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/device_state_formatter.py +0 -0
  104. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/executer.py +0 -0
  105. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/inference.py +0 -0
  106. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/message_utils.py +0 -0
  107. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/agent/utils/trajectory.py +0 -0
  108. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/cli/__init__.py +0 -0
  109. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/cli/logs.py +0 -0
  110. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/macro/__init__.py +0 -0
  111. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/macro/__main__.py +0 -0
  112. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/macro/cli.py +0 -0
  113. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/macro/replay.py +0 -0
  114. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/telemetry/__init__.py +0 -0
  115. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/telemetry/events.py +0 -0
  116. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/telemetry/phoenix.py +0 -0
  117. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/telemetry/tracker.py +0 -0
  118. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/tools/__init__.py +0 -0
  119. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/tools/ios.py +0 -0
  120. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/droidrun/tools/tools.py +0 -0
  121. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/gen-docs-sdk-ref.sh +0 -0
  122. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/setup.py +0 -0
  123. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/static/droidrun-dark.png +0 -0
  124. {droidrun-0.3.10.dev4 → droidrun-0.3.10.dev6}/static/droidrun.png +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: droidrun
3
- Version: 0.3.10.dev4
3
+ Version: 0.3.10.dev6
4
4
  Summary: A framework for controlling Android devices through LLM agents
5
5
  Project-URL: Homepage, https://github.com/droidrun/droidrun
6
6
  Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
@@ -13,8 +13,6 @@ Classifier: Intended Audience :: Developers
13
13
  Classifier: Intended Audience :: Information Technology
14
14
  Classifier: Intended Audience :: Science/Research
15
15
  Classifier: License :: OSI Approved :: MIT License
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.10
18
16
  Classifier: Programming Language :: Python :: 3.11
19
17
  Classifier: Programming Language :: Python :: 3.12
20
18
  Classifier: Programming Language :: Python :: 3.13
@@ -27,10 +25,11 @@ Classifier: Topic :: Software Development :: Testing
27
25
  Classifier: Topic :: Software Development :: Testing :: Acceptance
28
26
  Classifier: Topic :: System :: Emulators
29
27
  Classifier: Topic :: Utilities
30
- Requires-Python: >=3.13
28
+ Requires-Python: >=3.11
31
29
  Requires-Dist: adbutils>=2.10.2
32
30
  Requires-Dist: apkutils==2.0.0
33
31
  Requires-Dist: arize-phoenix>=12.3.0
32
+ Requires-Dist: httpx>=0.27.0
34
33
  Requires-Dist: llama-index==0.14.4
35
34
  Requires-Dist: posthog>=6.7.6
36
35
  Requires-Dist: pydantic>=2.11.10
@@ -38,14 +37,6 @@ Requires-Dist: rich>=14.1.0
38
37
  Provides-Extra: anthropic
39
38
  Requires-Dist: anthropic>=0.67.0; extra == 'anthropic'
40
39
  Requires-Dist: llama-index-llms-anthropic<0.9.0,>=0.8.6; extra == 'anthropic'
41
- Provides-Extra: backend
42
- Requires-Dist: aiohttp>=3.9.0; extra == 'backend'
43
- Requires-Dist: fastapi>=0.104.0; extra == 'backend'
44
- Requires-Dist: pydantic-settings>=2.0.0; extra == 'backend'
45
- Requires-Dist: python-dotenv>=1.0.0; extra == 'backend'
46
- Requires-Dist: python-multipart>=0.0.6; extra == 'backend'
47
- Requires-Dist: uvicorn[standard]>=0.24.0; extra == 'backend'
48
- Requires-Dist: websockets>=12.0; extra == 'backend'
49
40
  Provides-Extra: deepseek
50
41
  Requires-Dist: llama-index-llms-deepseek>=0.2.1; extra == 'deepseek'
51
42
  Provides-Extra: dev
@@ -49,7 +49,7 @@ complete(success=True, reason="Successfully navigated to Wi-Fi settings and init
49
49
 
50
50
  ## Tools:
51
51
  In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
52
- {tool_descriptions}
52
+ {{ tool_descriptions }}
53
53
 
54
54
 
55
55
  ## Final Answer Guidelines:
@@ -1,5 +1,5 @@
1
1
  **Current Request:**
2
- {goal}
2
+ {{ goal }}
3
3
 
4
4
  **Is the precondition met? What is your reasoning and the next step to address this request?**
5
5
  Explain your thought process then provide code in ```python ... ``` tags if needed.
@@ -0,0 +1,78 @@
1
+ # Android Action Executor
2
+
3
+ You are an action executor. Your only job: execute the current subgoal exactly as written.
4
+
5
+ ## Context
6
+
7
+ **User Request:** {{ instruction }}
8
+
9
+ {% if app_card %}
10
+ App card gives information on how to operate the app and perform actions.
11
+ **App Card:** {{ app_card }}
12
+
13
+ {% endif %}
14
+ {% if device_state %}
15
+ **Device State:** {{ device_state }}
16
+
17
+ {% endif %}
18
+ **Overall Plan:** {{ plan }}
19
+
20
+ **Current Subgoal:** {{ subgoal }}
21
+
22
+ **Progress:** {{ progress_status|default("No progress yet.") }}
23
+
24
+ **Recent Actions:**
25
+ {% if action_history %}
26
+ {% for action in action_history[-5:] %}
27
+ {% if action.outcome %}
28
+ - Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Successful
29
+ {% else %}
30
+ - Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Failed | Feedback: {{ action.error }}
31
+ {% endif %}
32
+ {% endfor %}
33
+ {% else %}
34
+ No actions have been taken yet.
35
+ {% endif %}
36
+
37
+ ---
38
+
39
+ ## Your Task
40
+
41
+ 1. Read the current subgoal
42
+ 2. Identify the action verb (tap, swipe, type, press, open)
43
+ 3. Identify the target (button name, text, coordinates)
44
+ 4. Execute that exact action
45
+
46
+ **Do not:**
47
+ - Answer questions
48
+ - Make decisions about what to do next
49
+ - Optimize or substitute actions
50
+ - Repeat failed actions more than once
51
+
52
+ ---
53
+
54
+ ## Action Reference
55
+
56
+ ### Available Actions
57
+ {% for action_name, action_info in atomic_actions.items() %}
58
+ - {{ action_name }}({{ action_info.arguments|join(', ') }}): {{ action_info.description }}
59
+ {% endfor %}
60
+
61
+ ### Key Rules
62
+ - Close popups (permission requests) before proceeding
63
+ - Always activate input box (click it) before typing
64
+ - Use `open_app` to launch apps, not the app drawer
65
+ - Try different swipe directions if content doesn't change
66
+
67
+ ---
68
+
69
+ ## Output Format
70
+
71
+ ### Thought ###
72
+ What action and target does the subgoal specify?
73
+
74
+ ### Action ###
75
+ {"action": "action_name", "argument": "value"}
76
+
77
+ ### Description ###
78
+ One sentence describing the action you're taking.
@@ -1,13 +1,24 @@
1
1
  You are a LOW-LEVEL ACTION EXECUTOR for an Android phone. You do NOT answer questions or provide results. You ONLY perform individual atomic actions as specified in the current subgoal. You are part of a larger system - your job is to execute actions, not to think about or answer the user's original question.
2
2
 
3
3
  ### User Request ###
4
- {instruction}
4
+ {{ instruction }}
5
5
 
6
- {app_card}{device_state_text}### Overall Plan ###
7
- {plan}
6
+ {% if app_card %}
7
+ App card gives information on how to operate the app and perform actions.
8
+ ### App Card ###
9
+ {{ app_card }}
10
+
11
+ {% endif %}
12
+ {% if device_state %}
13
+ ### Device State ###
14
+ {{ device_state }}
15
+
16
+ {% endif %}
17
+ ### Overall Plan ###
18
+ {{ plan }}
8
19
 
9
20
  ### Current Subgoal ###
10
- EXECUTE THIS SUBGOAL: {subgoal}
21
+ EXECUTE THIS SUBGOAL: {{ subgoal }}
11
22
 
12
23
  EXECUTION MODE: You are a dumb robot. Find the exact text/element mentioned in the subgoal above and perform the specified action on it. Do not read anything below this line until after you execute the subgoal.
13
24
 
@@ -25,7 +36,7 @@ Convert directly to atomic action:
25
36
  Execute the atomic action for the exact target mentioned. Ignore everything else.
26
37
 
27
38
  ### Progress Status ###
28
- {progress_status}
39
+ {{ progress_status|default("No progress yet.") }}
29
40
 
30
41
  ### Guidelines ###
31
42
  General:
@@ -47,11 +58,25 @@ Execute the current subgoal mechanically. Do NOT examine the screen content or m
47
58
 
48
59
  #### Atomic Actions ####
49
60
  The atomic action functions are listed in the format of `action(arguments): description` as follows:
50
- {atomic_actions}
61
+ {% for action_name, action_info in atomic_actions.items() %}
62
+ - {{ action_name }}({{ action_info.arguments|join(', ') }}): {{ action_info.description }}
63
+ {% endfor %}
51
64
 
52
65
  ### Latest Action History ###
53
- {action_history}
54
-
66
+ {% if action_history %}
67
+ Recent actions you took previously and whether they were successful:
68
+ {% for action in action_history[-5:] %}
69
+ {% if action.outcome %}
70
+ Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Successful
71
+ {% else %}
72
+ Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Failed | Feedback: {{ action.error }}
73
+ {% endif %}
74
+ {% endfor %}
75
+
76
+ {% else %}
77
+ No actions have been taken yet.
78
+
79
+ {% endif %}
55
80
  ---
56
81
  ### LITERAL EXECUTION RULE ###
57
82
  Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action.
@@ -3,12 +3,46 @@
3
3
  You operate an Android phone by creating high-level plans to fulfill user requests.
4
4
 
5
5
  ## User Request
6
- {instruction}
6
+ {{ instruction }}
7
7
 
8
8
  ## Current Context
9
- {device_date}{app_card}{important_notes}{error_history}
10
-
11
- {custom_tools_descriptions}
9
+ {% if device_date %}
10
+ <device_date>
11
+ {{ device_date }}
12
+ </device_date>
13
+
14
+ {% endif %}
15
+ {% if app_card %}
16
+ App card gives information on how to operate the app and perform actions.
17
+ <app_card>
18
+ {{ app_card }}
19
+ </app_card>
20
+
21
+ {% endif %}
22
+ {% if important_notes %}
23
+ <important_notes>
24
+ {{ important_notes }}
25
+ </important_notes>
26
+
27
+ {% endif %}
28
+ {% if error_history %}
29
+ <potentially_stuck>
30
+ You have encountered several failed attempts. Here are some logs:
31
+ {% for error in error_history %}
32
+ - Attempt: Action: {{ error.action }} | Description: {{ error.summary }} | Outcome: Failed | Feedback: {{ error.error }}
33
+ {% endfor %}
34
+ </potentially_stuck>
35
+
36
+ {% endif %}
37
+ {% if custom_tools_descriptions %}
38
+
39
+ <custom_actions>
40
+ The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
41
+ {{ custom_tools_descriptions }}
42
+
43
+ You can reference these custom actions or tell the Executor agent to use them in your plan when they help achieve the user's goal.
44
+ </custom_actions>
45
+ {% endif %}
12
46
 
13
47
  ---
14
48
 
@@ -28,7 +62,17 @@ You operate an Android phone by creating high-level plans to fulfill user reques
28
62
  - Use memory instead of clipboard unless specifically requested
29
63
 
30
64
  **Text Operations:**
31
- {text_manipulation_section}
65
+ {% if text_manipulation_enabled %}
66
+
67
+ <text_manipulation>
68
+ 1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
69
+ 2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
70
+ 3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
71
+ 4. The focused text field contains editable text that you can modify
72
+ 5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
73
+ 6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
74
+ </text_manipulation>
75
+ {% endif %}
32
76
 
33
77
  ---
34
78
 
@@ -68,4 +112,4 @@ Example: "At step 5, I obtained recipe from recipes.jpg: Chicken Pasta - ingredi
68
112
 
69
113
  <request_accomplished>
70
114
  Use ONLY when request is fully completed through concrete actions. Include confirmation message of what was accomplished.
71
- </request_accomplished>
115
+ </request_accomplished>
@@ -1,11 +1,37 @@
1
1
  You are an agent who can operate an Android phone on behalf of a user. Your goal is to track progress and devise high-level plans to achieve the user's requests.
2
2
 
3
3
  <user_request>
4
- {instruction}
4
+ {{ instruction }}
5
5
  </user_request>
6
6
 
7
- {device_date}{app_card}{important_notes}{error_history}
7
+ {% if device_date %}
8
+ <device_date>
9
+ {{ device_date }}
10
+ </device_date>
8
11
 
12
+ {% endif %}
13
+ {% if app_card %}
14
+ App card gives information on how to operate the app and perform actions.
15
+ <app_card>
16
+ {{ app_card }}
17
+ </app_card>
18
+
19
+ {% endif %}
20
+ {% if important_notes %}
21
+ <important_notes>
22
+ {{ important_notes }}
23
+ </important_notes>
24
+
25
+ {% endif %}
26
+ {% if error_history %}
27
+ <potentially_stuck>
28
+ You have encountered several failed attempts. Here are some logs:
29
+ {% for error in error_history %}
30
+ - Attempt: Action: {{ error.action }} | Description: {{ error.summary }} | Outcome: Failed | Feedback: {{ error.error }}
31
+ {% endfor %}
32
+ </potentially_stuck>
33
+
34
+ {% endif %}
9
35
  <guidelines>
10
36
  The following guidelines will help you plan this request.
11
37
  General:
@@ -17,7 +43,17 @@ General:
17
43
  6. Make sure names and titles are not cut off. If the request is to check who sent a message, make sure to check the message sender's full name, not just what appears in the notification, because it might be cut off.
18
44
  7. Dates and file names must match the user query exactly.
19
45
  8. Don't do more than what the user asks for.
20
- {text_manipulation_section}
46
+ {% if text_manipulation_enabled %}
47
+
48
+ <text_manipulation>
49
+ 1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
50
+ 2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
51
+ 3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
52
+ 4. The focused text field contains editable text that you can modify
53
+ 5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
54
+ 6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
55
+ </text_manipulation>
56
+ {% endif %}
21
57
 
22
58
  Memory Usage:
23
59
  - Always include step context: "At step [number], I obtained [actual content] from [source]"
@@ -27,7 +63,16 @@ Memory Usage:
27
63
  - Update memory to track progress on multi-step tasks
28
64
 
29
65
  </guidelines>
30
- {custom_tools_descriptions}
66
+ {% if custom_tools_descriptions %}
67
+
68
+ <custom_actions>
69
+ The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
70
+ {{ custom_tools_descriptions }}
71
+
72
+ You can reference these custom actions or tell the Executor agent to use them in your plan when they help achieve the user's goal.
73
+ </custom_actions>
74
+ {% endif %}
75
+
31
76
  ---
32
77
  Carefully assess the current status and the provided screenshot. Check if the current plan needs to be revised.
33
78
  Determine if the user request has been fully completed. If you are confident that no further actions are required, use the request_accomplished tag with a message in it. If the user request is not finished, update the plan and don't use it. If you are stuck with errors, think step by step about whether the overall plan needs to be revised to address the error.
@@ -19,30 +19,38 @@ agent:
19
19
  # Enable vision capabilities (screenshots)
20
20
  vision: false
21
21
  # System prompt filename (located in prompts_dir/codeact/)
22
- system_prompt: system.md
22
+ system_prompt: system.jinja2
23
23
  # User prompt filename (located in prompts_dir/codeact/)
24
- user_prompt: user.md
24
+ user_prompt: user.jinja2
25
25
 
26
26
  # Manager Agent Configuration
27
27
  manager:
28
28
  # Enable vision capabilities (screenshots)
29
29
  vision: false
30
30
  # System prompt filename (located in prompts_dir/manager/)
31
- system_prompt: system.md
31
+ system_prompt: system.jinja2
32
32
 
33
33
  # Executor Agent Configuration
34
34
  executor:
35
35
  # Enable vision capabilities (screenshots)
36
36
  vision: false
37
37
  # System prompt filename (located in prompts_dir/executor/)
38
- system_prompt: system.md
38
+ system_prompt: system.jinja2
39
39
 
40
40
  # App Cards Configuration
41
41
  app_cards:
42
42
  # Enable app-specific instruction cards
43
43
  enabled: true
44
- # Directory containing app card files
44
+ # Mode: local (file-based), server (HTTP API), or composite (server with local fallback)
45
+ mode: local
46
+ # Directory containing app card files (for local/composite modes)
45
47
  app_cards_dir: config/app_cards
48
+ # Server URL for remote app cards (for server/composite modes)
49
+ server_url: null
50
+ # Server request timeout in seconds
51
+ server_timeout: 10.0
52
+ # Number of server retry attempts
53
+ server_max_retries: 2
46
54
 
47
55
  # === LLM Profiles ===
48
56
  # Define LLM configurations for each agent type
@@ -85,15 +85,8 @@ class CodeActAgent(Workflow):
85
85
  self.tool_list = {}
86
86
  for action_name, signature in merged_signatures.items():
87
87
  func = signature["function"]
88
- if asyncio.iscoroutinefunction(func):
89
- # Create async bound function with proper closure
90
- def make_bound(f, ti):
91
- async def bound_func(*args, **kwargs):
92
- return await f(ti, *args, **kwargs)
93
- return bound_func
94
- self.tool_list[action_name] = make_bound(func, tools_instance)
95
- else:
96
- self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
88
+
89
+ self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
97
90
 
98
91
  self.tool_list["remember"] = tools_instance.remember
99
92
  self.tool_list["complete"] = tools_instance.complete
@@ -113,13 +106,10 @@ class CodeActAgent(Workflow):
113
106
  )
114
107
  self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
115
108
 
116
- self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
117
-
118
109
  self.executor = SimpleCodeExecutor(
119
110
  loop=asyncio.get_event_loop(),
120
111
  locals={},
121
112
  tools=self.tool_list,
122
- tools_instance=tools_instance,
123
113
  globals={"__builtins__": __builtins__},
124
114
  )
125
115
 
@@ -293,27 +283,30 @@ Now, describe the next step you will take to address the original goal: {goal}""
293
283
  try:
294
284
  self.code_exec_counter += 1
295
285
  result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
296
- logger.info(f"💡 Code execution successful. Result: {result['output']}")
286
+ logger.info(f"💡 Code execution successful. Result: {result}")
297
287
  await asyncio.sleep(self.agent_config.after_sleep_action)
298
- screenshots = result['screenshots']
299
- for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
300
- ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
301
-
302
- ui_states = result['ui_states']
303
- for ui_state in ui_states[:-1]:
304
- ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
305
288
 
289
+ # Check if complete() was called
306
290
  if self.tools.finished:
307
- logger.debug(" - Task completed.")
308
- event = TaskEndEvent(
309
- success=self.tools.success, reason=self.tools.reason
310
- )
291
+ logger.info(" Task marked as complete via complete() function")
292
+
293
+ # Validate completion state
294
+ success = self.tools.success if self.tools.success is not None else False
295
+ reason = self.tools.reason if self.tools.reason else "Task completed without reason"
296
+
297
+ # Reset finished flag for next execution
298
+ self.tools.finished = False
299
+
300
+ logger.info(f" - Success: {success}")
301
+ logger.info(f" - Reason: {reason}")
302
+
303
+ event = TaskEndEvent(success=success, reason=reason)
311
304
  ctx.write_event_to_stream(event)
312
305
  return event
313
306
 
314
307
  self.remembered_info = self.tools.memory
315
308
 
316
- event = TaskExecutionResultEvent(output=str(result['output']))
309
+ event = TaskExecutionResultEvent(output=str(result))
317
310
  ctx.write_event_to_stream(event)
318
311
  return event
319
312
 
@@ -10,6 +10,7 @@ For internal events with full debugging metadata, see:
10
10
  - codeact/events.py (Task*, EpisodicMemoryEvent)
11
11
  """
12
12
 
13
+ import asyncio
13
14
  from typing import Dict, List
14
15
 
15
16
  from llama_index.core.workflow import Event
@@ -49,7 +50,9 @@ class DroidAgentState(BaseModel):
49
50
 
50
51
  # Task context
51
52
  instruction: str = ""
52
-
53
+ # App Cards
54
+ app_card: str = ""
55
+ app_card_loading_task: asyncio.Task[str] | None = None
53
56
  # Formatted device state for prompts (complete text)
54
57
  formatted_device_state: str = ""
55
58
 
@@ -90,52 +90,38 @@ class ExecutorAgent(Workflow): # TODO: Fix a bug in bad prompt
90
90
  subgoal = ev.get("subgoal", "")
91
91
  logger.info(f"🧠 Executor thinking about action for: {subgoal}")
92
92
 
93
- # Format app card (include tags in variable value or empty string)
94
- app_card = "" # TODO: Implement app card retrieval
95
- app_card_text = ""
96
- if app_card.strip():
97
- app_card_text = "App card gives information on how to operate the app and perform actions.\n### App Card ###\n" + app_card.strip() + "\n\n"
98
-
99
- # Format device state (use unified state)
100
- device_state_text = ""
101
- if self.shared_state.formatted_device_state and self.shared_state.formatted_device_state.strip():
102
- device_state_text = "### Device State ###\n" + self.shared_state.formatted_device_state.strip() + "\n\n"
103
-
104
- # Format progress status
105
- progress_status_text = self.shared_state.progress_status + "\n\n" if self.shared_state.progress_status else "No progress yet.\n\n"
106
-
107
- # Format atomic actions
108
- atomic_actions_text = chr(10).join(
109
- f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}"
110
- for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items()
111
- ) + "\n"
112
-
113
- # Format action history
93
+ # Prepare action history as structured data (last 5 actions)
94
+ action_history = []
114
95
  if self.shared_state.action_history:
115
- action_history_text = "Recent actions you took previously and whether they were successful:\n" + "\n".join(
116
- (f"Action: {act} | Description: {summ} | Outcome: Successful" if outcome
117
- else f"Action: {act} | Description: {summ} | Outcome: Failed | Feedback: {err_des}")
96
+ n = min(5, len(self.shared_state.action_history))
97
+ action_history = [
98
+ {
99
+ "action": act,
100
+ "summary": summ,
101
+ "outcome": outcome,
102
+ "error": err_des
103
+ }
118
104
  for act, summ, outcome, err_des in zip(
119
- self.shared_state.action_history[-min(5, len(self.shared_state.action_history)):],
120
- self.shared_state.summary_history[-min(5, len(self.shared_state.action_history)):],
121
- self.shared_state.action_outcomes[-min(5, len(self.shared_state.action_history)):],
122
- self.shared_state.error_descriptions[-min(5, len(self.shared_state.action_history)):], strict=True)
123
- ) + "\n\n"
124
- else:
125
- action_history_text = "No actions have been taken yet.\n\n"
126
-
127
- # Load and format prompt
105
+ self.shared_state.action_history[-n:],
106
+ self.shared_state.summary_history[-n:],
107
+ self.shared_state.action_outcomes[-n:],
108
+ self.shared_state.error_descriptions[-n:],
109
+ strict=True
110
+ )
111
+ ]
112
+
113
+ # Let Jinja2 handle all formatting
128
114
  system_prompt = PromptLoader.load_prompt(
129
115
  self.agent_config.get_executor_system_prompt_path(),
130
116
  {
131
117
  "instruction": self.shared_state.instruction,
132
- "app_card": app_card_text,
133
- "device_state_text": device_state_text,
118
+ "app_card": "", # TODO: Implement app card loader
119
+ "device_state": self.shared_state.formatted_device_state,
134
120
  "plan": self.shared_state.plan,
135
121
  "subgoal": subgoal,
136
- "progress_status": progress_status_text,
137
- "atomic_actions": atomic_actions_text,
138
- "action_history": action_history_text
122
+ "progress_status": self.shared_state.progress_status,
123
+ "atomic_actions": ATOMIC_ACTION_SIGNATURES,
124
+ "action_history": action_history
139
125
  }
140
126
  )
141
127
 
@@ -0,0 +1,34 @@
1
+ """
2
+ Prompts for the ExecutorAgent.
3
+ """
4
+
5
+ def parse_executor_response(response: str) -> dict:
6
+ """
7
+ Parse the Executor LLM response.
8
+
9
+ Extracts:
10
+ - thought: Content between "### Thought" and "### Action"
11
+ - action: Content between "### Action" and "### Description"
12
+ - description: Content after "### Description"
13
+
14
+ Args:
15
+ response: Raw LLM response string
16
+
17
+ Returns:
18
+ Dictionary with 'thought', 'action', 'description' keys
19
+ """
20
+ thought = response.split("### Thought")[-1].split("### Action")[0].replace("\n", " ").replace(" ", " ").replace("###", "").strip()
21
+ action_raw = response.split("### Action")[-1].split("### Description")[0].replace("\n", " ").replace(" ", " ").replace("###", "").strip()
22
+ start_idx = action_raw.find('{')
23
+ end_idx = action_raw.rfind('}')
24
+ if start_idx != -1 and end_idx != -1:
25
+ action = action_raw[start_idx:end_idx + 1]
26
+ else:
27
+ action = action_raw
28
+ description = response.split("### Description")[-1].replace("\n", " ").replace(" ", " ").replace("###", "").strip()
29
+
30
+ return {
31
+ "thought": thought,
32
+ "action": action,
33
+ "description": description
34
+ }