droidrun 0.3.10.dev5__tar.gz → 0.3.10.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/PKG-INFO +2 -9
- droidrun-0.3.10.dev5/config/prompts/codeact/system.md → droidrun-0.3.10.dev7/config/prompts/codeact/system.jinja2 +1 -1
- droidrun-0.3.10.dev5/config/prompts/codeact/user.md → droidrun-0.3.10.dev7/config/prompts/codeact/user.jinja2 +1 -1
- droidrun-0.3.10.dev7/config/prompts/executor/rev1.jinja2 +78 -0
- droidrun-0.3.10.dev5/config/prompts/executor/system.md → droidrun-0.3.10.dev7/config/prompts/executor/system.jinja2 +33 -8
- droidrun-0.3.10.dev5/config/prompts/manager/rev1.md → droidrun-0.3.10.dev7/config/prompts/manager/rev1.jinja2 +50 -6
- droidrun-0.3.10.dev5/config/prompts/manager/system.md → droidrun-0.3.10.dev7/config/prompts/manager/system.jinja2 +49 -4
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/config_example.yaml +13 -5
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/codeact/codeact_agent.py +21 -29
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/context/task_manager.py +0 -1
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/droid/droid_agent.py +1 -3
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/droid/events.py +6 -3
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/executor/executor_agent.py +24 -38
- droidrun-0.3.10.dev7/droidrun/agent/executor/prompts.py +34 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/manager/__init__.py +1 -1
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/manager/manager_agent.py +104 -87
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/executer.py +11 -10
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/llm_picker.py +63 -1
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/tools.py +30 -1
- droidrun-0.3.10.dev7/droidrun/app_cards/app_card_provider.py +26 -0
- droidrun-0.3.10.dev7/droidrun/app_cards/providers/__init__.py +7 -0
- droidrun-0.3.10.dev7/droidrun/app_cards/providers/composite_provider.py +97 -0
- droidrun-0.3.10.dev7/droidrun/app_cards/providers/local_provider.py +115 -0
- droidrun-0.3.10.dev7/droidrun/app_cards/providers/server_provider.py +126 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/cli/main.py +244 -34
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/config_manager/__init__.py +0 -2
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/config_manager/config_manager.py +45 -102
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/config_manager/path_resolver.py +1 -1
- droidrun-0.3.10.dev7/droidrun/config_manager/prompt_loader.py +72 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/macro/cli.py +0 -1
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/portal.py +17 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/tools/adb.py +13 -34
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/pyproject.toml +2 -15
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/uv.lock +709 -135
- droidrun-0.3.10.dev5/config/prompts/executor/rev1.md +0 -58
- droidrun-0.3.10.dev5/droidrun/agent/executor/prompts.py +0 -142
- droidrun-0.3.10.dev5/droidrun/config_manager/app_card_loader.py +0 -148
- droidrun-0.3.10.dev5/droidrun/config_manager/prompt_loader.py +0 -75
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/.github/workflows/bounty.yml +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/.github/workflows/publish.yml +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/.gitignore +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/.python-version +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/CHANGELOG.md +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/CONTRIBUTING.md +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/LICENSE +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/MANIFEST.in +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/README.md +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/config/app_cards/README.md +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/config/app_cards/app_cards.json +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/config/app_cards/gmail.md +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/.generated-files.txt +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/docs.json +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/favicon.png +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/logo/dark.svg +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/logo/light.svg +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v1/concepts/agent.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v1/concepts/android-control.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v1/concepts/portal-app.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v1/overview.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v1/quickstart.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/concepts/agent.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/concepts/android-control.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/concepts/planning.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/concepts/portal-app.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/concepts/tracing.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/overview.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v2/quickstart.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/concepts/agent.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/concepts/android-tools.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/concepts/models.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/concepts/portal-app.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/cli.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/gemini.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/ollama.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/openailike.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/overview.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/guides/telemetry.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/images/portal_apk.png +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/overview.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/quickstart.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/sdk/adb-tools.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/sdk/base-tools.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/sdk/droid-agent.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/docs/v3/sdk/ios-tools.mdx +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/__main__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/codeact/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/codeact/events.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/common/constants.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/common/events.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/context/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/context/episodic_memory.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/droid/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/executor/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/executor/events.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/manager/events.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/manager/prompts.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/oneflows/app_starter_workflow.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/oneflows/text_manipulator.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/usage.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/async_utils.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/chat_utils.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/device_state_formatter.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/inference.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/message_utils.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/agent/utils/trajectory.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/cli/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/cli/logs.py +4 -4
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/macro/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/macro/__main__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/macro/replay.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/telemetry/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/telemetry/events.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/telemetry/phoenix.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/telemetry/tracker.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/tools/__init__.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/tools/ios.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/droidrun/tools/tools.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/gen-docs-sdk-ref.sh +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/setup.py +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/static/droidrun-dark.png +0 -0
- {droidrun-0.3.10.dev5 → droidrun-0.3.10.dev7}/static/droidrun.png +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: droidrun
|
3
|
-
Version: 0.3.10.dev5
|
3
|
+
Version: 0.3.10.dev7
|
4
4
|
Summary: A framework for controlling Android devices through LLM agents
|
5
5
|
Project-URL: Homepage, https://github.com/droidrun/droidrun
|
6
6
|
Project-URL: Bug Tracker, https://github.com/droidrun/droidrun/issues
|
@@ -29,6 +29,7 @@ Requires-Python: >=3.11
|
|
29
29
|
Requires-Dist: adbutils>=2.10.2
|
30
30
|
Requires-Dist: apkutils==2.0.0
|
31
31
|
Requires-Dist: arize-phoenix>=12.3.0
|
32
|
+
Requires-Dist: httpx>=0.27.0
|
32
33
|
Requires-Dist: llama-index==0.14.4
|
33
34
|
Requires-Dist: posthog>=6.7.6
|
34
35
|
Requires-Dist: pydantic>=2.11.10
|
@@ -36,14 +37,6 @@ Requires-Dist: rich>=14.1.0
|
|
36
37
|
Provides-Extra: anthropic
|
37
38
|
Requires-Dist: anthropic>=0.67.0; extra == 'anthropic'
|
38
39
|
Requires-Dist: llama-index-llms-anthropic<0.9.0,>=0.8.6; extra == 'anthropic'
|
39
|
-
Provides-Extra: backend
|
40
|
-
Requires-Dist: aiohttp>=3.9.0; extra == 'backend'
|
41
|
-
Requires-Dist: fastapi>=0.104.0; extra == 'backend'
|
42
|
-
Requires-Dist: pydantic-settings>=2.0.0; extra == 'backend'
|
43
|
-
Requires-Dist: python-dotenv>=1.0.0; extra == 'backend'
|
44
|
-
Requires-Dist: python-multipart>=0.0.6; extra == 'backend'
|
45
|
-
Requires-Dist: uvicorn[standard]>=0.24.0; extra == 'backend'
|
46
|
-
Requires-Dist: websockets>=12.0; extra == 'backend'
|
47
40
|
Provides-Extra: deepseek
|
48
41
|
Requires-Dist: llama-index-llms-deepseek>=0.2.1; extra == 'deepseek'
|
49
42
|
Provides-Extra: dev
|
@@ -49,7 +49,7 @@ complete(success=True, reason="Successfully navigated to Wi-Fi settings and init
|
|
49
49
|
|
50
50
|
## Tools:
|
51
51
|
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
52
|
-
{tool_descriptions}
|
52
|
+
{{ tool_descriptions }}
|
53
53
|
|
54
54
|
|
55
55
|
## Final Answer Guidelines:
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Android Action Executor
|
2
|
+
|
3
|
+
You are an action executor. Your only job: execute the current subgoal exactly as written.
|
4
|
+
|
5
|
+
## Context
|
6
|
+
|
7
|
+
**User Request:** {{ instruction }}
|
8
|
+
|
9
|
+
{% if app_card %}
|
10
|
+
App card gives information on how to operate the app and perform actions.
|
11
|
+
**App Card:** {{ app_card }}
|
12
|
+
|
13
|
+
{% endif %}
|
14
|
+
{% if device_state %}
|
15
|
+
**Device State:** {{ device_state }}
|
16
|
+
|
17
|
+
{% endif %}
|
18
|
+
**Overall Plan:** {{ plan }}
|
19
|
+
|
20
|
+
**Current Subgoal:** {{ subgoal }}
|
21
|
+
|
22
|
+
**Progress:** {{ progress_status|default("No progress yet.") }}
|
23
|
+
|
24
|
+
**Recent Actions:**
|
25
|
+
{% if action_history %}
|
26
|
+
{% for action in action_history[-5:] %}
|
27
|
+
{% if action.outcome %}
|
28
|
+
- Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Successful
|
29
|
+
{% else %}
|
30
|
+
- Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Failed | Feedback: {{ action.error }}
|
31
|
+
{% endif %}
|
32
|
+
{% endfor %}
|
33
|
+
{% else %}
|
34
|
+
No actions have been taken yet.
|
35
|
+
{% endif %}
|
36
|
+
|
37
|
+
---
|
38
|
+
|
39
|
+
## Your Task
|
40
|
+
|
41
|
+
1. Read the current subgoal
|
42
|
+
2. Identify the action verb (tap, swipe, type, press, open)
|
43
|
+
3. Identify the target (button name, text, coordinates)
|
44
|
+
4. Execute that exact action
|
45
|
+
|
46
|
+
**Do not:**
|
47
|
+
- Answer questions
|
48
|
+
- Make decisions about what to do next
|
49
|
+
- Optimize or substitute actions
|
50
|
+
- Repeat failed actions more than once
|
51
|
+
|
52
|
+
---
|
53
|
+
|
54
|
+
## Action Reference
|
55
|
+
|
56
|
+
### Available Actions
|
57
|
+
{% for action_name, action_info in atomic_actions.items() %}
|
58
|
+
- {{ action_name }}({{ action_info.arguments|join(', ') }}): {{ action_info.description }}
|
59
|
+
{% endfor %}
|
60
|
+
|
61
|
+
### Key Rules
|
62
|
+
- Close popups (permission requests) before proceeding
|
63
|
+
- Always activate input box (click it) before typing
|
64
|
+
- Use `open_app` to launch apps, not the app drawer
|
65
|
+
- Try different swipe directions if content doesn't change
|
66
|
+
|
67
|
+
---
|
68
|
+
|
69
|
+
## Output Format
|
70
|
+
|
71
|
+
### Thought ###
|
72
|
+
What action and target does the subgoal specify?
|
73
|
+
|
74
|
+
### Action ###
|
75
|
+
{"action": "action_name", "argument": "value"}
|
76
|
+
|
77
|
+
### Description ###
|
78
|
+
One sentence describing the action you're taking.
|
@@ -1,13 +1,24 @@
|
|
1
1
|
You are a LOW-LEVEL ACTION EXECUTOR for an Android phone. You do NOT answer questions or provide results. You ONLY perform individual atomic actions as specified in the current subgoal. You are part of a larger system - your job is to execute actions, not to think about or answer the user's original question.
|
2
2
|
|
3
3
|
### User Request ###
|
4
|
-
{instruction}
|
4
|
+
{{ instruction }}
|
5
5
|
|
6
|
-
{
|
7
|
-
|
6
|
+
{% if app_card %}
|
7
|
+
App card gives information on how to operate the app and perform actions.
|
8
|
+
### App Card ###
|
9
|
+
{{ app_card }}
|
10
|
+
|
11
|
+
{% endif %}
|
12
|
+
{% if device_state %}
|
13
|
+
### Device State ###
|
14
|
+
{{ device_state }}
|
15
|
+
|
16
|
+
{% endif %}
|
17
|
+
### Overall Plan ###
|
18
|
+
{{ plan }}
|
8
19
|
|
9
20
|
### Current Subgoal ###
|
10
|
-
EXECUTE THIS SUBGOAL: {subgoal}
|
21
|
+
EXECUTE THIS SUBGOAL: {{ subgoal }}
|
11
22
|
|
12
23
|
EXECUTION MODE: You are a dumb robot. Find the exact text/element mentioned in the subgoal above and perform the specified action on it. Do not read anything below this line until after you execute the subgoal.
|
13
24
|
|
@@ -25,7 +36,7 @@ Convert directly to atomic action:
|
|
25
36
|
Execute the atomic action for the exact target mentioned. Ignore everything else.
|
26
37
|
|
27
38
|
### Progress Status ###
|
28
|
-
{progress_status}
|
39
|
+
{{ progress_status|default("No progress yet.") }}
|
29
40
|
|
30
41
|
### Guidelines ###
|
31
42
|
General:
|
@@ -47,11 +58,25 @@ Execute the current subgoal mechanically. Do NOT examine the screen content or m
|
|
47
58
|
|
48
59
|
#### Atomic Actions ####
|
49
60
|
The atomic action functions are listed in the format of `action(arguments): description` as follows:
|
50
|
-
{atomic_actions}
|
61
|
+
{% for action_name, action_info in atomic_actions.items() %}
|
62
|
+
- {{ action_name }}({{ action_info.arguments|join(', ') }}): {{ action_info.description }}
|
63
|
+
{% endfor %}
|
51
64
|
|
52
65
|
### Latest Action History ###
|
53
|
-
{action_history}
|
54
|
-
|
66
|
+
{% if action_history %}
|
67
|
+
Recent actions you took previously and whether they were successful:
|
68
|
+
{% for action in action_history[-5:] %}
|
69
|
+
{% if action.outcome %}
|
70
|
+
Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Successful
|
71
|
+
{% else %}
|
72
|
+
Action: {{ action.action }} | Description: {{ action.summary }} | Outcome: Failed | Feedback: {{ action.error }}
|
73
|
+
{% endif %}
|
74
|
+
{% endfor %}
|
75
|
+
|
76
|
+
{% else %}
|
77
|
+
No actions have been taken yet.
|
78
|
+
|
79
|
+
{% endif %}
|
55
80
|
---
|
56
81
|
### LITERAL EXECUTION RULE ###
|
57
82
|
Whatever the current subgoal says to do, do that EXACTLY. Do not substitute with what you think is better. Do not optimize. Do not consider screen state. Parse the subgoal text literally and execute the matching atomic action.
|
@@ -3,12 +3,46 @@
|
|
3
3
|
You operate an Android phone by creating high-level plans to fulfill user requests.
|
4
4
|
|
5
5
|
## User Request
|
6
|
-
{instruction}
|
6
|
+
{{ instruction }}
|
7
7
|
|
8
8
|
## Current Context
|
9
|
-
{device_date}
|
10
|
-
|
11
|
-
{
|
9
|
+
{% if device_date %}
|
10
|
+
<device_date>
|
11
|
+
{{ device_date }}
|
12
|
+
</device_date>
|
13
|
+
|
14
|
+
{% endif %}
|
15
|
+
{% if app_card %}
|
16
|
+
App card gives information on how to operate the app and perform actions.
|
17
|
+
<app_card>
|
18
|
+
{{ app_card }}
|
19
|
+
</app_card>
|
20
|
+
|
21
|
+
{% endif %}
|
22
|
+
{% if important_notes %}
|
23
|
+
<important_notes>
|
24
|
+
{{ important_notes }}
|
25
|
+
</important_notes>
|
26
|
+
|
27
|
+
{% endif %}
|
28
|
+
{% if error_history %}
|
29
|
+
<potentially_stuck>
|
30
|
+
You have encountered several failed attempts. Here are some logs:
|
31
|
+
{% for error in error_history %}
|
32
|
+
- Attempt: Action: {{ error.action }} | Description: {{ error.summary }} | Outcome: Failed | Feedback: {{ error.error }}
|
33
|
+
{% endfor %}
|
34
|
+
</potentially_stuck>
|
35
|
+
|
36
|
+
{% endif %}
|
37
|
+
{% if custom_tools_descriptions %}
|
38
|
+
|
39
|
+
<custom_actions>
|
40
|
+
The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
|
41
|
+
{{ custom_tools_descriptions }}
|
42
|
+
|
43
|
+
You can reference these custom actions or tell the Executor agent to use them in your plan when they help achieve the user's goal.
|
44
|
+
</custom_actions>
|
45
|
+
{% endif %}
|
12
46
|
|
13
47
|
---
|
14
48
|
|
@@ -28,7 +62,17 @@ You operate an Android phone by creating high-level plans to fulfill user reques
|
|
28
62
|
- Use memory instead of clipboard unless specifically requested
|
29
63
|
|
30
64
|
**Text Operations:**
|
31
|
-
{
|
65
|
+
{% if text_manipulation_enabled %}
|
66
|
+
|
67
|
+
<text_manipulation>
|
68
|
+
1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
|
69
|
+
2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
|
70
|
+
3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
|
71
|
+
4. The focused text field contains editable text that you can modify
|
72
|
+
5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
|
73
|
+
6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
|
74
|
+
</text_manipulation>
|
75
|
+
{% endif %}
|
32
76
|
|
33
77
|
---
|
34
78
|
|
@@ -68,4 +112,4 @@ Example: "At step 5, I obtained recipe from recipes.jpg: Chicken Pasta - ingredi
|
|
68
112
|
|
69
113
|
<request_accomplished>
|
70
114
|
Use ONLY when request is fully completed through concrete actions. Include confirmation message of what was accomplished.
|
71
|
-
</request_accomplished>
|
115
|
+
</request_accomplished>
|
@@ -1,11 +1,37 @@
|
|
1
1
|
You are an agent who can operate an Android phone on behalf of a user. Your goal is to track progress and devise high-level plans to achieve the user's requests.
|
2
2
|
|
3
3
|
<user_request>
|
4
|
-
{instruction}
|
4
|
+
{{ instruction }}
|
5
5
|
</user_request>
|
6
6
|
|
7
|
-
{device_date}
|
7
|
+
{% if device_date %}
|
8
|
+
<device_date>
|
9
|
+
{{ device_date }}
|
10
|
+
</device_date>
|
8
11
|
|
12
|
+
{% endif %}
|
13
|
+
{% if app_card %}
|
14
|
+
App card gives information on how to operate the app and perform actions.
|
15
|
+
<app_card>
|
16
|
+
{{ app_card }}
|
17
|
+
</app_card>
|
18
|
+
|
19
|
+
{% endif %}
|
20
|
+
{% if important_notes %}
|
21
|
+
<important_notes>
|
22
|
+
{{ important_notes }}
|
23
|
+
</important_notes>
|
24
|
+
|
25
|
+
{% endif %}
|
26
|
+
{% if error_history %}
|
27
|
+
<potentially_stuck>
|
28
|
+
You have encountered several failed attempts. Here are some logs:
|
29
|
+
{% for error in error_history %}
|
30
|
+
- Attempt: Action: {{ error.action }} | Description: {{ error.summary }} | Outcome: Failed | Feedback: {{ error.error }}
|
31
|
+
{% endfor %}
|
32
|
+
</potentially_stuck>
|
33
|
+
|
34
|
+
{% endif %}
|
9
35
|
<guidelines>
|
10
36
|
The following guidelines will help you plan this request.
|
11
37
|
General:
|
@@ -17,7 +43,17 @@ General:
|
|
17
43
|
6. Make sure names and titles are not cut off. If the request is to check who sent a message, make sure to check the message sender's full name, not just what appears in the notification, because it might be cut off.
|
18
44
|
7. Dates and file names must match the user query exactly.
|
19
45
|
8. Don't do more than what the user asks for.
|
20
|
-
{
|
46
|
+
{% if text_manipulation_enabled %}
|
47
|
+
|
48
|
+
<text_manipulation>
|
49
|
+
1. Use **TEXT_TASK:** prefix in your plan when you need to modify text in the currently focused text input field
|
50
|
+
2. TEXT_TASK is for editing, formatting, or transforming existing text content in text boxes using Python code
|
51
|
+
3. Do not use TEXT_TASK for extracting text from messages, typing new text, or composing messages
|
52
|
+
4. The focused text field contains editable text that you can modify
|
53
|
+
5. Example plan item: 'TEXT_TASK: Add "Hello World" at the beginning of the text'
|
54
|
+
6. Always use TEXT_TASK for modifying text, do not try to select the text to copy/cut/paste or adjust the text
|
55
|
+
</text_manipulation>
|
56
|
+
{% endif %}
|
21
57
|
|
22
58
|
Memory Usage:
|
23
59
|
- Always include step context: "At step [number], I obtained [actual content] from [source]"
|
@@ -27,7 +63,16 @@ Memory Usage:
|
|
27
63
|
- Update memory to track progress on multi-step tasks
|
28
64
|
|
29
65
|
</guidelines>
|
30
|
-
{custom_tools_descriptions}
|
66
|
+
{% if custom_tools_descriptions %}
|
67
|
+
|
68
|
+
<custom_actions>
|
69
|
+
The executor has access to these additional custom actions beyond the standard actions (click, type, swipe, etc.):
|
70
|
+
{{ custom_tools_descriptions }}
|
71
|
+
|
72
|
+
You can reference these custom actions or tell the Executor agent to use them in your plan when they help achieve the user's goal.
|
73
|
+
</custom_actions>
|
74
|
+
{% endif %}
|
75
|
+
|
31
76
|
---
|
32
77
|
Carefully assess the current status and the provided screenshot. Check if the current plan needs to be revised.
|
33
78
|
Determine if the user request has been fully completed. If you are confident that no further actions are required, use the request_accomplished tag with a message in it. If the user request is not finished, update the plan and don't use it. If you are stuck with errors, think step by step about whether the overall plan needs to be revised to address the error.
|
@@ -19,30 +19,38 @@ agent:
|
|
19
19
|
# Enable vision capabilities (screenshots)
|
20
20
|
vision: false
|
21
21
|
# System prompt filename (located in prompts_dir/codeact/)
|
22
|
-
system_prompt: system.md
|
22
|
+
system_prompt: system.jinja2
|
23
23
|
# User prompt filename (located in prompts_dir/codeact/)
|
24
|
-
user_prompt: user.md
|
24
|
+
user_prompt: user.jinja2
|
25
25
|
|
26
26
|
# Manager Agent Configuration
|
27
27
|
manager:
|
28
28
|
# Enable vision capabilities (screenshots)
|
29
29
|
vision: false
|
30
30
|
# System prompt filename (located in prompts_dir/manager/)
|
31
|
-
system_prompt: system.md
|
31
|
+
system_prompt: system.jinja2
|
32
32
|
|
33
33
|
# Executor Agent Configuration
|
34
34
|
executor:
|
35
35
|
# Enable vision capabilities (screenshots)
|
36
36
|
vision: false
|
37
37
|
# System prompt filename (located in prompts_dir/executor/)
|
38
|
-
system_prompt: system.md
|
38
|
+
system_prompt: system.jinja2
|
39
39
|
|
40
40
|
# App Cards Configuration
|
41
41
|
app_cards:
|
42
42
|
# Enable app-specific instruction cards
|
43
43
|
enabled: true
|
44
|
-
#
|
44
|
+
# Mode: local (file-based), server (HTTP API), or composite (server with local fallback)
|
45
|
+
mode: local
|
46
|
+
# Directory containing app card files (for local/composite modes)
|
45
47
|
app_cards_dir: config/app_cards
|
48
|
+
# Server URL for remote app cards (for server/composite modes)
|
49
|
+
server_url: null
|
50
|
+
# Server request timeout in seconds
|
51
|
+
server_timeout: 10.0
|
52
|
+
# Number of server retry attempts
|
53
|
+
server_max_retries: 2
|
46
54
|
|
47
55
|
# === LLM Profiles ===
|
48
56
|
# Define LLM configurations for each agent type
|
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
import logging
|
4
4
|
import re
|
5
5
|
import time
|
6
|
-
from typing import
|
6
|
+
from typing import TYPE_CHECKING, List, Optional, Union
|
7
7
|
|
8
8
|
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
9
9
|
from llama_index.core.llms.llm import LLM
|
@@ -23,9 +23,8 @@ from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
|
23
23
|
from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
24
24
|
from droidrun.agent.usage import get_usage_from_response
|
25
25
|
from droidrun.agent.utils import chat_utils
|
26
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor, ExecuterState
|
27
26
|
from droidrun.agent.utils.device_state_formatter import format_device_state
|
28
|
-
|
27
|
+
from droidrun.agent.utils.executer import ExecuterState, SimpleCodeExecutor
|
29
28
|
from droidrun.agent.utils.tools import (
|
30
29
|
ATOMIC_ACTION_SIGNATURES,
|
31
30
|
build_custom_tool_descriptions,
|
@@ -85,15 +84,8 @@ class CodeActAgent(Workflow):
|
|
85
84
|
self.tool_list = {}
|
86
85
|
for action_name, signature in merged_signatures.items():
|
87
86
|
func = signature["function"]
|
88
|
-
|
89
|
-
|
90
|
-
def make_bound(f, ti):
|
91
|
-
async def bound_func(*args, **kwargs):
|
92
|
-
return await f(ti, *args, **kwargs)
|
93
|
-
return bound_func
|
94
|
-
self.tool_list[action_name] = make_bound(func, tools_instance)
|
95
|
-
else:
|
96
|
-
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
|
87
|
+
|
88
|
+
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance, **kwargs: f(ti, *args, **kwargs)
|
97
89
|
|
98
90
|
self.tool_list["remember"] = tools_instance.remember
|
99
91
|
self.tool_list["complete"] = tools_instance.complete
|
@@ -113,13 +105,10 @@ class CodeActAgent(Workflow):
|
|
113
105
|
)
|
114
106
|
self.system_prompt = ChatMessage(role="system", content=system_prompt_text)
|
115
107
|
|
116
|
-
self.user_prompt_template = PromptLoader.load_prompt(agent_config.get_codeact_user_prompt_path())
|
117
|
-
|
118
108
|
self.executor = SimpleCodeExecutor(
|
119
109
|
loop=asyncio.get_event_loop(),
|
120
110
|
locals={},
|
121
111
|
tools=self.tool_list,
|
122
|
-
tools_instance=tools_instance,
|
123
112
|
globals={"__builtins__": __builtins__},
|
124
113
|
)
|
125
114
|
|
@@ -293,27 +282,30 @@ Now, describe the next step you will take to address the original goal: {goal}""
|
|
293
282
|
try:
|
294
283
|
self.code_exec_counter += 1
|
295
284
|
result = await self.executor.execute(ExecuterState(ui_state=ctx.store.get("ui_state", None)), code)
|
296
|
-
logger.info(f"💡 Code execution successful. Result: {result
|
285
|
+
logger.info(f"💡 Code execution successful. Result: {result}")
|
297
286
|
await asyncio.sleep(self.agent_config.after_sleep_action)
|
298
|
-
screenshots = result['screenshots']
|
299
|
-
for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
|
300
|
-
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
301
|
-
|
302
|
-
ui_states = result['ui_states']
|
303
|
-
for ui_state in ui_states[:-1]:
|
304
|
-
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
|
305
287
|
|
288
|
+
# Check if complete() was called
|
306
289
|
if self.tools.finished:
|
307
|
-
logger.
|
308
|
-
|
309
|
-
|
310
|
-
|
290
|
+
logger.info("✅ Task marked as complete via complete() function")
|
291
|
+
|
292
|
+
# Validate completion state
|
293
|
+
success = self.tools.success if self.tools.success is not None else False
|
294
|
+
reason = self.tools.reason if self.tools.reason else "Task completed without reason"
|
295
|
+
|
296
|
+
# Reset finished flag for next execution
|
297
|
+
self.tools.finished = False
|
298
|
+
|
299
|
+
logger.info(f" - Success: {success}")
|
300
|
+
logger.info(f" - Reason: {reason}")
|
301
|
+
|
302
|
+
event = TaskEndEvent(success=success, reason=reason)
|
311
303
|
ctx.write_event_to_stream(event)
|
312
304
|
return event
|
313
305
|
|
314
306
|
self.remembered_info = self.tools.memory
|
315
307
|
|
316
|
-
event = TaskExecutionResultEvent(output=str(result
|
308
|
+
event = TaskExecutionResultEvent(output=str(result))
|
317
309
|
ctx.write_event_to_stream(event)
|
318
310
|
return event
|
319
311
|
|
@@ -496,7 +488,7 @@ Now, describe the next step you will take to address the original goal: {goal}""
|
|
496
488
|
try:
|
497
489
|
state = self.tools.get_state()
|
498
490
|
a11y_tree = state.get("a11y_tree", "")
|
499
|
-
phone_state = state.get("phone_state", "")
|
491
|
+
phone_state = state.get("phone_state", "") # noqa: F841
|
500
492
|
except Exception as e:
|
501
493
|
raise Exception(f"Failed to capture final UI state: {e}") from e
|
502
494
|
|
@@ -32,7 +32,7 @@ from droidrun.agent.droid.events import (
|
|
32
32
|
)
|
33
33
|
from droidrun.agent.executor import ExecutorAgent
|
34
34
|
from droidrun.agent.manager import ManagerAgent
|
35
|
-
from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES
|
35
|
+
from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, open_app
|
36
36
|
from droidrun.agent.utils.trajectory import Trajectory
|
37
37
|
from droidrun.config_manager.config_manager import (
|
38
38
|
AgentConfig,
|
@@ -43,8 +43,6 @@ from droidrun.config_manager.config_manager import (
|
|
43
43
|
ToolsConfig,
|
44
44
|
TracingConfig,
|
45
45
|
)
|
46
|
-
|
47
|
-
from droidrun.agent.utils.tools import open_app
|
48
46
|
from droidrun.telemetry import (
|
49
47
|
DroidAgentFinalizeEvent,
|
50
48
|
DroidAgentInitEvent,
|
@@ -10,10 +10,11 @@ For internal events with full debugging metadata, see:
|
|
10
10
|
- codeact/events.py (Task*, EpisodicMemoryEvent)
|
11
11
|
"""
|
12
12
|
|
13
|
+
import asyncio
|
13
14
|
from typing import Dict, List
|
14
15
|
|
15
16
|
from llama_index.core.workflow import Event
|
16
|
-
from pydantic import BaseModel, Field
|
17
|
+
from pydantic import BaseModel, ConfigDict, Field
|
17
18
|
|
18
19
|
from droidrun.agent.context import Task
|
19
20
|
|
@@ -46,10 +47,12 @@ class DroidAgentState(BaseModel):
|
|
46
47
|
"""
|
47
48
|
State model for DroidAgent workflow - shared across parent and child workflows.
|
48
49
|
"""
|
49
|
-
|
50
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
50
51
|
# Task context
|
51
52
|
instruction: str = ""
|
52
|
-
|
53
|
+
# App Cards
|
54
|
+
app_card: str = ""
|
55
|
+
app_card_loading_task: asyncio.Task[str] | None = None
|
53
56
|
# Formatted device state for prompts (complete text)
|
54
57
|
formatted_device_state: str = ""
|
55
58
|
|
@@ -90,52 +90,38 @@ class ExecutorAgent(Workflow): # TODO: Fix a bug in bad prompt
|
|
90
90
|
subgoal = ev.get("subgoal", "")
|
91
91
|
logger.info(f"🧠 Executor thinking about action for: {subgoal}")
|
92
92
|
|
93
|
-
#
|
94
|
-
|
95
|
-
app_card_text = ""
|
96
|
-
if app_card.strip():
|
97
|
-
app_card_text = "App card gives information on how to operate the app and perform actions.\n### App Card ###\n" + app_card.strip() + "\n\n"
|
98
|
-
|
99
|
-
# Format device state (use unified state)
|
100
|
-
device_state_text = ""
|
101
|
-
if self.shared_state.formatted_device_state and self.shared_state.formatted_device_state.strip():
|
102
|
-
device_state_text = "### Device State ###\n" + self.shared_state.formatted_device_state.strip() + "\n\n"
|
103
|
-
|
104
|
-
# Format progress status
|
105
|
-
progress_status_text = self.shared_state.progress_status + "\n\n" if self.shared_state.progress_status else "No progress yet.\n\n"
|
106
|
-
|
107
|
-
# Format atomic actions
|
108
|
-
atomic_actions_text = chr(10).join(
|
109
|
-
f"- {action_name}({', '.join(action_info['arguments'])}): {action_info['description']}"
|
110
|
-
for action_name, action_info in ATOMIC_ACTION_SIGNATURES.items()
|
111
|
-
) + "\n"
|
112
|
-
|
113
|
-
# Format action history
|
93
|
+
# Prepare action history as structured data (last 5 actions)
|
94
|
+
action_history = []
|
114
95
|
if self.shared_state.action_history:
|
115
|
-
|
116
|
-
|
117
|
-
|
96
|
+
n = min(5, len(self.shared_state.action_history))
|
97
|
+
action_history = [
|
98
|
+
{
|
99
|
+
"action": act,
|
100
|
+
"summary": summ,
|
101
|
+
"outcome": outcome,
|
102
|
+
"error": err_des
|
103
|
+
}
|
118
104
|
for act, summ, outcome, err_des in zip(
|
119
|
-
self.shared_state.action_history[-
|
120
|
-
self.shared_state.summary_history[-
|
121
|
-
self.shared_state.action_outcomes[-
|
122
|
-
self.shared_state.error_descriptions[-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
#
|
105
|
+
self.shared_state.action_history[-n:],
|
106
|
+
self.shared_state.summary_history[-n:],
|
107
|
+
self.shared_state.action_outcomes[-n:],
|
108
|
+
self.shared_state.error_descriptions[-n:],
|
109
|
+
strict=True
|
110
|
+
)
|
111
|
+
]
|
112
|
+
|
113
|
+
# Let Jinja2 handle all formatting
|
128
114
|
system_prompt = PromptLoader.load_prompt(
|
129
115
|
self.agent_config.get_executor_system_prompt_path(),
|
130
116
|
{
|
131
117
|
"instruction": self.shared_state.instruction,
|
132
|
-
"app_card":
|
133
|
-
"
|
118
|
+
"app_card": "", # TODO: Implement app card loader
|
119
|
+
"device_state": self.shared_state.formatted_device_state,
|
134
120
|
"plan": self.shared_state.plan,
|
135
121
|
"subgoal": subgoal,
|
136
|
-
"progress_status":
|
137
|
-
"atomic_actions":
|
138
|
-
"action_history":
|
122
|
+
"progress_status": self.shared_state.progress_status,
|
123
|
+
"atomic_actions": ATOMIC_ACTION_SIGNATURES,
|
124
|
+
"action_history": action_history
|
139
125
|
}
|
140
126
|
)
|
141
127
|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
"""
|
2
|
+
Prompts for the ExecutorAgent.
|
3
|
+
"""
|
4
|
+
|
5
|
+
def _clean_section(text: str) -> str:
    """Flatten one response section into a single trimmed line.

    Newlines become spaces, doubled spaces are squeezed (one pass), any
    leftover "###" header markers are removed, and surrounding whitespace
    is stripped.
    """
    # Newlines are replaced first: that is what produces the doubled spaces
    # the second replace is meant to squeeze. `" " * 2` spells out the
    # two-space needle so it cannot be mistaken for a single space.
    return (
        text.replace("\n", " ")
        .replace(" " * 2, " ")
        .replace("###", "")
        .strip()
    )


def parse_executor_response(response: str) -> dict:
    """
    Parse the Executor LLM response.

    Extracts:
    - thought: Content between "### Thought" and "### Action"
    - action: Content between "### Action" and "### Description"
    - description: Content after "### Description"

    A missing header is not an error: each split simply leaves the text
    untouched, so the corresponding section falls back to whatever text
    remains on that side of the headers that are present.

    Args:
        response: Raw LLM response string

    Returns:
        Dictionary with 'thought', 'action', 'description' keys. 'action' is
        narrowed to the outermost "{...}" span when the section contains one;
        otherwise it is the cleaned section text as-is.
    """
    thought = _clean_section(
        response.split("### Thought")[-1].split("### Action")[0]
    )
    action_raw = _clean_section(
        response.split("### Action")[-1].split("### Description")[0]
    )

    # Keep only the JSON-looking object, dropping any prose the model wrapped
    # around it. The closing brace must come after the opening one; otherwise
    # the slice would be empty and the action text would be silently lost.
    start_idx = action_raw.find("{")
    end_idx = action_raw.rfind("}")
    if start_idx != -1 and end_idx > start_idx:
        action = action_raw[start_idx:end_idx + 1]
    else:
        action = action_raw

    description = _clean_section(response.split("### Description")[-1])

    return {
        "thought": thought,
        "action": action,
        "description": description,
    }
|