droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. droidrun/__init__.py +22 -10
  2. droidrun/__main__.py +1 -2
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +2 -2
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +5 -15
  7. droidrun/agent/codeact/__init__.py +11 -0
  8. droidrun/agent/codeact/codeact_agent.py +420 -0
  9. droidrun/agent/codeact/events.py +28 -0
  10. droidrun/agent/codeact/prompts.py +26 -0
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +13 -0
  26. droidrun/agent/droid/droid_agent.py +357 -0
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +13 -0
  30. droidrun/agent/planner/events.py +16 -0
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +124 -0
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +17 -0
  35. droidrun/agent/utils/chat_utils.py +312 -0
  36. droidrun/agent/utils/executer.py +132 -0
  37. droidrun/agent/utils/llm_picker.py +147 -0
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +358 -149
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +4 -30
  44. droidrun/tools/adb.py +879 -0
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/llm_reasoning.py +0 -567
  50. droidrun/agent/react_agent.py +0 -556
  51. droidrun/llm/__init__.py +0 -24
  52. droidrun/tools/actions.py +0 -854
  53. droidrun/tools/device.py +0 -29
  54. droidrun-0.1.0.dist-info/METADATA +0 -276
  55. droidrun-0.1.0.dist-info/RECORD +0 -20
  56. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  57. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  58. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,268 @@
1
+ from droidrun.agent.planner.events import *
2
+ from droidrun.agent.planner.prompts import (
3
+ DEFAULT_PLANNER_SYSTEM_PROMPT,
4
+ DEFAULT_PLANNER_USER_PROMPT,
5
+ )
6
+ import logging
7
+ import asyncio
8
+ from typing import List, TYPE_CHECKING, Union
9
+ import inspect
10
+ from llama_index.core.base.llms.types import ChatMessage, ChatResponse
11
+ from llama_index.core.prompts import PromptTemplate
12
+ from llama_index.core.llms.llm import LLM
13
+ from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
14
+ from llama_index.core.memory import Memory
15
+ from llama_index.core.llms.llm import LLM
16
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
17
+ from droidrun.agent.utils import chat_utils
18
+ from droidrun.agent.context.task_manager import TaskManager
19
+ from droidrun.tools import Tools
20
+ from droidrun.agent.common.events import ScreenshotEvent
21
+ from droidrun.agent.planner.events import (
22
+ PlanInputEvent,
23
+ PlanCreatedEvent,
24
+ PlanThinkingEvent,
25
+ )
26
+ from droidrun.agent.context.agent_persona import AgentPersona
27
+ from droidrun.agent.context.reflection import Reflection
28
+
29
+ from dotenv import load_dotenv
30
+
31
+ load_dotenv()
32
+
33
+ # Setup logger
34
+ logger = logging.getLogger("droidrun")
35
+
36
+ if TYPE_CHECKING:
37
+ from droidrun.tools import Tools
38
+
39
+
40
class PlannerAgent(Workflow):
    """Workflow that asks an LLM to plan the next tasks towards a goal.

    Each run stores the goal in chat memory, captures the current device
    state through ``tools_instance``, sends the conversation to the LLM,
    executes the Python code found in the reply with ``SimpleCodeExecutor``
    (which exposes the task manager's ``set_tasks_with_agents`` and
    ``complete_goal`` as tools) and finally returns the task manager's task
    list as ``{"tasks": [...]}``.
    """

    # Sent back to the LLM whenever its reply contained no runnable plan.
    _REPLAN_REMINDER = "Please either set new tasks using set_tasks_with_agents() or mark the goal as complete using complete_goal() if done."

    def __init__(
        self,
        goal: str,
        llm: LLM,
        personas: List[AgentPersona],
        task_manager: TaskManager,
        tools_instance: Tools,
        system_prompt=None,
        user_prompt=None,
        debug=False,
        *args,
        **kwargs,
    ) -> None:
        """Set up prompts, planning tools and the code executor.

        Args:
            goal: The overall user goal to plan for.
            llm: LLM used for planning (called through ``achat``).
            personas: Personas rendered into the default system prompt.
            task_manager: Provides the planning tools and the task lists.
            tools_instance: Device tools used for screenshots, UI state,
                phone state and remembered information.
            system_prompt: Optional replacement for the default system prompt.
            user_prompt: Optional replacement for the default user prompt.
            debug: Stored on the instance as ``self.debug``.
            *args: Forwarded to ``Workflow.__init__``.
            **kwargs: Forwarded to ``Workflow.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.llm = llm
        self.goal = goal
        self.task_manager = task_manager
        self.debug = debug

        self.chat_memory = None
        self.remembered_info = None
        self.reflection: Union[Reflection, None] = None

        self.current_retry = 0
        self.steps_counter = 0

        # The planner may only call these two task-manager methods; they are
        # keyed by function name so the executor can expose them by that name.
        self.tool_list = {
            tool.__name__: tool
            for tool in (
                self.task_manager.set_tasks_with_agents,
                self.task_manager.complete_goal,
            )
        }

        self.tools_description = chat_utils.parse_tool_descriptions(self.tool_list)
        self.tools_instance = tools_instance

        self.personas = personas

        self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(
            tools_description=self.tools_description,
            agents=chat_utils.parse_persona_description(self.personas),
        )
        self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
        self.system_message = ChatMessage(role="system", content=self.system_prompt)
        self.user_message = ChatMessage(role="user", content=self.user_prompt)

        # NOTE(review): asyncio.get_event_loop() is deprecated when no loop is
        # running; confirm this constructor is always called from async code.
        self.executer = SimpleCodeExecutor(
            loop=asyncio.get_event_loop(), globals={}, locals={}, tools=self.tool_list
        )

    @step
    async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
        """Load chat memory, store the goal message and read the run inputs.

        ``remembered_info`` and ``reflection`` are optional start arguments:
        a missing or falsy ``remembered_info`` keeps the previous value, while
        a missing or falsy ``reflection`` resets it to ``None``.
        """
        logger.info("💬 Preparing planning session...")

        self.chat_memory: Memory = await ctx.get(
            "chat_memory", default=Memory.from_defaults()
        )
        # The goal is stored exactly once, as plain text. The previous code
        # appended a second user message whose content was a PromptTemplate
        # object instead of a string.
        await self.chat_memory.aput(self.user_message)

        # getattr() keeps the workflow usable when it is started without these
        # optional keyword arguments.
        remembered_info = getattr(ev, "remembered_info", None)
        if remembered_info:
            self.remembered_info = remembered_info

        self.reflection = getattr(ev, "reflection", None) or None

        input_messages = self.chat_memory.get_all()
        logger.debug(f" - Memory contains {len(input_messages)} messages")
        return PlanInputEvent(input=input_messages)

    @step
    async def handle_llm_input(
        self, ev: PlanInputEvent, ctx: Context
    ) -> PlanThinkingEvent:
        """Capture the device state, query the LLM and parse its reply."""
        chat_history = ev.input
        assert len(chat_history) > 0, "Chat history cannot be empty."

        ctx.write_event_to_stream(ev)

        self.steps_counter += 1
        logger.info("🧠 Thinking about how to plan the goal...")

        # take_screenshot() returns a sequence; index 1 is what gets streamed
        # and attached to the LLM request.
        screenshot = (await self.tools_instance.take_screenshot())[1]
        ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
        await ctx.set("screenshot", screenshot)

        # Everything _get_llm_response() needs is handed over via the context.
        await ctx.set("ui_state", await self.tools_instance.get_clickables())
        await ctx.set("phone_state", await self.tools_instance.get_phone_state())
        await ctx.set("remembered_info", self.remembered_info)
        await ctx.set("reflection", self.reflection)

        response = await self._get_llm_response(ctx, chat_history)
        await self.chat_memory.aput(response.message)

        code, thoughts = chat_utils.extract_code_and_thought(response.message.content)

        event = PlanThinkingEvent(thoughts=thoughts, code=code)
        ctx.write_event_to_stream(event)
        return event

    @step
    async def handle_llm_output(
        self, ev: PlanThinkingEvent, ctx: Context
    ) -> Union[PlanInputEvent, PlanCreatedEvent]:
        """Execute the planning code, or ask the LLM again if that fails.

        Returns:
            ``PlanCreatedEvent`` with the task manager's tasks after the code
            ran, or ``PlanInputEvent`` to re-prompt the LLM when the reply had
            no code or handling it raised.
        """
        logger.debug("🤖 Processing planning output...")
        code = ev.code

        if not code:
            return await self._request_new_plan()

        try:
            result = await self.executer.execute(ctx, code)
            logger.info("📝 Planning complete")
            logger.debug(f" - Planning code executed. Result: {result}")

            await self.chat_memory.aput(
                ChatMessage(
                    role="user", content=f"Execution Result:\n```\n{result}\n```"
                )
            )

            self.remembered_info = self.tools_instance.memory

            tasks = self.task_manager.get_all_tasks()
            event = PlanCreatedEvent(tasks=tasks)

            # The plan is only logged and streamed while the goal is open.
            if not self.task_manager.goal_completed:
                logger.info(f"📋 Current plan created with {len(tasks)} tasks:")
                for i, task in enumerate(tasks):
                    logger.info(
                        f" Task {i}: [{task.status.upper()}] [{task.agent_type}] {task.description}"
                    )
                ctx.write_event_to_stream(event)

            return event

        except Exception as e:
            # Best effort: the failure is not fatal, the LLM simply gets
            # another chance. The traceback is kept for debugging.
            logger.debug(f"error handling Planner: {e}", exc_info=True)
            return await self._request_new_plan()

    async def _request_new_plan(self) -> PlanInputEvent:
        """Append the re-plan reminder to memory and re-queue the LLM step."""
        await self.chat_memory.aput(
            ChatMessage(role="user", content=self._REPLAN_REMINDER)
        )
        logger.debug("🔄 Waiting for next plan or completion.")
        return PlanInputEvent(input=self.chat_memory.get_all())

    @step
    async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
        """Store the chat memory in the context and return the planned tasks."""
        await ctx.set("chat_memory", self.chat_memory)
        return StopEvent(result={"tasks": ev.tasks})

    async def _get_llm_response(
        self, ctx: Context, chat_history: List[ChatMessage]
    ) -> ChatResponse:
        """Enrich the chat history with device context and call the LLM.

        Added in order: screenshot (skipped for DeepSeek), task history,
        remembered info, reflection summary, phone state and UI text. The
        system message is prepended and every message is copied before it is
        sent with ``achat``.

        Raises:
            Exception: Any error raised while building or sending the request
                is logged and re-raised unchanged.
        """
        try:
            logger.debug(f" - Sending {len(chat_history)} messages to LLM.")

            model = self.llm.class_name()
            if model != "DeepSeek":
                chat_history = await chat_utils.add_screenshot_image_block(
                    await ctx.get("screenshot"), chat_history
                )
            else:
                logger.warning(
                    "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
                )

            chat_history = await chat_utils.add_task_history_block(
                self.task_manager.get_completed_tasks(),
                self.task_manager.get_failed_tasks(),
                chat_history,
            )

            remembered_info = await ctx.get("remembered_info", default=None)
            if remembered_info:
                chat_history = await chat_utils.add_memory_block(
                    remembered_info, chat_history
                )

            reflection = await ctx.get("reflection", default=None)
            if reflection:
                chat_history = await chat_utils.add_reflection_summary(
                    reflection, chat_history
                )

            chat_history = await chat_utils.add_phone_state_block(
                await ctx.get("phone_state"), chat_history
            )
            chat_history = await chat_utils.add_ui_text_block(
                await ctx.get("ui_state"), chat_history
            )

            messages_to_send = [
                chat_utils.message_copy(msg)
                for msg in [self.system_message] + chat_history
            ]

            logger.debug(f" - Final message count: {len(messages_to_send)}")

            response = await self.llm.achat(messages=messages_to_send)
            assert hasattr(
                response, "message"
            ), f"LLM response does not have a message attribute.\nResponse: {response}"
            logger.debug(" - Received response from LLM.")
            return response
        except Exception as e:
            logger.error(f"Could not get an answer from LLM: {repr(e)}")
            # Bare raise keeps the original traceback intact.
            raise
@@ -0,0 +1,124 @@
1
+ """
2
+ Prompt templates for the PlannerAgent.
3
+
4
+ This module contains all the prompts used by the PlannerAgent,
5
+ separated from the workflow logic for better maintainability.
6
+ """
7
+
8
# System prompt for the PlannerAgent that explains its role and capabilities.
# It is rendered with str.format(): `{agents}` is the only replacement field,
# and literal braces in the example code are escaped as `{{` / `}}`.
# NOTE(review): there is no `{tools_description}` field, so a
# `tools_description=` keyword passed to .format() is silently ignored; the
# planning tools are described by the hard-coded section below instead.
DEFAULT_PLANNER_SYSTEM_PROMPT = """You are an Android Task Planner. Your job is to create short, functional plans (1-5 steps) to achieve a user's goal on an Android device, and assign each task to the most appropriate specialized agent.

**Inputs You Receive:**
1. **User's Overall Goal.**
2. **Current Device State:**
* A **screenshot** of the current screen.
* **JSON data** of visible UI elements.
* The current visible Android activity
3. **Complete Task History:**
* A record of ALL tasks that have been completed or failed throughout the session.
* For completed tasks, the results and any discovered information.
* For failed tasks, the detailed reasons for failure.
* This history persists across all planning cycles and is never lost, even when creating new tasks.

**Available Specialized Agents:**
You have access to specialized agents, each optimized for specific types of tasks:
{agents}

**Your Task:**
Given the goal, current state, and task history, devise the **next 1-5 functional steps** and assign each to the most appropriate specialized agent.
Focus on what to achieve, not how. Planning fewer steps at a time improves accuracy, as the state can change.

**Step Format:**
Each step must be a functional goal.
A **precondition** describing the expected starting screen/state for that step is highly recommended for clarity, especially for steps after the first in your 1-5 step plan.
Each task string can start with "Precondition: ... Goal: ...".
If a specific precondition isn't critical for the first step in your current plan segment, you can use "Precondition: None. Goal: ..." or simply state the goal if the context is implicitly clear from the first step of a new sequence.

**Your Output:**
* Use the `set_tasks_with_agents` tool to provide your 1-5 step plan with agent assignments.
* Each task should be assigned to a specialized agent using its name.

* **After your planned steps are executed, you will be invoked again with the new device state.**
You will then:
1. Assess if the **overall user goal** is complete.
2. If complete, call the `complete_goal(message: str)` tool.
3. If not complete, generate the next 1-5 steps using `set_tasks_with_agents`.

**Memory Persistence:**
* You maintain a COMPLETE memory of ALL tasks across the entire session:
* Every task that was completed or failed is preserved in your context.
* Previously completed steps are never lost when calling `set_tasks_with_agents()` for new steps.
* You will see all historical tasks each time you're called.
* Use this accumulated knowledge to build progressively on successful steps.
* When you see discovered information (e.g., dates, locations), use it explicitly in future tasks.

**Key Rules:**
* **Functional Goals ONLY:** (e.g., "Navigate to Wi-Fi settings", "Enter 'MyPassword' into the password field").
* **NO Low-Level Actions:** Do NOT specify swipes, taps on coordinates, or element IDs in your plan.
* **Short Plans (1-5 steps):** Plan only the immediate next actions.
* **Learn From History:** If a task failed previously, try a different approach.
* **Use Tools:** Your response *must* be a Python code block calling `set_tasks_with_agents` or `complete_goal`.
* **Smart Agent Assignment:** Choose the most appropriate agent for each task type.

**Available Planning Tools:**
* `set_tasks_with_agents(task_assignments: List[Dict[str, str]])`: Defines the sequence of tasks with agent assignments. Each element should be a dictionary with 'task' and 'agent' keys.
* `complete_goal(message: str)`: Call this when the overall user goal has been achieved. The message can summarize the completion.

---

**Example Interaction Flow:**

**User Goal:** Open Gmail and compose a new email.

**(Round 1) Planner Input:**
* Goal: "Open Gmail and compose a new email"
* Current State: Screenshot of Home screen, UI JSON.
* Task History: None (first planning cycle)

**Planner Thought Process (Round 1):**
Need to first open Gmail app, then navigate to compose. The first task is app launching, the second is UI navigation.

**Planner Output (Round 1):**
```python
set_tasks_with_agents([
{{'task': 'Precondition: None. Goal: Open the Gmail app.', 'agent': <Specialized_Agent>}},
{{'task': 'Precondition: Gmail app is open and loaded. Goal: Navigate to compose new email.', 'agent': <Specialized_Agent>}}
])
```

**(After specialized agents perform these steps...)**

**(Round 2) Planner Input:**
* Goal: "Open Gmail and compose a new email"
* Current State: Screenshot of Gmail compose screen, UI JSON showing compose interface.
* Task History: Shows completed tasks with their assigned agents

**Planner Output (Round 2):**
```python
complete_goal(message="Gmail has been opened and compose email screen is ready for use.")
```
"""

# User prompt template that simply states the goal.
# Replacement field: `{goal}`.
DEFAULT_PLANNER_USER_PROMPT = """Goal: {goal}"""

# Prompt template for when a task fails, to help recover and plan new steps.
# Replacement fields: `{task_description}`, `{reason}` and `{goal}`
# (`{reason}` and `{goal}` each appear twice).
DEFAULT_PLANNER_TASK_FAILED_PROMPT = """
PLANNING UPDATE: The execution of a task failed.

Failed Task Description: "{task_description}"
Reported Reason: {reason}

The previous plan has been stopped. I have attached a screenshot representing the device's **current state** immediately after the failure. Please analyze this visual information.

Original Goal: {goal}

Instruction: Based **only** on the provided screenshot showing the current state and the reason for the previous failure ('{reason}'), generate a NEW plan starting from this observed state to achieve the original goal: '{goal}'.
"""

# Export all prompts
__all__ = [
    "DEFAULT_PLANNER_SYSTEM_PROMPT",
    "DEFAULT_PLANNER_USER_PROMPT",
    "DEFAULT_PLANNER_TASK_FAILED_PROMPT",
]
@@ -0,0 +1,3 @@
1
+ """
2
+ Utility modules for DroidRun agents.
3
+ """
@@ -0,0 +1,17 @@
1
+ import asyncio
2
+
3
def async_to_sync(func):
    """
    Convert an async function to a sync function.

    The returned wrapper builds the coroutine with the given arguments and
    runs it to completion with ``asyncio.run``, returning its result. Because
    ``asyncio.run`` cannot be called while another event loop is running in
    the same thread, the wrapper must only be called from synchronous code.

    Args:
        func: Async function to convert

    Returns:
        Callable: Synchronous version of the async function, carrying the
        original function's name and docstring
    """
    # Imported locally so this function does not depend on a module-level
    # import that the file does not have.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return asyncio.run(func(*args, **kwargs))

    return wrapper