droidrun 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +330 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/droid/__init__.py +2 -2
  25. droidrun/agent/droid/droid_agent.py +269 -325
  26. droidrun/agent/droid/events.py +28 -0
  27. droidrun/agent/oneflows/reflector.py +265 -0
  28. droidrun/agent/planner/__init__.py +2 -4
  29. droidrun/agent/planner/events.py +9 -13
  30. droidrun/agent/planner/planner_agent.py +288 -0
  31. droidrun/agent/planner/prompts.py +33 -53
  32. droidrun/agent/utils/__init__.py +3 -0
  33. droidrun/agent/utils/async_utils.py +1 -40
  34. droidrun/agent/utils/chat_utils.py +265 -48
  35. droidrun/agent/utils/executer.py +49 -14
  36. droidrun/agent/utils/llm_picker.py +14 -10
  37. droidrun/agent/utils/trajectory.py +184 -0
  38. droidrun/cli/__init__.py +1 -1
  39. droidrun/cli/logs.py +283 -0
  40. droidrun/cli/main.py +364 -441
  41. droidrun/tools/__init__.py +5 -10
  42. droidrun/tools/{actions.py → adb.py} +381 -412
  43. droidrun/tools/ios.py +596 -0
  44. droidrun/tools/tools.py +95 -0
  45. droidrun-0.3.1.dist-info/METADATA +150 -0
  46. droidrun-0.3.1.dist-info/RECORD +50 -0
  47. droidrun/agent/planner/task_manager.py +0 -355
  48. droidrun/agent/planner/workflow.py +0 -371
  49. droidrun/tools/device.py +0 -29
  50. droidrun/tools/loader.py +0 -60
  51. droidrun-0.2.0.dist-info/METADATA +0 -373
  52. droidrun-0.2.0.dist-info/RECORD +0 -32
  53. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/WHEEL +0 -0
  54. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/entry_points.txt +0 -0
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.1.dist-info}/licenses/LICENSE +0 -0
droidrun/agent/planner/workflow.py DELETED
@@ -1,371 +0,0 @@
1
- from llama_index.core.workflow import (
2
- StartEvent,
3
- StopEvent,
4
- Workflow,
5
- step,
6
- )
7
- from .events import *
8
- from .prompts import (
9
- DEFAULT_PLANNER_SYSTEM_PROMPT,
10
- DEFAULT_PLANNER_USER_PROMPT,
11
- )
12
- import logging
13
- import re
14
- import os
15
- from typing import List, Optional, Tuple, TYPE_CHECKING, Union
16
- import inspect
17
- # LlamaIndex imports for LLM interaction and types
18
- from llama_index.core.base.llms.types import ChatMessage, ChatResponse
19
- from llama_index.core.llms.llm import LLM
20
- from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
21
- from llama_index.core.memory import ChatMemoryBuffer
22
- from llama_index.core.llms.llm import LLM
23
- from ..utils.executer import SimpleCodeExecutor
24
- from ..utils.chat_utils import add_ui_text_block, add_screenshot_image_block, add_phone_state_block, message_copy
25
- from .task_manager import TaskManager
26
-
27
- # Load environment variables
28
- from dotenv import load_dotenv
29
- load_dotenv()
30
-
31
- # Setup logger
32
- logger = logging.getLogger("droidrun")
33
-
34
- if TYPE_CHECKING:
35
- from ...tools import Tools
36
-
37
- class PlannerAgent(Workflow):
38
- def __init__(self, goal: str, llm: LLM, agent: Optional[Workflow], tools_instance: 'Tools',
39
- executer = None, system_prompt = None, user_prompt = None, max_retries = 1,
40
- enable_tracing = False, debug = False, *args, **kwargs) -> None:
41
- super().__init__(*args, **kwargs)
42
-
43
- # Setup tracing if enabled
44
- if enable_tracing:
45
- try:
46
- from llama_index.core import set_global_handler
47
- set_global_handler("arize_phoenix")
48
- logger.info("Arize Phoenix tracing enabled")
49
- except ImportError:
50
- logger.warning("Arize Phoenix package not found, tracing disabled")
51
- else:
52
- if debug:
53
- logger.debug("Arize Phoenix tracing disabled")
54
-
55
- self.llm = llm
56
- self.goal = goal
57
- self.task_manager = TaskManager()
58
- self.tools = [self.task_manager.set_tasks, self.task_manager.add_task, self.task_manager.get_all_tasks, self.task_manager.clear_tasks, self.task_manager.complete_goal, self.task_manager.start_agent]
59
- self.debug = debug # Set debug attribute before using it in other methods
60
- self.tools_description = self.parse_tool_descriptions()
61
- if not executer:
62
- self.executer = SimpleCodeExecutor(loop=None, globals={}, locals={}, tools=self.tools, use_same_scope=True)
63
- else:
64
- self.executer = executer
65
- self.system_prompt = system_prompt or DEFAULT_PLANNER_SYSTEM_PROMPT.format(tools_description=self.tools_description)
66
- self.user_prompt = user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=goal)
67
- self.system_message = ChatMessage(role="system", content=self.system_prompt)
68
- self.user_message = ChatMessage(role="user", content=self.user_prompt)
69
- self.memory = None
70
- self.agent = agent # This can now be None when used just for planning
71
- self.tools_instance = tools_instance
72
-
73
- self.max_retries = max_retries # Number of retries for a failed task
74
-
75
- self.current_retry = 0 # Current retry count
76
-
77
- self.steps_counter = 0 # Steps counter
78
-
79
- def _extract_code_and_thought(self, response_text: str) -> Tuple[Optional[str], str]:
80
- """
81
- Extracts code from Markdown blocks (```python ... ```) and the surrounding text (thought),
82
- handling indented code blocks.
83
-
84
- Returns:
85
- Tuple[Optional[code_string], thought_string]
86
- """
87
- if self.debug:
88
- logger.debug("✂️ Extracting code and thought from response...")
89
- code_pattern = r"^\s*```python\s*\n(.*?)\n^\s*```\s*?$" # Added ^\s*, re.MULTILINE, and made closing fence match more robust
90
- # Use re.DOTALL to make '.' match newlines and re.MULTILINE to make '^' match start of lines
91
- code_matches = list(re.finditer(code_pattern, response_text, re.DOTALL | re.MULTILINE))
92
-
93
- if not code_matches:
94
- # No code found, the entire response is thought
95
- if self.debug:
96
- logger.debug(" - No code block found. Entire response is thought.")
97
- return None, response_text.strip()
98
-
99
- extracted_code_parts = []
100
- for match in code_matches:
101
- # group(1) is the (.*?) part - the actual code content
102
- code_content = match.group(1)
103
- extracted_code_parts.append(code_content) # Keep original indentation for now
104
-
105
- extracted_code = "\n\n".join(extracted_code_parts)
106
- if self.debug:
107
- logger.debug(f" - Combined extracted code:\n```python\n{extracted_code}\n```")
108
-
109
-
110
- # Extract thought text (text before the first code block, between blocks, and after the last)
111
- thought_parts = []
112
- last_end = 0
113
- for match in code_matches:
114
- # Use span(0) to get the start/end of the *entire* match (including fences and indentation)
115
- start, end = match.span(0)
116
- thought_parts.append(response_text[last_end:start])
117
- last_end = end
118
- thought_parts.append(response_text[last_end:]) # Text after the last block
119
-
120
- thought_text = "".join(thought_parts).strip()
121
- # Avoid overly long debug messages for thought
122
- if self.debug:
123
- thought_preview = (thought_text[:100] + '...') if len(thought_text) > 100 else thought_text
124
- logger.debug(f" - Extracted thought: {thought_preview}")
125
-
126
- return extracted_code, thought_text
127
-
128
- def parse_tool_descriptions(self) -> str:
129
- """Parses the available tools and their descriptions for the system prompt."""
130
- if self.debug:
131
- logger.debug("🛠️ Parsing tool descriptions for Planner Agent...")
132
- # self.available_tools is a list of functions, we need to get their docstrings, names, and signatures and display them as `def name(args) -> return_type:\n"""docstring""" ...\n`
133
- tool_descriptions = []
134
- for tool in self.tools:
135
- assert callable(tool), f"Tool {tool} is not callable."
136
- tool_name = tool.__name__
137
- tool_signature = inspect.signature(tool)
138
- tool_docstring = tool.__doc__ or "No description available."
139
- # Format the function signature and docstring
140
- formatted_signature = f"def {tool_name}{tool_signature}:\n \"\"\"{tool_docstring}\"\"\"\n..."
141
- tool_descriptions.append(formatted_signature)
142
- if self.debug:
143
- logger.debug(f" - Parsed tool: {tool_name}")
144
- # Join all tool descriptions into a single string
145
- descriptions = "\n".join(tool_descriptions)
146
- if self.debug:
147
- logger.debug(f"🔩 Found {len(tool_descriptions)} tools.")
148
- return descriptions
149
-
150
- @step
151
- async def prepare_chat(self, ev: StartEvent, ctx: Context) -> InputEvent:
152
- logger.info("💬 Preparing planning session...")
153
- await ctx.set("step", "generate_plan")
154
-
155
- # Check if we already have a memory buffer, otherwise create one
156
- if not self.memory:
157
- if self.debug:
158
- logger.debug(" - Creating new memory buffer.")
159
- self.memory = ChatMemoryBuffer.from_defaults(llm=self.llm)
160
- # Add system message to memory
161
- await self.memory.aput(self.system_message)
162
- else:
163
- if self.debug:
164
- logger.debug(" - Using existing memory buffer with chat history.")
165
-
166
- # Check for user input
167
- user_input = ev.get("input", default=None)
168
-
169
- # Validate we have either memory, input, or a user prompt
170
- assert len(self.memory.get_all()) > 0 or user_input or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
171
-
172
- # Add user input to memory if provided or use the user prompt if this is a new conversation
173
- if user_input:
174
- if self.debug:
175
- logger.debug(" - Adding user input to memory.")
176
- await self.memory.aput(ChatMessage(role="user", content=user_input))
177
- elif self.user_prompt and len(self.memory.get_all()) <= 1: # Only add user prompt if memory only has system message
178
- if self.debug:
179
- logger.debug(" - Adding goal to memory.")
180
- await self.memory.aput(ChatMessage(role="user", content=self.user_prompt))
181
-
182
- # Update context
183
- await ctx.set("memory", self.memory)
184
- input_messages = self.memory.get_all()
185
- if self.debug:
186
- logger.debug(f" - Memory contains {len(input_messages)} messages")
187
- return InputEvent(input=input_messages)
188
-
189
- @step
190
- async def handle_llm_input(self, ev: InputEvent, ctx: Context) -> Union[StopEvent, ModelResponseEvent]:
191
- """Handle LLM input."""
192
- # Get chat history from event
193
- chat_history = ev.input
194
- assert len(chat_history) > 0, "Chat history cannot be empty."
195
-
196
- self.steps_counter += 1
197
- logger.info(f"🧠 Thinking about how to plan the goal...")
198
- # Get LLM response
199
- response = await self._get_llm_response(chat_history)
200
- # Add response to memory
201
- await self.memory.aput(response.message)
202
- return ModelResponseEvent(response=response.message.content)
203
-
204
- @step
205
- async def handle_llm_output(self, ev: ModelResponseEvent, ctx: Context) -> Union[StopEvent, ExecutePlan]:
206
- """Handle LLM output."""
207
- response = ev.response
208
- if response:
209
- if self.debug:
210
- logger.debug("🤖 LLM response received.")
211
- if self.debug:
212
- logger.debug("🤖 Processing planning output...")
213
- planner_step = await ctx.get("step", default=None)
214
- code, thoughts = self._extract_code_and_thought(response)
215
- if self.debug:
216
- logger.debug(f" - Thoughts: {'Yes' if thoughts else 'No'}, Code: {'Yes' if code else 'No'}")
217
- if code:
218
- # Execute code if present
219
- if self.debug:
220
- logger.debug(f"Response: {response}")
221
- result = await self.executer.execute(code)
222
- logger.info(f"📝 Planning complete")
223
- if self.debug:
224
- logger.debug(f" - Planning code executed. Result: {result}")
225
- # Add result to memory
226
- await self.memory.aput(ChatMessage(role="user", content=f"Execution Result:\n```\n{result}\n```"))
227
-
228
- # Check if there are any pending tasks
229
- pending_tasks = self.task_manager.get_pending_tasks()
230
-
231
- if self.task_manager.task_completed:
232
- logger.info("✅ Goal marked as complete by planner.")
233
- return StopEvent(result={'finished': True, 'message': "Task execution completed.", 'steps': self.steps_counter})
234
- elif pending_tasks:
235
- # If there are pending tasks, automatically start execution
236
- logger.info("🚀 Starting task execution...")
237
- return ExecutePlan()
238
- else:
239
- # If no tasks were set, prompt the planner to set tasks or complete the goal
240
- await self.memory.aput(ChatMessage(role="user", content=f"Please either set new tasks using set_tasks() or mark the goal as complete using complete_goal() if done."))
241
- if self.debug:
242
- logger.debug("🔄 Waiting for next plan or completion.")
243
- return InputEvent(input=self.memory.get_all())
244
- @step
245
- async def execute_plan(self, ev: ExecutePlan, ctx: Context) -> Union[ExecutePlan, TaskFailedEvent]:
246
- """Execute the plan by scheduling the agent to run."""
247
- step_name = await ctx.get("step")
248
- if step_name == "execute_agent":
249
- return await self.execute_agent(ev, ctx) # Sub-steps
250
- else:
251
- await ctx.set("step", "execute_agent")
252
- return ev # Reenter this step with the subcontext key set
253
-
254
- async def execute_agent(self, ev: ExecutePlan, ctx: Context) -> Union[ExecutePlan, TaskFailedEvent]:
255
- """Execute a single task using the agent."""
256
- # Skip execution if no agent is provided (used in planning-only mode)
257
- if self.agent is None:
258
- if self.debug:
259
- logger.debug("No agent provided, skipping execution")
260
- return StopEvent(result={"success": False, "reason": "No agent provided"})
261
-
262
- # Original execution logic
263
- tasks = self.task_manager.get_all_tasks()
264
- attempting_tasks = self.task_manager.get_tasks_by_status(self.task_manager.STATUS_ATTEMPTING)
265
- if attempting_tasks:
266
- task = attempting_tasks[0]
267
- logger.warning(f"A task is already being executed: {task['description']}")
268
- task_description = task["description"]
269
- else:
270
- # Find the first task in 'pending' status
271
- for task in tasks:
272
- if task['status'] == self.task_manager.STATUS_PENDING:
273
- self.task_manager.update_status(tasks.index(task), self.task_manager.STATUS_ATTEMPTING)
274
- task_description = task['description']
275
- break
276
- else:
277
- # If execution reaches here, all tasks are either completed or failed
278
- all_completed = all(task["status"] == self.task_manager.STATUS_COMPLETED for task in tasks)
279
- if all_completed and tasks:
280
- if self.debug:
281
- logger.debug(f"All tasks completed: {[task['description'] for task in tasks]}")
282
- # Return to handle_llm_input with empty input to get new plan
283
- return InputEvent(input=self.memory.get_all())
284
- else:
285
- logger.warning(f"No executable task found.")
286
- if self.debug:
287
- logger.debug(f"Tasks status: {[(task['description'], task['status']) for task in tasks]}")
288
- return TaskFailedEvent(task_description="No task to execute", reason="No executable task found")
289
-
290
- logger.info(f"🔧 Executing task: {task_description}")
291
- # After the task is selected, execute the agent with that task
292
- try:
293
- task_event = {"input": task_description}
294
- result = await self.agent.run(task_event)
295
- success = result.get("result", {}).get("success", False)
296
- if success:
297
- for task in tasks:
298
- if task["status"] == self.task_manager.STATUS_ATTEMPTING:
299
- self.task_manager.update_status(tasks.index(task), self.task_manager.STATUS_COMPLETED)
300
- return ExecutePlan() # Continue execution to find more tasks
301
- # Task failure case
302
- for task in tasks:
303
- if task["status"] == self.task_manager.STATUS_ATTEMPTING:
304
- self.task_manager.update_status(tasks.index(task), self.task_manager.STATUS_FAILED)
305
- reason = result.get("result", {}).get("reason", "Task failed without specific reason")
306
- return TaskFailedEvent(task_description=task_description, reason=reason)
307
- except Exception as e:
308
- logger.error(f"Error executing task '{task_description}': {e}")
309
- # Find the attempting task and mark it as failed
310
- for task in tasks:
311
- if task["status"] == self.task_manager.STATUS_ATTEMPTING:
312
- self.task_manager.update_status(tasks.index(task), self.task_manager.STATUS_FAILED)
313
- return TaskFailedEvent(task_description=task_description, reason=f"Execution error: {e}")
314
-
315
- # Should not reach here, but just in case:
316
- return TaskFailedEvent(task_description=task_description, reason="Task execution completed abnormally")
317
-
318
- async def _get_llm_response(self, chat_history: List[ChatMessage]) -> ChatResponse:
319
- """Get streaming response from LLM."""
320
- if self.debug:
321
- logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
322
-
323
- # Check if there's a system message in the chat history
324
- has_system_message = any(msg.role == "system" for msg in chat_history)
325
- if not has_system_message:
326
- if self.debug:
327
- logger.debug("No system message found in chat history, adding system prompt.")
328
- chat_history = [self.system_message] + chat_history
329
- else:
330
- if self.debug:
331
- logger.debug("System message already exists in chat history, using existing.")
332
-
333
- # Add remembered information if available
334
- if hasattr(self.tools_instance, 'memory') and self.tools_instance.memory:
335
- memory_block = "\n### Remembered Information:\n"
336
- for idx, item in enumerate(self.tools_instance.memory, 1):
337
- memory_block += f"{idx}. {item}\n"
338
-
339
- # Find the first user message and inject memory before it
340
- for i, msg in enumerate(chat_history):
341
- if msg.role == "user":
342
- if isinstance(msg.content, str):
343
- # For text-only messages
344
- updated_content = f"{memory_block}\n\n{msg.content}"
345
- chat_history[i] = ChatMessage(role="user", content=updated_content)
346
- elif isinstance(msg.content, list):
347
- # For multimodal content (from llama_index.core.base.llms.types import TextBlock)
348
- from llama_index.core.base.llms.types import TextBlock
349
- memory_text_block = TextBlock(text=memory_block)
350
- # Insert memory text block at beginning
351
- content_blocks = [memory_text_block] + msg.content
352
- chat_history[i] = ChatMessage(role="user", content=content_blocks)
353
- break
354
-
355
- # Add UI elements, screenshot, and phone state
356
- chat_history = await add_screenshot_image_block(self.tools_instance, chat_history)
357
- chat_history = await add_ui_text_block(self.tools_instance, chat_history)
358
- chat_history = await add_phone_state_block(self.tools_instance, chat_history)
359
-
360
- # Create copies of messages to avoid modifying the originals
361
- messages_to_send = [message_copy(msg) for msg in chat_history]
362
-
363
- if self.debug:
364
- logger.debug(f" - Final message count: {len(messages_to_send)}")
365
- response = await self.llm.achat(
366
- messages=messages_to_send
367
- )
368
- assert hasattr(response, "message"), f"LLM response does not have a message attribute.\nResponse: {response}"
369
- if self.debug:
370
- logger.debug(" - Received response from LLM.")
371
- return response
droidrun/tools/device.py DELETED
@@ -1,29 +0,0 @@
1
- """
2
- Device Manager - Handles Android device connections and management.
3
- """
4
-
5
- from typing import Optional, List
6
- from ..adb import Device, DeviceManager as ADBDeviceManager
7
-
8
- class DeviceManager:
9
- """Manages Android device connections and operations."""
10
-
11
- def __init__(self):
12
- """Initialize the device manager."""
13
- self._manager = ADBDeviceManager()
14
-
15
- async def connect(self, ip_address: str, port: int = 5555) -> Optional[Device]:
16
- """Connect to an Android device over TCP/IP."""
17
- return await self._manager.connect(ip_address, port)
18
-
19
- async def disconnect(self, serial: str) -> bool:
20
- """Disconnect from an Android device."""
21
- return await self._manager.disconnect(serial)
22
-
23
- async def list_devices(self) -> List[Device]:
24
- """List all connected devices."""
25
- return await self._manager.list_devices()
26
-
27
- async def get_device(self, serial: str) -> Optional[Device]:
28
- """Get a specific device by serial number."""
29
- return await self._manager.get_device(serial)
droidrun/tools/loader.py DELETED
@@ -1,60 +0,0 @@
1
- import asyncio
2
- import logging
3
- from .actions import Tools
4
- from .device import DeviceManager
5
- from typing import Tuple, Dict, Callable, Any, Optional
6
-
7
- # Get a logger for this module
8
- logger = logging.getLogger(__name__)
9
-
10
- async def load_tools(serial: Optional[str] = None) -> Tuple[Dict[str, Callable[..., Any]], Tools]:
11
- """
12
- Initializes the Tools class and returns a dictionary of available tool functions
13
- and the Tools instance itself. If serial is not provided, it attempts to find
14
- the first connected device.
15
-
16
- Args:
17
- serial: The device serial number. If None, finds the first available device.
18
- vision: Whether to include vision-related tools. (Set to false if you want to always send screenshot)
19
-
20
- Returns:
21
- A tuple containing:
22
- - A dictionary mapping tool names to their corresponding functions.
23
- - The initialized Tools instance.
24
-
25
- Raises:
26
- ValueError: If no device serial is provided and no devices are found.
27
- """
28
- if serial is None:
29
- logger.info("No device serial provided, attempting to find a connected device.")
30
- # Attempt to find a device if none is specified
31
- device_manager = DeviceManager()
32
- devices = await device_manager.list_devices()
33
- if not devices:
34
- logger.error("Device discovery failed: No connected devices found.")
35
- raise ValueError("No device serial provided and no connected devices found.")
36
- serial = devices[0].serial
37
- logger.info(f"Using auto-detected device: {serial}") # Use logger.info
38
-
39
- logger.debug(f"Initializing Tools for device: {serial}")
40
- tools_instance = Tools(serial=serial)
41
-
42
- tool_list = {
43
- # UI interaction
44
- "swipe": tools_instance.swipe,
45
- "input_text": tools_instance.input_text,
46
- "press_key": tools_instance.press_key,
47
- "tap_by_index": tools_instance.tap_by_index,
48
- #"tap_by_coordinates": tools_instance.tap_by_coordinates,
49
-
50
- # App management
51
- "start_app": tools_instance.start_app,
52
- "list_packages": tools_instance.list_packages,
53
- "complete": tools_instance.complete
54
- }
55
- logger.debug("Base tools loaded.")
56
-
57
-
58
- # Return both the dictionary and the instance, as the agent might need the instance
59
- logger.info(f"Tools loaded successfully for device {serial}.")
60
- return tool_list, tools_instance