droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. droidrun/__init__.py +2 -3
  2. droidrun/__main__.py +1 -1
  3. droidrun/agent/__init__.py +1 -1
  4. droidrun/agent/codeact/__init__.py +1 -4
  5. droidrun/agent/codeact/codeact_agent.py +66 -40
  6. droidrun/agent/codeact/events.py +6 -3
  7. droidrun/agent/codeact/prompts.py +2 -2
  8. droidrun/agent/common/events.py +4 -2
  9. droidrun/agent/context/__init__.py +1 -3
  10. droidrun/agent/context/agent_persona.py +2 -1
  11. droidrun/agent/context/context_injection_manager.py +6 -6
  12. droidrun/agent/context/episodic_memory.py +5 -3
  13. droidrun/agent/context/personas/__init__.py +3 -3
  14. droidrun/agent/context/personas/app_starter.py +3 -3
  15. droidrun/agent/context/personas/big_agent.py +3 -3
  16. droidrun/agent/context/personas/default.py +3 -3
  17. droidrun/agent/context/personas/ui_expert.py +5 -5
  18. droidrun/agent/context/task_manager.py +15 -17
  19. droidrun/agent/droid/__init__.py +1 -1
  20. droidrun/agent/droid/droid_agent.py +327 -180
  21. droidrun/agent/droid/events.py +91 -9
  22. droidrun/agent/executor/__init__.py +13 -0
  23. droidrun/agent/executor/events.py +24 -0
  24. droidrun/agent/executor/executor_agent.py +327 -0
  25. droidrun/agent/executor/prompts.py +136 -0
  26. droidrun/agent/manager/__init__.py +18 -0
  27. droidrun/agent/manager/events.py +20 -0
  28. droidrun/agent/manager/manager_agent.py +459 -0
  29. droidrun/agent/manager/prompts.py +223 -0
  30. droidrun/agent/oneflows/app_starter_workflow.py +118 -0
  31. droidrun/agent/oneflows/text_manipulator.py +204 -0
  32. droidrun/agent/planner/__init__.py +3 -3
  33. droidrun/agent/planner/events.py +6 -3
  34. droidrun/agent/planner/planner_agent.py +27 -42
  35. droidrun/agent/planner/prompts.py +2 -2
  36. droidrun/agent/usage.py +11 -11
  37. droidrun/agent/utils/__init__.py +11 -1
  38. droidrun/agent/utils/async_utils.py +2 -1
  39. droidrun/agent/utils/chat_utils.py +48 -60
  40. droidrun/agent/utils/device_state_formatter.py +177 -0
  41. droidrun/agent/utils/executer.py +12 -11
  42. droidrun/agent/utils/inference.py +114 -0
  43. droidrun/agent/utils/llm_picker.py +2 -0
  44. droidrun/agent/utils/message_utils.py +85 -0
  45. droidrun/agent/utils/tools.py +220 -0
  46. droidrun/agent/utils/trajectory.py +8 -7
  47. droidrun/cli/__init__.py +1 -1
  48. droidrun/cli/logs.py +29 -28
  49. droidrun/cli/main.py +279 -143
  50. droidrun/config_manager/__init__.py +25 -0
  51. droidrun/config_manager/config_manager.py +583 -0
  52. droidrun/macro/__init__.py +2 -2
  53. droidrun/macro/__main__.py +1 -1
  54. droidrun/macro/cli.py +36 -34
  55. droidrun/macro/replay.py +7 -9
  56. droidrun/portal.py +1 -1
  57. droidrun/telemetry/__init__.py +2 -2
  58. droidrun/telemetry/events.py +3 -4
  59. droidrun/telemetry/phoenix.py +173 -0
  60. droidrun/telemetry/tracker.py +7 -5
  61. droidrun/tools/__init__.py +1 -1
  62. droidrun/tools/adb.py +210 -82
  63. droidrun/tools/ios.py +7 -5
  64. droidrun/tools/tools.py +25 -8
  65. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/METADATA +5 -3
  66. droidrun-0.3.10.dev3.dist-info/RECORD +70 -0
  67. droidrun/agent/common/default.py +0 -5
  68. droidrun/agent/context/reflection.py +0 -20
  69. droidrun/agent/oneflows/reflector.py +0 -265
  70. droidrun-0.3.9.dist-info/RECORD +0 -56
  71. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/WHEEL +0 -0
  72. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/entry_points.txt +0 -0
  73. {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,118 @@
1
+ """
2
+ Simple workflow to open an app based on a description.
3
+ """
4
+
5
+ import json
6
+
7
+ from workflows import Context, Workflow, step
8
+ from workflows.events import StartEvent, StopEvent
9
+
10
+ from droidrun.tools.tools import Tools
11
+
12
+
13
class AppStarter(Workflow):
    """A simple workflow that opens an app based on a description.

    The workflow uses an LLM to intelligently match the app description
    to an installed app's package name, then opens it.
    """

    def __init__(self, tools: Tools, llm, timeout: int = 60, **kwargs):
        """Initialize the AppStarter workflow.

        Args:
            tools: An instance of Tools (e.g., AdbTools) to interact with the device.
            llm: An LLM instance (e.g., OpenAI) to determine which app to open.
            timeout: Workflow timeout in seconds (default: 60).
            **kwargs: Additional arguments passed to Workflow.
        """
        super().__init__(timeout=timeout, **kwargs)
        self.tools = tools
        self.llm = llm

    @step
    async def open_app_step(self, ev: StartEvent, ctx: Context) -> StopEvent:
        """Open an app matching the provided description.

        Expected StartEvent attributes:
            - app_description (str): The name or description of the app to open.

        Returns:
            StopEvent with the result of the open_app operation, or an
            error message if the LLM response could not be parsed.
        """
        app_description = ev.app_description

        # Get the list of installed apps (system apps included so that
        # requests like "Settings" can be resolved).
        apps = self.tools.get_apps(include_system=True)

        # Format the apps list for the LLM prompt.
        apps_list = "\n".join(
            f"- {app['label']} (package: {app['package']})" for app in apps
        )

        # Construct prompt for the LLM.
        prompt = f"""Given the following list of installed apps and a user's description, determine which app package name to open.

Installed Apps:
{apps_list}

User's Request: "{app_description}"

Return ONLY a JSON object with the following structure:
{{
    "package": "com.example.package"
}}

Choose the most appropriate app based on the description. Return the package name of the best match."""

        # Get LLM response.
        response = await self.llm.acomplete(prompt)
        response_text = str(response).strip()

        # Extract the JSON object between the first '{' and the last '}'.
        # FIX: previously start == -1 / end == 0 (no brace found) produced a
        # garbled slice and a confusing JSONDecodeError; check bounds first.
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end < start:
            return StopEvent(
                result=f"Error parsing LLM response: no JSON object found. Response: {response_text}"
            )

        try:
            result_json = json.loads(response_text[start : end + 1])
            package_name = result_json["package"]
        except (json.JSONDecodeError, KeyError, ValueError) as e:
            return StopEvent(result=f"Error parsing LLM response: {e}. Response: {response_text}")

        # Open the selected app using the package name.
        result = self.tools.start_app(package_name)

        return StopEvent(result=result)
90
+
91
+
92
# Example usage
async def main():
    """Example of how to use the AppStarter workflow."""
    from llama_index.llms.openai import OpenAI

    from droidrun.tools.adb import AdbTools

    # Device tools (serial=None selects the default connected device)
    # and the LLM that will pick the matching app.
    tools = AdbTools(serial=None)
    llm = OpenAI(model="gpt-4o-mini")

    # Build the workflow and ask it to open an app by description.
    workflow = AppStarter(tools=tools, llm=llm, timeout=60, verbose=True)
    result = await workflow.run(app_description="Settings")

    print(f"Result: {result}")


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
@@ -0,0 +1,204 @@
1
+ '''CodeAct-style agent for text manipulation via constrained Python execution.
2
+
3
+ This agent receives two inputs:
4
+ - current_text: the current content of the focused text box
5
+ - task_instruction: a natural language instruction describing how to modify the text
6
+
7
+ It asks an LLM to produce Python code that:
8
+ - Uses ONLY a single provided function: input_text(text: str)
9
+ - Constructs the final text to type as a triple-quoted big string, assigned
10
+ to a variable of the model's choice (e.g., new_text = """...""")
11
+ - May reference the predefined variable ORIGINAL which contains the current text
12
+ from the text box
13
+ - Calls input_text(new_text) exactly once to clear the field and input the new text
14
+
15
+ The produced code is executed in a restricted sandbox exposing ONLY:
16
+ - ORIGINAL: str (the original text content)
17
+ - input_text: function (captures the final text; semantically clears and types)
18
+
19
+ If the generated code produces execution errors, the agent automatically sends the
20
+ stack trace back to the LLM for correction, with up to 3 retry attempts by default.
21
+ This enables iterative refinement of the generated code.
22
+
23
+ The agent returns the final text that should be entered into the text box and the
24
+ raw code produced by the model (potentially after corrections).
25
+ '''
26
+
27
+
28
+ import traceback
29
+
30
+ from llama_index.core.llms import ChatMessage
31
+ from llama_index.core.llms.llm import LLM
32
+
33
+ from droidrun.agent.utils.inference import call_with_retries
34
+ from droidrun.telemetry.phoenix import clean_span
35
+
36
+
37
@clean_span("text_manipulator")
def run_text_manipulation_agent(instruction: str, current_subgoal: str, current_text: str, overall_plan, hitorical_plan, llm: LLM, max_retries: int = 4) -> tuple[str, str]:
    """Run a CodeAct-style LLM loop that rewrites the focused text field.

    The LLM is asked for a ```python block that builds the new field content
    and calls input_text(...) exactly once; the block is executed in a
    restricted sandbox. On execution errors the stack trace is sent back to
    the LLM for correction, with up to ``max_retries`` additional attempts.

    Args:
        instruction: User's overall instruction.
        current_subgoal: Current subgoal to accomplish.
        current_text: The current content of the focused text field.
        overall_plan: Overall plan context.
        hitorical_plan: Historical progress (parameter name kept as-is for
            interface compatibility with existing callers).
        llm: LLM instance to use for text manipulation.
        max_retries: Maximum number of retry attempts if code execution fails.

    Returns:
        Tuple of (final_text, raw_code) - the final text to input and the
        generated code (potentially after corrections).
    """
    # FIX: all prompts are now built with f-strings only. The previous
    # implementation interpolated user content into an f-string segment and
    # then called .format() on the combined string, which raised
    # KeyError/ValueError whenever the instruction, plans, or field text
    # contained '{' or '}'. The retry prompt additionally contained a
    # {current_text} placeholder that was never supplied to .format(), so
    # every correction round crashed with KeyError('current_text').
    system_prompt = (
        "You are CODEACT_TEXT_AGENT, a constrained Python code generator for editing text in an Android text box.\n"
        "You will be given: (1) the current text in the focused text box as ORIGINAL, and (2) a TASK that describes how to modify it.\n\n"
        "Your job is to output ONLY a single Python code block in ```python format that:\n"
        "- Defines NO new functions, classes, or imports.\n"
        "- Uses ONLY the provided function input_text(text: str).\n"
        "- Builds the final content in a triple-quoted big string assigned to a variable of your choice, e.g.:\n"
        "  new_text = \"\"\"...\"\"\"\n"
        "- Includes ORIGINAL in the new_text if needed to fulfill the TASK.\n"
        "- Calls input_text(new_text) exactly once to clear the field and input the new content.\n\n"
        "STRICT FORMAT RULES:\n"
        "- Respond with ONLY a fenced Python code block: ```python\n<code>\n```\n"
        "- Do NOT print anything. Do NOT use input().\n"
        "- Do NOT import any modules. Do NOT define additional functions or classes.\n"
        "- Do NOT access files, network, or system.\n"
        "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters\n"
        f"below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>\n{current_text}\n</ORIGINAL>\n"
        f"""
<user_request>
{instruction}
</user_request>
<overall_plan>
{overall_plan}
</overall_plan>
<progress_status>
{hitorical_plan}
</progress_status>
<current_subgoal>
{current_subgoal}
</current_subgoal>
"""
    )

    user_prompt = (
        f"TASK:\n{current_subgoal.strip()}\n\n"
        f"CURRENT TEXT (ORIGINAL):\n{current_text}\n\n"
        "Write the Python code now."
    )

    messages = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=user_prompt),
    ]

    for attempt in range(max_retries + 1):  # +1 for the initial attempt
        # Call the LLM with the conversation so far.
        response_message = call_with_retries(llm, messages).message
        content = response_message.content
        messages.append(response_message)

        # Extract code from ```python blocks.
        code = _extract_python_code(content)
        if not code:
            # Fallback: if no code block found, use entire response as code.
            code = content.strip()

        # Execute the code in a restricted sandbox.
        final_text, error_message = _execute_sandbox(code, current_text)

        # If successful (no error), return the result.
        if not error_message:
            return final_text, code

        # If this was the last attempt, return what we have.
        if attempt == max_retries:
            return final_text, code

        # Send the stack trace back to the LLM for correction. Built with
        # f-strings so tracebacks containing braces cannot break formatting.
        correction_message = (
            "You are CODEACT_TEXT_AGENT, correcting your previous code that had execution errors.\n\n"
            f"The code you generated previously failed with this error:\n{error_message}\n\n"
            "Please fix the code and output ONLY a new Python code block in ```python format.\n"
            "Follow the same rules as before:\n"
            "- Use ONLY the provided function input_text(text: str)\n"
            "- Build the final content in a triple-quoted big string\n"
            "- Include ORIGINAL in the new_text if needed\n"
            "- Call input_text(new_text) exactly once\n"
            "- Respond with ONLY a fenced Python code block\n"
            "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters"
            f"below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>{current_text}</ORIGINAL>\n"
        )
        messages.append(ChatMessage(role="user", content=correction_message))

    # Unreachable: the loop always returns on the last attempt.
    return current_text, ""
140
+
141
+
142
+ def _extract_python_code(text: str) -> str:
143
+ """Extract Python code from ```python fenced blocks using simple string operations."""
144
+ if not text:
145
+ return ""
146
+
147
+ # Try different variations of code block markers
148
+ patterns = [
149
+ # ```python with newlines
150
+ ("```python\n", "\n```"),
151
+ # ```python without newlines
152
+ ("```python", "```"),
153
+ # Generic ``` with newlines
154
+ ("```\n", "\n```"),
155
+ # Generic ``` without newlines
156
+ ("```", "```"),
157
+ ]
158
+
159
+ for start_marker, end_marker in patterns:
160
+ if start_marker in text and end_marker in text:
161
+ # Find the start position after the marker
162
+ start_idx = text.find(start_marker) + len(start_marker)
163
+ # Find the end position before the marker
164
+ end_idx = text.find(end_marker, start_idx)
165
+ if end_idx != -1:
166
+ code = text[start_idx:end_idx].strip()
167
+ # Only return if we actually extracted some code
168
+ if code:
169
+ return code
170
+
171
+ return ""
172
+
173
+
174
+ def _execute_sandbox(code: str, original_text: str) -> tuple[str, str]:
175
+ """Execute model code in a locked-down environment with exec().
176
+
177
+ Returns:
178
+ Tuple of (result_text, error_message) - result_text is the final text if successful,
179
+ or original_text if failed. error_message is the stack trace if execution failed,
180
+ or empty string if successful.
181
+ """
182
+ if not code:
183
+ return original_text, ""
184
+
185
+ captured = {"value": None}
186
+
187
+ def input_text(text: str) -> None:
188
+ """Capture the final text to be input."""
189
+ captured["value"] = text
190
+
191
+ # Create restricted environment
192
+ sandbox_globals = {
193
+ "__builtins__": {}, # Empty builtins for security
194
+ "input_text": input_text,
195
+ "ORIGINAL": original_text
196
+ }
197
+ sandbox_locals = {}
198
+
199
+ try:
200
+ exec(code, sandbox_globals, sandbox_locals)
201
+ return captured["value"] if captured["value"] is not None else original_text, ""
202
+ except Exception:
203
+ error_message = traceback.format_exc()
204
+ return original_text, error_message
@@ -1,13 +1,13 @@
1
1
  from droidrun.agent.planner.planner_agent import PlannerAgent
2
2
  from droidrun.agent.planner.prompts import (
3
3
  DEFAULT_PLANNER_SYSTEM_PROMPT,
4
+ DEFAULT_PLANNER_TASK_FAILED_PROMPT,
4
5
  DEFAULT_PLANNER_USER_PROMPT,
5
- DEFAULT_PLANNER_TASK_FAILED_PROMPT
6
6
  )
7
7
 
8
8
  __all__ = [
9
- "PlannerAgent",
9
+ "PlannerAgent",
10
10
  "DEFAULT_PLANNER_SYSTEM_PROMPT",
11
11
  "DEFAULT_PLANNER_USER_PROMPT",
12
12
  "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
13
- ]
13
+ ]
@@ -1,16 +1,19 @@
1
- from llama_index.core.workflow import Event
1
+ from typing import Optional
2
+
2
3
  from llama_index.core.base.llms.types import ChatMessage
3
- from typing import Optional, Any
4
+ from llama_index.core.workflow import Event
5
+
4
6
  from droidrun.agent.context import Task
5
7
  from droidrun.agent.usage import UsageResult
6
8
 
9
+
7
10
  class PlanInputEvent(Event):
8
11
  input: list[ChatMessage]
9
12
 
10
13
 
11
14
  class PlanThinkingEvent(Event):
12
15
  thoughts: Optional[str] = None
13
- code: Optional[str] = None
16
+ code: Optional[str] = None
14
17
  usage: Optional[UsageResult] = None
15
18
 
16
19
 
@@ -1,34 +1,31 @@
1
- from droidrun.agent.planner.events import *
2
- from droidrun.agent.planner.prompts import (
3
- DEFAULT_PLANNER_SYSTEM_PROMPT,
4
- DEFAULT_PLANNER_USER_PROMPT,
5
- )
6
- import logging
7
1
  import asyncio
8
- from typing import List, TYPE_CHECKING, Union
9
- import inspect
2
+ import logging
3
+ from typing import TYPE_CHECKING, List, Union
4
+
5
+ from dotenv import load_dotenv
10
6
  from llama_index.core.base.llms.types import ChatMessage, ChatResponse
11
- from llama_index.core.prompts import PromptTemplate
12
7
  from llama_index.core.llms.llm import LLM
13
- from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
14
8
  from llama_index.core.memory import Memory
15
- from llama_index.core.llms.llm import LLM
16
- from droidrun.agent.usage import get_usage_from_response
17
- from droidrun.agent.utils.executer import SimpleCodeExecutor
18
- from droidrun.agent.utils import chat_utils
19
- from droidrun.agent.context.task_manager import TaskManager
20
- from droidrun.tools import Tools
9
+ from llama_index.core.prompts import PromptTemplate
10
+ from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
11
+
21
12
  from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
22
13
  from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
14
+ from droidrun.agent.context.agent_persona import AgentPersona
15
+ from droidrun.agent.context.task_manager import TaskManager
23
16
  from droidrun.agent.planner.events import (
24
- PlanInputEvent,
25
17
  PlanCreatedEvent,
18
+ PlanInputEvent,
26
19
  PlanThinkingEvent,
27
20
  )
28
- from droidrun.agent.context.agent_persona import AgentPersona
29
- from droidrun.agent.context.reflection import Reflection
30
-
31
- from dotenv import load_dotenv
21
+ from droidrun.agent.planner.prompts import (
22
+ DEFAULT_PLANNER_SYSTEM_PROMPT,
23
+ DEFAULT_PLANNER_USER_PROMPT,
24
+ )
25
+ from droidrun.agent.usage import get_usage_from_response
26
+ from droidrun.agent.utils import chat_utils
27
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
28
+ from droidrun.tools import Tools
32
29
 
33
30
  load_dotenv()
34
31
 
@@ -64,7 +61,6 @@ class PlannerAgent(Workflow):
64
61
 
65
62
  self.chat_memory = None
66
63
  self.remembered_info = None
67
- self.reflection: Reflection = None
68
64
 
69
65
  self.current_retry = 0
70
66
  self.steps_counter = 0
@@ -106,15 +102,10 @@ class PlannerAgent(Workflow):
106
102
  if ev.remembered_info:
107
103
  self.remembered_info = ev.remembered_info
108
104
 
109
- if ev.reflection:
110
- self.reflection = ev.reflection
111
- else:
112
- self.reflection = None
113
-
114
105
  assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
115
-
106
+
116
107
  await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
117
-
108
+
118
109
  input_messages = self.chat_memory.get_all()
119
110
  logger.debug(f" - Memory contains {len(input_messages)} messages")
120
111
  return PlanInputEvent(input=input_messages)
@@ -130,7 +121,7 @@ class PlannerAgent(Workflow):
130
121
  ctx.write_event_to_stream(ev)
131
122
 
132
123
  self.steps_counter += 1
133
- logger.info(f"🧠 Thinking about how to plan the goal...")
124
+ logger.info("🧠 Thinking about how to plan the goal...")
134
125
 
135
126
  if self.vision:
136
127
  screenshot = (self.tools_instance.take_screenshot())[1]
@@ -142,12 +133,11 @@ class PlannerAgent(Workflow):
142
133
  await ctx.store.set("ui_state", state["a11y_tree"])
143
134
  await ctx.store.set("phone_state", state["phone_state"])
144
135
  ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
145
- except Exception as e:
146
- logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
136
+ except Exception:
137
+ logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
147
138
 
148
139
 
149
140
  await ctx.store.set("remembered_info", self.remembered_info)
150
- await ctx.store.set("reflection", self.reflection)
151
141
 
152
142
  response = await self._get_llm_response(ctx, chat_history)
153
143
  try:
@@ -170,18 +160,17 @@ class PlannerAgent(Workflow):
170
160
  """Handle LLM output."""
171
161
  logger.debug("🤖 Processing planning output...")
172
162
  code = ev.code
173
- thoughts = ev.thoughts
174
163
 
175
164
  if code:
176
165
  try:
177
166
  result = await self.executer.execute(ctx, code)
178
- logger.info(f"📝 Planning complete")
167
+ logger.info("📝 Planning complete")
179
168
  logger.debug(f" - Planning code executed. Result: {result['output']}")
180
169
 
181
170
  screenshots = result['screenshots']
182
171
  for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
183
172
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
184
-
173
+
185
174
  ui_states = result['ui_states']
186
175
  for ui_state in ui_states[:-1]:
187
176
  ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
@@ -257,7 +246,7 @@ wrap your code inside this:
257
246
  logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
258
247
 
259
248
  model = self.llm.class_name()
260
- if self.vision == True:
249
+ if self.vision:
261
250
  if model == "DeepSeek":
262
251
  logger.warning(
263
252
  "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
@@ -265,7 +254,7 @@ wrap your code inside this:
265
254
  else:
266
255
  chat_history = await chat_utils.add_screenshot_image_block(
267
256
  await ctx.store.get("screenshot"), chat_history
268
- )
257
+ )
269
258
 
270
259
 
271
260
 
@@ -280,10 +269,6 @@ wrap your code inside this:
280
269
  if remembered_info:
281
270
  chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
282
271
 
283
- reflection = await ctx.store.get("reflection", None)
284
- if reflection:
285
- chat_history = await chat_utils.add_reflection_summary(reflection, chat_history)
286
-
287
272
  chat_history = await chat_utils.add_phone_state_block(await ctx.store.get("phone_state"), chat_history)
288
273
  chat_history = await chat_utils.add_ui_text_block(await ctx.store.get("ui_state"), chat_history)
289
274
 
@@ -119,6 +119,6 @@ Instruction: Based **only** on the provided screenshot showing the current state
119
119
  # Export all prompts
120
120
  __all__ = [
121
121
  "DEFAULT_PLANNER_SYSTEM_PROMPT",
122
- "DEFAULT_PLANNER_USER_PROMPT",
122
+ "DEFAULT_PLANNER_USER_PROMPT",
123
123
  "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
124
- ]
124
+ ]
droidrun/agent/usage.py CHANGED
@@ -1,12 +1,12 @@
1
1
  import contextlib
2
- from llama_index.core.callbacks import CallbackManager
2
+ import logging
3
+ from typing import Any, Dict, List, Optional
4
+ from uuid import uuid4
5
+
3
6
  from llama_index.core.callbacks.base_handler import BaseCallbackHandler
4
7
  from llama_index.core.callbacks.schema import CBEventType, EventPayload
5
8
  from llama_index.core.llms import LLM, ChatResponse
6
9
  from pydantic import BaseModel
7
- from typing import Any, Dict, List, Optional
8
- from uuid import uuid4
9
- import logging
10
10
 
11
11
  logger = logging.getLogger("droidrun")
12
12
  SUPPORTED_PROVIDERS = [
@@ -111,7 +111,7 @@ class TokenCountingHandler(BaseCallbackHandler):
111
111
  )
112
112
 
113
113
  def _get_event_usage(self, payload: Dict[str, Any]) -> UsageResult:
114
- if not EventPayload.RESPONSE in payload:
114
+ if EventPayload.RESPONSE not in payload:
115
115
  raise ValueError("No response in payload")
116
116
 
117
117
  chat_rsp: ChatResponse = payload.get(EventPayload.RESPONSE)
@@ -180,26 +180,26 @@ def create_tracker(llm: LLM) -> TokenCountingHandler:
180
180
 
181
181
  def track_usage(llm: LLM) -> TokenCountingHandler:
182
182
  """Track token usage for an LLM instance across all requests.
183
-
183
+
184
184
  This function:
185
185
  - Creates a new TokenCountingHandler for the LLM provider
186
186
  - Registers that handler as an LLM callback to monitor all requests
187
187
  - Returns the handler for accessing cumulative usage statistics
188
-
188
+
189
189
  The handler counts tokens for total LLM usage across all requests. For fine-grained
190
190
  per-request counting, use either:
191
191
  - `create_tracker()` with `llm_callback()` context manager for temporary tracking
192
192
  - `get_usage_from_response()` to extract usage from individual responses
193
-
193
+
194
194
  Args:
195
195
  llm: The LLamaIndex LLM instance to track usage for
196
-
196
+
197
197
  Returns:
198
198
  TokenCountingHandler: The registered handler that accumulates usage statistics
199
-
199
+
200
200
  Raises:
201
201
  ValueError: If the LLM provider is not supported for tracking
202
-
202
+
203
203
  Example:
204
204
  >>> llm = OpenAI()
205
205
  >>> tracker = track_usage(llm)
@@ -1,3 +1,13 @@
1
1
  """
2
2
  Utility modules for DroidRun agents.
3
- """
3
+ """
4
+
5
+ from droidrun.agent.utils.message_utils import (
6
+ convert_messages_to_chatmessages,
7
+ image_to_image_bytes,
8
+ )
9
+
10
+ __all__ = [
11
+ "convert_messages_to_chatmessages",
12
+ "image_to_image_bytes",
13
+ ]
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
 
3
+
3
4
  def async_to_sync(func):
4
5
  """
5
6
  Convert an async function to a sync function.
@@ -14,4 +15,4 @@ def async_to_sync(func):
14
15
  def wrapper(*args, **kwargs):
15
16
  return asyncio.run(func(*args, **kwargs))
16
17
 
17
- return wrapper
18
+ return wrapper