droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. droidrun/__init__.py +2 -3
  2. droidrun/__main__.py +1 -1
  3. droidrun/agent/__init__.py +1 -1
  4. droidrun/agent/codeact/__init__.py +1 -4
  5. droidrun/agent/codeact/codeact_agent.py +112 -48
  6. droidrun/agent/codeact/events.py +6 -3
  7. droidrun/agent/codeact/prompts.py +2 -2
  8. droidrun/agent/common/constants.py +2 -0
  9. droidrun/agent/common/events.py +5 -3
  10. droidrun/agent/context/__init__.py +1 -3
  11. droidrun/agent/context/agent_persona.py +2 -1
  12. droidrun/agent/context/context_injection_manager.py +6 -6
  13. droidrun/agent/context/episodic_memory.py +5 -3
  14. droidrun/agent/context/personas/__init__.py +3 -3
  15. droidrun/agent/context/personas/app_starter.py +3 -3
  16. droidrun/agent/context/personas/big_agent.py +3 -3
  17. droidrun/agent/context/personas/default.py +3 -3
  18. droidrun/agent/context/personas/ui_expert.py +5 -5
  19. droidrun/agent/context/task_manager.py +15 -17
  20. droidrun/agent/droid/__init__.py +1 -1
  21. droidrun/agent/droid/droid_agent.py +327 -182
  22. droidrun/agent/droid/events.py +91 -9
  23. droidrun/agent/executor/__init__.py +13 -0
  24. droidrun/agent/executor/events.py +24 -0
  25. droidrun/agent/executor/executor_agent.py +327 -0
  26. droidrun/agent/executor/prompts.py +136 -0
  27. droidrun/agent/manager/__init__.py +18 -0
  28. droidrun/agent/manager/events.py +20 -0
  29. droidrun/agent/manager/manager_agent.py +459 -0
  30. droidrun/agent/manager/prompts.py +223 -0
  31. droidrun/agent/oneflows/app_starter_workflow.py +118 -0
  32. droidrun/agent/oneflows/text_manipulator.py +204 -0
  33. droidrun/agent/planner/__init__.py +3 -3
  34. droidrun/agent/planner/events.py +6 -3
  35. droidrun/agent/planner/planner_agent.py +60 -53
  36. droidrun/agent/planner/prompts.py +2 -2
  37. droidrun/agent/usage.py +15 -13
  38. droidrun/agent/utils/__init__.py +11 -1
  39. droidrun/agent/utils/async_utils.py +2 -1
  40. droidrun/agent/utils/chat_utils.py +48 -60
  41. droidrun/agent/utils/device_state_formatter.py +177 -0
  42. droidrun/agent/utils/executer.py +13 -12
  43. droidrun/agent/utils/inference.py +114 -0
  44. droidrun/agent/utils/llm_picker.py +2 -0
  45. droidrun/agent/utils/message_utils.py +85 -0
  46. droidrun/agent/utils/tools.py +220 -0
  47. droidrun/agent/utils/trajectory.py +8 -7
  48. droidrun/cli/__init__.py +1 -1
  49. droidrun/cli/logs.py +29 -28
  50. droidrun/cli/main.py +279 -143
  51. droidrun/config_manager/__init__.py +25 -0
  52. droidrun/config_manager/config_manager.py +583 -0
  53. droidrun/macro/__init__.py +2 -2
  54. droidrun/macro/__main__.py +1 -1
  55. droidrun/macro/cli.py +36 -34
  56. droidrun/macro/replay.py +7 -9
  57. droidrun/portal.py +1 -1
  58. droidrun/telemetry/__init__.py +2 -2
  59. droidrun/telemetry/events.py +3 -4
  60. droidrun/telemetry/phoenix.py +173 -0
  61. droidrun/telemetry/tracker.py +7 -5
  62. droidrun/tools/__init__.py +1 -1
  63. droidrun/tools/adb.py +210 -82
  64. droidrun/tools/ios.py +7 -5
  65. droidrun/tools/tools.py +25 -8
  66. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
  67. droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
  68. droidrun/agent/common/default.py +0 -5
  69. droidrun/agent/context/reflection.py +0 -20
  70. droidrun/agent/oneflows/reflector.py +0 -265
  71. droidrun-0.3.8.dist-info/RECORD +0 -55
  72. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
  73. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
  74. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,118 @@
1
+ """
2
+ Simple workflow to open an app based on a description.
3
+ """
4
+
5
+ import json
6
+
7
+ from workflows import Context, Workflow, step
8
+ from workflows.events import StartEvent, StopEvent
9
+
10
+ from droidrun.tools.tools import Tools
11
+
12
+
13
class AppStarter(Workflow):
    """A simple workflow that opens an app based on a description.

    The workflow uses an LLM to intelligently match the app description
    to an installed app's package name, then opens it.
    """

    def __init__(self, tools: Tools, llm, timeout: int = 60, **kwargs):
        """Initialize the AppStarter workflow.

        Args:
            tools: An instance of Tools (e.g., AdbTools) to interact with the device
            llm: An LLM instance (e.g., OpenAI) to determine which app to open
            timeout: Workflow timeout in seconds (default: 60)
            **kwargs: Additional arguments passed to Workflow
        """
        super().__init__(timeout=timeout, **kwargs)
        self.tools = tools
        self.llm = llm

    @step
    async def open_app_step(self, ev: StartEvent, ctx: Context) -> StopEvent:
        """Open the installed app that best matches ``ev.app_description``.

        Expected StartEvent attributes:
            - app_description (str): The name or description of the app to open

        Returns:
            StopEvent with the result of the start_app operation, or an error
            description string if the LLM response could not be parsed.
        """
        app_description = ev.app_description

        # List every app (including system apps) so the LLM can pick a match.
        apps = self.tools.get_apps(include_system=True)

        # Format apps list for the LLM prompt.
        apps_list = "\n".join(
            f"- {app['label']} (package: {app['package']})" for app in apps
        )

        prompt = f"""Given the following list of installed apps and a user's description, determine which app package name to open.

Installed Apps:
{apps_list}

User's Request: "{app_description}"

Return ONLY a JSON object with the following structure:
{{
    "package": "com.example.package"
}}

Choose the most appropriate app based on the description. Return the package name of the best match."""

        response = await self.llm.acomplete(prompt)
        response_text = str(response).strip()

        # Extract the JSON object between the first '{' and the last '}'.
        start = response_text.find("{")
        end = response_text.rfind("}") + 1
        if start == -1 or end <= start:
            # Explicit guard: previously a missing brace produced a nonsense
            # slice that only failed later inside json.loads with a confusing
            # JSONDecodeError message.
            return StopEvent(
                result=f"Error parsing LLM response: no JSON object found. Response: {response_text}"
            )

        try:
            result_json = json.loads(response_text[start:end])
            package_name = result_json["package"]
        except (json.JSONDecodeError, KeyError, ValueError) as e:
            return StopEvent(result=f"Error parsing LLM response: {e}. Response: {response_text}")

        # Open the selected app using the resolved package name.
        result = self.tools.start_app(package_name)
        return StopEvent(result=result)
90
+
91
+
92
# Example usage
async def main():
    """Demonstrate running the AppStarter workflow against the default device."""
    from llama_index.llms.openai import OpenAI

    from droidrun.tools.adb import AdbTools

    # None selects the default connected device.
    device_tools = AdbTools(serial=None)

    # LLM used to pick the matching package name.
    model = OpenAI(model="gpt-4o-mini")

    app_starter = AppStarter(tools=device_tools, llm=model, timeout=60, verbose=True)

    # Ask the workflow to open an app by description.
    outcome = await app_starter.run(app_description="Settings")

    print(f"Result: {outcome}")


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
@@ -0,0 +1,204 @@
1
+ '''CodeAct-style agent for text manipulation via constrained Python execution.
2
+
3
+ This agent receives two inputs:
4
+ - current_text: the current content of the focused text box
5
+ - task_instruction: a natural language instruction describing how to modify the text
6
+
7
+ It asks an LLM to produce Python code that:
8
+ - Uses ONLY a single provided function: input_text(text: str)
9
+ - Constructs the final text to type as a triple-quoted big string, assigned
10
+ to a variable of the model's choice (e.g., new_text = """...""")
11
+ - May reference the predefined variable ORIGINAL which contains the current text
12
+ from the text box
13
+ - Calls input_text(new_text) exactly once to clear the field and input the new text
14
+
15
+ The produced code is executed in a restricted sandbox exposing ONLY:
16
+ - ORIGINAL: str (the original text content)
17
+ - input_text: function (captures the final text; semantically clears and types)
18
+
19
+ If the generated code produces execution errors, the agent automatically sends the
20
+ stack trace back to the LLM for correction, with up to 3 retry attempts by default.
21
+ This enables iterative refinement of the generated code.
22
+
23
+ The agent returns the final text that should be entered into the text box and the
24
+ raw code produced by the model (potentially after corrections).
25
+ '''
26
+
27
+
28
+ import traceback
29
+
30
+ from llama_index.core.llms import ChatMessage
31
+ from llama_index.core.llms.llm import LLM
32
+
33
+ from droidrun.agent.utils.inference import call_with_retries
34
+ from droidrun.telemetry.phoenix import clean_span
35
+
36
+
37
@clean_span("text_manipulator")
def run_text_manipulation_agent(
    instruction: str,
    current_subgoal: str,
    current_text: str,
    overall_plan,
    hitorical_plan,
    llm: LLM,
    max_retries: int = 4,
) -> tuple[str, str]:
    """Run CodeAct-style text manipulation with automatic error correction.

    Args:
        instruction: User's overall instruction
        current_subgoal: Current subgoal to accomplish
        current_text: The current content of the focused text field
        overall_plan: Overall plan context
        hitorical_plan: Historical progress
        llm: LLM instance to use for text manipulation
        max_retries: Maximum number of retry attempts if code execution fails

    Returns:
        Tuple of (final_text, raw_code) - the final text to input and the generated code
    """
    # BUG FIX: prompts are now built with f-strings at use time instead of a
    # later str.format() call. The previous implementation f-interpolated user
    # content and then called .format() on the result, which raised
    # KeyError/ValueError whenever the instruction, plans, or original text
    # contained a literal '{' or '}'. Worse, the retry prompt's
    # {current_text} placeholder was never supplied to .format(), so every
    # correction attempt crashed with KeyError('current_text').
    system_prompt = (
        "You are CODEACT_TEXT_AGENT, a constrained Python code generator for editing text in an Android text box.\n"
        "You will be given: (1) the current text in the focused text box as ORIGINAL, and (2) a TASK that describes how to modify it.\n\n"
        "Your job is to output ONLY a single Python code block in ```python format that:\n"
        "- Defines NO new functions, classes, or imports.\n"
        "- Uses ONLY the provided function input_text(text: str).\n"
        "- Builds the final content in a triple-quoted big string assigned to a variable of your choice, e.g.:\n"
        "  new_text = \"\"\"...\"\"\"\n"
        "- Includes ORIGINAL in the new_text if needed to fulfill the TASK.\n"
        "- Calls input_text(new_text) exactly once to clear the field and input the new content.\n\n"
        "STRICT FORMAT RULES:\n"
        "- Respond with ONLY a fenced Python code block: ```python\n<code>\n```\n"
        "- Do NOT print anything. Do NOT use input().\n"
        "- Do NOT import any modules. Do NOT define additional functions or classes.\n"
        "- Do NOT access files, network, or system.\n"
        "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters\n"
        f"below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>\n{current_text}\n</ORIGINAL>\n"
        f"""
<user_request>
{instruction}
</user_request>
<overall_plan>
{overall_plan}
</overall_plan>
<progress_status>
{hitorical_plan}
</progress_status>
<current_subgoal>
{current_subgoal}
</current_subgoal>
"""
    )

    user_prompt = (
        f"TASK:\n{current_subgoal.strip()}\n\n"
        f"CURRENT TEXT (ORIGINAL):\n{current_text}\n\n"
        "Write the Python code now."
    )

    messages = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=user_prompt),
    ]

    for attempt in range(max_retries + 1):  # +1 for the initial attempt
        # Call the LLM with the conversation so far.
        response_message = call_with_retries(llm, messages).message
        content = response_message.content
        messages.append(response_message)

        # Extract code from ```python blocks.
        code = _extract_python_code(content)
        if not code:
            # Fallback: if no code block found, use the entire response as code.
            code = content.strip()

        # Execute the code in a restricted sandbox.
        final_text, error_message = _execute_sandbox(code, current_text)

        # Success (no error): return the result.
        if not error_message:
            return final_text, code

        # Last attempt: return whatever we have.
        if attempt == max_retries:
            return final_text, code

        # Feed the traceback back to the LLM for correction.
        correction_message = (
            "You are CODEACT_TEXT_AGENT, correcting your previous code that had execution errors.\n\n"
            f"The code you generated previously failed with this error:\n{error_message}\n\n"
            "Please fix the code and output ONLY a new Python code block in ```python format.\n"
            "Follow the same rules as before:\n"
            "- Use ONLY the provided function input_text(text: str)\n"
            "- Build the final content in a triple-quoted big string\n"
            "- Include ORIGINAL in the new_text if needed\n"
            "- Call input_text(new_text) exactly once\n"
            "- Respond with ONLY a fenced Python code block\n"
            "If you are unsure about the ORIGINAL, use it by referencing ORIGINAL variable so you dont make mistake with white space or new line characters"
            f"below is ORIGINAL use it by referencing ORIGINAL variable or directly typing it out:\n<ORIGINAL>{current_text}</ORIGINAL>\n"
        )
        messages.append(ChatMessage(role="user", content=correction_message))

    # Defensive: unreachable because the loop always returns.
    return current_text, ""
140
+
141
+
142
+ def _extract_python_code(text: str) -> str:
143
+ """Extract Python code from ```python fenced blocks using simple string operations."""
144
+ if not text:
145
+ return ""
146
+
147
+ # Try different variations of code block markers
148
+ patterns = [
149
+ # ```python with newlines
150
+ ("```python\n", "\n```"),
151
+ # ```python without newlines
152
+ ("```python", "```"),
153
+ # Generic ``` with newlines
154
+ ("```\n", "\n```"),
155
+ # Generic ``` without newlines
156
+ ("```", "```"),
157
+ ]
158
+
159
+ for start_marker, end_marker in patterns:
160
+ if start_marker in text and end_marker in text:
161
+ # Find the start position after the marker
162
+ start_idx = text.find(start_marker) + len(start_marker)
163
+ # Find the end position before the marker
164
+ end_idx = text.find(end_marker, start_idx)
165
+ if end_idx != -1:
166
+ code = text[start_idx:end_idx].strip()
167
+ # Only return if we actually extracted some code
168
+ if code:
169
+ return code
170
+
171
+ return ""
172
+
173
+
174
+ def _execute_sandbox(code: str, original_text: str) -> tuple[str, str]:
175
+ """Execute model code in a locked-down environment with exec().
176
+
177
+ Returns:
178
+ Tuple of (result_text, error_message) - result_text is the final text if successful,
179
+ or original_text if failed. error_message is the stack trace if execution failed,
180
+ or empty string if successful.
181
+ """
182
+ if not code:
183
+ return original_text, ""
184
+
185
+ captured = {"value": None}
186
+
187
+ def input_text(text: str) -> None:
188
+ """Capture the final text to be input."""
189
+ captured["value"] = text
190
+
191
+ # Create restricted environment
192
+ sandbox_globals = {
193
+ "__builtins__": {}, # Empty builtins for security
194
+ "input_text": input_text,
195
+ "ORIGINAL": original_text
196
+ }
197
+ sandbox_locals = {}
198
+
199
+ try:
200
+ exec(code, sandbox_globals, sandbox_locals)
201
+ return captured["value"] if captured["value"] is not None else original_text, ""
202
+ except Exception:
203
+ error_message = traceback.format_exc()
204
+ return original_text, error_message
@@ -1,13 +1,13 @@
1
1
  from droidrun.agent.planner.planner_agent import PlannerAgent
2
2
  from droidrun.agent.planner.prompts import (
3
3
  DEFAULT_PLANNER_SYSTEM_PROMPT,
4
+ DEFAULT_PLANNER_TASK_FAILED_PROMPT,
4
5
  DEFAULT_PLANNER_USER_PROMPT,
5
- DEFAULT_PLANNER_TASK_FAILED_PROMPT
6
6
  )
7
7
 
8
8
  __all__ = [
9
- "PlannerAgent",
9
+ "PlannerAgent",
10
10
  "DEFAULT_PLANNER_SYSTEM_PROMPT",
11
11
  "DEFAULT_PLANNER_USER_PROMPT",
12
12
  "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
13
- ]
13
+ ]
@@ -1,16 +1,19 @@
1
- from llama_index.core.workflow import Event
1
+ from typing import Optional
2
+
2
3
  from llama_index.core.base.llms.types import ChatMessage
3
- from typing import Optional, Any
4
+ from llama_index.core.workflow import Event
5
+
4
6
  from droidrun.agent.context import Task
5
7
  from droidrun.agent.usage import UsageResult
6
8
 
9
+
7
10
  class PlanInputEvent(Event):
8
11
  input: list[ChatMessage]
9
12
 
10
13
 
11
14
  class PlanThinkingEvent(Event):
12
15
  thoughts: Optional[str] = None
13
- code: Optional[str] = None
16
+ code: Optional[str] = None
14
17
  usage: Optional[UsageResult] = None
15
18
 
16
19
 
@@ -1,33 +1,31 @@
1
- from droidrun.agent.planner.events import *
2
- from droidrun.agent.planner.prompts import (
3
- DEFAULT_PLANNER_SYSTEM_PROMPT,
4
- DEFAULT_PLANNER_USER_PROMPT,
5
- )
6
- import logging
7
1
  import asyncio
8
- from typing import List, TYPE_CHECKING, Union
9
- import inspect
2
+ import logging
3
+ from typing import TYPE_CHECKING, List, Union
4
+
5
+ from dotenv import load_dotenv
10
6
  from llama_index.core.base.llms.types import ChatMessage, ChatResponse
11
- from llama_index.core.prompts import PromptTemplate
12
7
  from llama_index.core.llms.llm import LLM
13
- from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
14
8
  from llama_index.core.memory import Memory
15
- from llama_index.core.llms.llm import LLM
16
- from droidrun.agent.usage import get_usage_from_response
17
- from droidrun.agent.utils.executer import SimpleCodeExecutor
18
- from droidrun.agent.utils import chat_utils
9
+ from llama_index.core.prompts import PromptTemplate
10
+ from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
11
+
12
+ from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
13
+ from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
14
+ from droidrun.agent.context.agent_persona import AgentPersona
19
15
  from droidrun.agent.context.task_manager import TaskManager
20
- from droidrun.tools import Tools
21
- from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
22
16
  from droidrun.agent.planner.events import (
23
- PlanInputEvent,
24
17
  PlanCreatedEvent,
18
+ PlanInputEvent,
25
19
  PlanThinkingEvent,
26
20
  )
27
- from droidrun.agent.context.agent_persona import AgentPersona
28
- from droidrun.agent.context.reflection import Reflection
29
-
30
- from dotenv import load_dotenv
21
+ from droidrun.agent.planner.prompts import (
22
+ DEFAULT_PLANNER_SYSTEM_PROMPT,
23
+ DEFAULT_PLANNER_USER_PROMPT,
24
+ )
25
+ from droidrun.agent.usage import get_usage_from_response
26
+ from droidrun.agent.utils import chat_utils
27
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
28
+ from droidrun.tools import Tools
31
29
 
32
30
  load_dotenv()
33
31
 
@@ -63,7 +61,6 @@ class PlannerAgent(Workflow):
63
61
 
64
62
  self.chat_memory = None
65
63
  self.remembered_info = None
66
- self.reflection: Reflection = None
67
64
 
68
65
  self.current_retry = 0
69
66
  self.steps_counter = 0
@@ -97,7 +94,7 @@ class PlannerAgent(Workflow):
97
94
  async def prepare_chat(self, ctx: Context, ev: StartEvent) -> PlanInputEvent:
98
95
  logger.info("💬 Preparing planning session...")
99
96
 
100
- self.chat_memory: Memory = await ctx.get(
97
+ self.chat_memory: Memory = await ctx.store.get(
101
98
  "chat_memory", default=Memory.from_defaults()
102
99
  )
103
100
  await self.chat_memory.aput(self.user_message)
@@ -105,15 +102,10 @@ class PlannerAgent(Workflow):
105
102
  if ev.remembered_info:
106
103
  self.remembered_info = ev.remembered_info
107
104
 
108
- if ev.reflection:
109
- self.reflection = ev.reflection
110
- else:
111
- self.reflection = None
112
-
113
105
  assert len(self.chat_memory.get_all()) > 0 or self.user_prompt, "Memory input, user prompt or user input cannot be empty."
114
-
106
+
115
107
  await self.chat_memory.aput(ChatMessage(role="user", content=PromptTemplate(self.user_prompt or DEFAULT_PLANNER_USER_PROMPT.format(goal=self.goal))))
116
-
108
+
117
109
  input_messages = self.chat_memory.get_all()
118
110
  logger.debug(f" - Memory contains {len(input_messages)} messages")
119
111
  return PlanInputEvent(input=input_messages)
@@ -129,24 +121,23 @@ class PlannerAgent(Workflow):
129
121
  ctx.write_event_to_stream(ev)
130
122
 
131
123
  self.steps_counter += 1
132
- logger.info(f"🧠 Thinking about how to plan the goal...")
124
+ logger.info("🧠 Thinking about how to plan the goal...")
133
125
 
134
126
  if self.vision:
135
127
  screenshot = (self.tools_instance.take_screenshot())[1]
136
128
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
137
- await ctx.set("screenshot", screenshot)
129
+ await ctx.store.set("screenshot", screenshot)
138
130
 
139
131
  try:
140
132
  state = self.tools_instance.get_state()
141
- await ctx.set("ui_state", state["a11y_tree"])
142
- await ctx.set("phone_state", state["phone_state"])
133
+ await ctx.store.set("ui_state", state["a11y_tree"])
134
+ await ctx.store.set("phone_state", state["phone_state"])
143
135
  ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
144
- except Exception as e:
145
- logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
136
+ except Exception:
137
+ logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
146
138
 
147
139
 
148
- await ctx.set("remembered_info", self.remembered_info)
149
- await ctx.set("reflection", self.reflection)
140
+ await ctx.store.set("remembered_info", self.remembered_info)
150
141
 
151
142
  response = await self._get_llm_response(ctx, chat_history)
152
143
  try:
@@ -169,18 +160,17 @@ class PlannerAgent(Workflow):
169
160
  """Handle LLM output."""
170
161
  logger.debug("🤖 Processing planning output...")
171
162
  code = ev.code
172
- thoughts = ev.thoughts
173
163
 
174
164
  if code:
175
165
  try:
176
166
  result = await self.executer.execute(ctx, code)
177
- logger.info(f"📝 Planning complete")
167
+ logger.info("📝 Planning complete")
178
168
  logger.debug(f" - Planning code executed. Result: {result['output']}")
179
169
 
180
170
  screenshots = result['screenshots']
181
171
  for screenshot in screenshots[:-1]: # the last screenshot will be captured by next step
182
172
  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
183
-
173
+
184
174
  ui_states = result['ui_states']
185
175
  for ui_state in ui_states[:-1]:
186
176
  ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
@@ -237,7 +227,7 @@ wrap your code inside this:
237
227
  @step
238
228
  async def finalize(self, ev: PlanCreatedEvent, ctx: Context) -> StopEvent:
239
229
  """Finalize the workflow."""
240
- await ctx.set("chat_memory", self.chat_memory)
230
+ await ctx.store.set("chat_memory", self.chat_memory)
241
231
 
242
232
  result = {}
243
233
  result.update(
@@ -256,15 +246,15 @@ wrap your code inside this:
256
246
  logger.debug(f" - Sending {len(chat_history)} messages to LLM.")
257
247
 
258
248
  model = self.llm.class_name()
259
- if self.vision == True:
249
+ if self.vision:
260
250
  if model == "DeepSeek":
261
251
  logger.warning(
262
252
  "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
263
253
  )
264
254
  else:
265
255
  chat_history = await chat_utils.add_screenshot_image_block(
266
- await ctx.get("screenshot"), chat_history
267
- )
256
+ await ctx.store.get("screenshot"), chat_history
257
+ )
268
258
 
269
259
 
270
260
 
@@ -275,18 +265,15 @@ wrap your code inside this:
275
265
  chat_history,
276
266
  )
277
267
 
278
- remembered_info = await ctx.get("remembered_info", default=None)
268
+ remembered_info = await ctx.store.get("remembered_info", default=None)
279
269
  if remembered_info:
280
270
  chat_history = await chat_utils.add_memory_block(remembered_info, chat_history)
281
271
 
282
- reflection = await ctx.get("reflection", None)
283
- if reflection:
284
- chat_history = await chat_utils.add_reflection_summary(reflection, chat_history)
285
-
286
- chat_history = await chat_utils.add_phone_state_block(await ctx.get("phone_state"), chat_history)
287
- chat_history = await chat_utils.add_ui_text_block(await ctx.get("ui_state"), chat_history)
272
+ chat_history = await chat_utils.add_phone_state_block(await ctx.store.get("phone_state"), chat_history)
273
+ chat_history = await chat_utils.add_ui_text_block(await ctx.store.get("ui_state"), chat_history)
288
274
 
289
- messages_to_send = [self.system_message] + chat_history
275
+ limited_history = self._limit_history(chat_history)
276
+ messages_to_send = [self.system_message] + limited_history
290
277
  messages_to_send = [
291
278
  chat_utils.message_copy(msg) for msg in messages_to_send
292
279
  ]
@@ -302,3 +289,23 @@ wrap your code inside this:
302
289
  except Exception as e:
303
290
  logger.error(f"Could not get an answer from LLM: {repr(e)}")
304
291
  raise e
292
+
293
+ def _limit_history(
294
+ self, chat_history: List[ChatMessage]
295
+ ) -> List[ChatMessage]:
296
+ if LLM_HISTORY_LIMIT <= 0:
297
+ return chat_history
298
+
299
+ max_messages = LLM_HISTORY_LIMIT * 2
300
+ if len(chat_history) <= max_messages:
301
+ return chat_history
302
+
303
+ preserved_head: List[ChatMessage] = []
304
+ if chat_history and chat_history[0].role == "user":
305
+ preserved_head = [chat_history[0]]
306
+
307
+ tail = chat_history[-max_messages:]
308
+ if preserved_head and preserved_head[0] in tail:
309
+ preserved_head = []
310
+
311
+ return preserved_head + tail
@@ -119,6 +119,6 @@ Instruction: Based **only** on the provided screenshot showing the current state
119
119
  # Export all prompts
120
120
  __all__ = [
121
121
  "DEFAULT_PLANNER_SYSTEM_PROMPT",
122
- "DEFAULT_PLANNER_USER_PROMPT",
122
+ "DEFAULT_PLANNER_USER_PROMPT",
123
123
  "DEFAULT_PLANNER_TASK_FAILED_PROMPT"
124
- ]
124
+ ]
droidrun/agent/usage.py CHANGED
@@ -1,18 +1,20 @@
1
1
  import contextlib
2
- from llama_index.core.callbacks import CallbackManager
2
+ import logging
3
+ from typing import Any, Dict, List, Optional
4
+ from uuid import uuid4
5
+
3
6
  from llama_index.core.callbacks.base_handler import BaseCallbackHandler
4
7
  from llama_index.core.callbacks.schema import CBEventType, EventPayload
5
8
  from llama_index.core.llms import LLM, ChatResponse
6
9
  from pydantic import BaseModel
7
- from typing import Any, Dict, List, Optional
8
- from uuid import uuid4
9
- import logging
10
10
 
11
11
  logger = logging.getLogger("droidrun")
12
12
  SUPPORTED_PROVIDERS = [
13
13
  "Gemini",
14
14
  "GoogleGenAI",
15
+ "GenAI",
15
16
  "OpenAI",
17
+ "openai_llm",
16
18
  "Anthropic",
17
19
  "Ollama",
18
20
  "DeepSeek",
@@ -32,14 +34,14 @@ def get_usage_from_response(provider: str, chat_rsp: ChatResponse) -> UsageResul
32
34
 
33
35
  print(f"rsp: {rsp.__class__.__name__}")
34
36
 
35
- if provider == "Gemini" or provider == "GoogleGenAI":
37
+ if provider == "Gemini" or provider == "GoogleGenAI" or provider == "GenAI":
36
38
  return UsageResult(
37
39
  request_tokens=rsp["usage_metadata"]["prompt_token_count"],
38
40
  response_tokens=rsp["usage_metadata"]["candidates_token_count"],
39
41
  total_tokens=rsp["usage_metadata"]["total_token_count"],
40
42
  requests=1,
41
43
  )
42
- elif provider == "OpenAI":
44
+ elif provider == "OpenAI" or provider == "openai_llm":
43
45
  from openai.types import CompletionUsage as OpenAIUsage
44
46
 
45
47
  usage: OpenAIUsage = rsp.usage
@@ -109,7 +111,7 @@ class TokenCountingHandler(BaseCallbackHandler):
109
111
  )
110
112
 
111
113
  def _get_event_usage(self, payload: Dict[str, Any]) -> UsageResult:
112
- if not EventPayload.RESPONSE in payload:
114
+ if EventPayload.RESPONSE not in payload:
113
115
  raise ValueError("No response in payload")
114
116
 
115
117
  chat_rsp: ChatResponse = payload.get(EventPayload.RESPONSE)
@@ -178,26 +180,26 @@ def create_tracker(llm: LLM) -> TokenCountingHandler:
178
180
 
179
181
  def track_usage(llm: LLM) -> TokenCountingHandler:
180
182
  """Track token usage for an LLM instance across all requests.
181
-
183
+
182
184
  This function:
183
185
  - Creates a new TokenCountingHandler for the LLM provider
184
186
  - Registers that handler as an LLM callback to monitor all requests
185
187
  - Returns the handler for accessing cumulative usage statistics
186
-
188
+
187
189
  The handler counts tokens for total LLM usage across all requests. For fine-grained
188
190
  per-request counting, use either:
189
191
  - `create_tracker()` with `llm_callback()` context manager for temporary tracking
190
192
  - `get_usage_from_response()` to extract usage from individual responses
191
-
193
+
192
194
  Args:
193
195
  llm: The LLamaIndex LLM instance to track usage for
194
-
196
+
195
197
  Returns:
196
198
  TokenCountingHandler: The registered handler that accumulates usage statistics
197
-
199
+
198
200
  Raises:
199
201
  ValueError: If the LLM provider is not supported for tracking
200
-
202
+
201
203
  Example:
202
204
  >>> llm = OpenAI()
203
205
  >>> tracker = track_usage(llm)