droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +22 -10
- droidrun/__main__.py +1 -2
- droidrun/adb/__init__.py +3 -3
- droidrun/adb/device.py +2 -2
- droidrun/adb/manager.py +2 -2
- droidrun/agent/__init__.py +5 -15
- droidrun/agent/codeact/__init__.py +11 -0
- droidrun/agent/codeact/codeact_agent.py +420 -0
- droidrun/agent/codeact/events.py +28 -0
- droidrun/agent/codeact/prompts.py +26 -0
- droidrun/agent/common/default.py +5 -0
- droidrun/agent/common/events.py +4 -0
- droidrun/agent/context/__init__.py +23 -0
- droidrun/agent/context/agent_persona.py +15 -0
- droidrun/agent/context/context_injection_manager.py +66 -0
- droidrun/agent/context/episodic_memory.py +15 -0
- droidrun/agent/context/personas/__init__.py +11 -0
- droidrun/agent/context/personas/app_starter.py +44 -0
- droidrun/agent/context/personas/default.py +95 -0
- droidrun/agent/context/personas/extractor.py +52 -0
- droidrun/agent/context/personas/ui_expert.py +107 -0
- droidrun/agent/context/reflection.py +20 -0
- droidrun/agent/context/task_manager.py +124 -0
- droidrun/agent/context/todo.txt +4 -0
- droidrun/agent/droid/__init__.py +13 -0
- droidrun/agent/droid/droid_agent.py +357 -0
- droidrun/agent/droid/events.py +28 -0
- droidrun/agent/oneflows/reflector.py +265 -0
- droidrun/agent/planner/__init__.py +13 -0
- droidrun/agent/planner/events.py +16 -0
- droidrun/agent/planner/planner_agent.py +268 -0
- droidrun/agent/planner/prompts.py +124 -0
- droidrun/agent/utils/__init__.py +3 -0
- droidrun/agent/utils/async_utils.py +17 -0
- droidrun/agent/utils/chat_utils.py +312 -0
- droidrun/agent/utils/executer.py +132 -0
- droidrun/agent/utils/llm_picker.py +147 -0
- droidrun/agent/utils/trajectory.py +184 -0
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +283 -0
- droidrun/cli/main.py +358 -149
- droidrun/run.py +105 -0
- droidrun/tools/__init__.py +4 -30
- droidrun/tools/adb.py +879 -0
- droidrun/tools/ios.py +594 -0
- droidrun/tools/tools.py +99 -0
- droidrun-0.3.0.dist-info/METADATA +149 -0
- droidrun-0.3.0.dist-info/RECORD +52 -0
- droidrun/agent/llm_reasoning.py +0 -567
- droidrun/agent/react_agent.py +0 -556
- droidrun/llm/__init__.py +0 -24
- droidrun/tools/actions.py +0 -854
- droidrun/tools/device.py +0 -29
- droidrun-0.1.0.dist-info/METADATA +0 -276
- droidrun-0.1.0.dist-info/RECORD +0 -20
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
- {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py
CHANGED
```diff
@@ -2,18 +2,30 @@
 DroidRun - A framework for controlling Android devices through LLM agents.
 """
 
-__version__ = "0.1.0"
+__version__ = "0.3.0"
 
 # Import main classes for easier access
-from droidrun.agent.…
-from droidrun.agent.…
-from droidrun.…
+from droidrun.agent.codeact.codeact_agent import CodeActAgent
+from droidrun.agent.planner.planner_agent import PlannerAgent
+from droidrun.agent.utils.executer import SimpleCodeExecutor
+from droidrun.agent.utils.llm_picker import load_llm
+from droidrun.adb.manager import DeviceManager
+from droidrun.tools.tools import Tools
+from droidrun.tools.adb import AdbTools
+from droidrun.tools.ios import IOSTools
+from droidrun.agent.droid import DroidAgent
+
 
 # Make main components available at package level
 __all__ = [
-    "…
-    "…
-    "…
-    "…
-    "…
-]
+    "DroidAgent",
+    "CodeActAgent",
+    "PlannerAgent",
+    "DeviceManager",
+    "Tools",
+    "load_llm",
+    "SimpleCodeExecutor",
+    "Tools",
+    "AdbTools",
+    "IOSTools",
+]
```
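With these re-exports, the whole 0.3.0 public API is importable from the package root. A minimal sketch of that surface; only the names themselves are confirmed by this diff, and the printed values are what the new `__init__.py` implies:

```python
# Names below are confirmed by the new droidrun/__init__.py shown above.
import droidrun

print(droidrun.__version__)  # "0.3.0"
print(droidrun.__all__)      # ["DroidAgent", "CodeActAgent", "PlannerAgent", ...]

# Equivalent direct imports from the package root:
from droidrun import AdbTools, DeviceManager, DroidAgent, IOSTools, Tools, load_llm
```

Note that the new `__all__` lists `"Tools"` twice; that is harmless but redundant.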
droidrun/__main__.py
CHANGED
droidrun/adb/__init__.py
CHANGED
```diff
@@ -2,9 +2,9 @@
 ADB Package - Android Debug Bridge functionality.
 """
 
-from .device import Device
-from .manager import DeviceManager
-from .wrapper import ADBWrapper
+from droidrun.adb.device import Device
+from droidrun.adb.manager import DeviceManager
+from droidrun.adb.wrapper import ADBWrapper
 
 __all__ = [
     'Device',
```
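The change here is purely mechanical: relative imports are rewritten as absolute ones, so the package's importable surface is unchanged. For example:

```python
# Works identically before and after this change.
from droidrun.adb import ADBWrapper, Device, DeviceManager
```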
droidrun/adb/device.py
CHANGED
```diff
@@ -8,7 +8,7 @@ import time
 import random
 import string
 from typing import Dict, Optional, Tuple, List
-from .wrapper import ADBWrapper
+from droidrun.adb.wrapper import ADBWrapper
 
 class Device:
     """High-level representation of an Android device."""
@@ -264,7 +264,7 @@ class Device:
 
         import logging
         logger = logging.getLogger("droidrun")
-        logger.…
+        logger.debug(
             f"Screenshot compressed successfully: {png_size:.1f}KB → {jpg_size:.1f}KB ({reduction:.1f}% reduction)"
         )
 
```
droidrun/adb/manager.py
CHANGED
```diff
@@ -3,8 +3,8 @@ Device Manager - Manages Android device connections.
 """
 
 from typing import Dict, List, Optional
-from .wrapper import ADBWrapper
-from .device import Device
+from droidrun.adb.wrapper import ADBWrapper
+from droidrun.adb.device import Device
 
 class DeviceManager:
     """Manages Android device connections."""
```
droidrun/agent/__init__.py
CHANGED
```diff
@@ -1,16 +1,6 @@
-"""
-Droidrun Agent Module.
+#import logging
 
-…
-…
-…
-…
-from .llm_reasoning import LLMReasoner
-
-__all__ = [
-    "ReActAgent",
-    "ReActStep",
-    "ReActStepType",
-    "run_agent",
-    "LLMReasoner",
-]
+#logger = logging.getLogger("droidrun")
+#logger.propagate = False # Don't send to root logger
+#logger.handlers = [] # No handlers by default
+#logger.setLevel(logging.INFO) # Or WARNING
```
droidrun/agent/codeact/__init__.py
ADDED

```python
from droidrun.agent.codeact.codeact_agent import CodeActAgent
from droidrun.agent.codeact.prompts import (
    DEFAULT_CODE_ACT_USER_PROMPT,
    DEFAULT_NO_THOUGHTS_PROMPT
)

__all__ = [
    "CodeActAgent",
    "DEFAULT_CODE_ACT_USER_PROMPT",
    "DEFAULT_NO_THOUGHTS_PROMPT"
]
```
droidrun/agent/codeact/codeact_agent.py
ADDED

````python
import logging
import re
import time
import asyncio
import json
import os
from typing import List, Optional, Tuple, Union
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
from llama_index.core.prompts import PromptTemplate
from llama_index.core.llms.llm import LLM
from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
from llama_index.core.memory import Memory
from droidrun.agent.codeact.events import (
    TaskInputEvent,
    TaskEndEvent,
    TaskExecutionEvent,
    TaskExecutionResultEvent,
    TaskThinkingEvent,
    EpisodicMemoryEvent,
)
from droidrun.agent.common.events import ScreenshotEvent
from droidrun.agent.utils import chat_utils
from droidrun.agent.utils.executer import SimpleCodeExecutor
from droidrun.agent.codeact.prompts import (
    DEFAULT_CODE_ACT_USER_PROMPT,
    DEFAULT_NO_THOUGHTS_PROMPT,
)

from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
from droidrun.tools import Tools
from typing import Optional, Dict, Tuple, List, Any, Callable
from droidrun.agent.context.agent_persona import AgentPersona

logger = logging.getLogger("droidrun")


class CodeActAgent(Workflow):
    """
    An agent that uses a ReAct-like cycle (Thought -> Code -> Observation)
    to solve problems requiring code execution. It extracts code from
    Markdown blocks and uses specific step types for tracking.
    """

    def __init__(
        self,
        llm: LLM,
        persona: AgentPersona,
        tools_instance: "Tools",
        all_tools_list: Dict[str, Callable[..., Any]],
        max_steps: int = 5,
        debug: bool = False,
        *args,
        **kwargs,
    ):
        # assert instead of if
        assert llm, "llm must be provided."
        super().__init__(*args, **kwargs)

        self.llm = llm
        self.max_steps = max_steps

        self.user_prompt = persona.user_prompt
        self.no_thoughts_prompt = None

        self.chat_memory = None
        self.episodic_memory = EpisodicMemory(persona=persona)
        self.remembered_info = None

        self.goal = None
        self.steps_counter = 0
        self.code_exec_counter = 0
        self.debug = debug

        self.tools = tools_instance

        self.tool_list = {}

        for tool_name in persona.allowed_tools:
            if tool_name in all_tools_list:
                self.tool_list[tool_name] = all_tools_list[tool_name]

        self.tool_descriptions = chat_utils.parse_tool_descriptions(self.tool_list)

        self.system_prompt_content = persona.system_prompt.format(
            tool_descriptions=self.tool_descriptions
        )
        self.system_prompt = ChatMessage(
            role="system", content=self.system_prompt_content
        )

        self.required_context = persona.required_context

        self.executor = SimpleCodeExecutor(
            loop=asyncio.get_event_loop(),
            locals={},
            tools=self.tool_list,
            globals={"__builtins__": __builtins__},
        )

        logger.info("✅ CodeActAgent initialized successfully.")

    @step
    async def prepare_chat(self, ctx: Context, ev: StartEvent) -> TaskInputEvent:
        """Prepare chat history from user input."""
        logger.info("💬 Preparing chat for task execution...")

        self.chat_memory: Memory = await ctx.get(
            "chat_memory", default=Memory.from_defaults()
        )

        user_input = ev.get("input", default=None)
        assert user_input, "User input cannot be empty."

        if ev.remembered_info:
            self.remembered_info = ev.remembered_info

        logger.debug(" - Adding goal to memory.")
        goal = user_input
        self.user_message = ChatMessage(
            role="user",
            content=PromptTemplate(
                self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT
            ).format(goal=goal),
        )
        self.no_thoughts_prompt = ChatMessage(
            role="user",
            content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
        )


        await self.chat_memory.aput(self.user_message)

        await ctx.set("chat_memory", self.chat_memory)
        input_messages = self.chat_memory.get_all()
        return TaskInputEvent(input=input_messages)

    @step
    async def handle_llm_input(
        self, ctx: Context, ev: TaskInputEvent
    ) -> TaskThinkingEvent | TaskEndEvent:
        """Handle LLM input."""
        chat_history = ev.input
        assert len(chat_history) > 0, "Chat history cannot be empty."
        ctx.write_event_to_stream(ev)

        if self.steps_counter >= self.max_steps:
            ev = TaskEndEvent(
                success=False,
                reason=f"Reached max step count of {self.max_steps} steps",
            )
            ctx.write_event_to_stream(ev)
            return ev

        self.steps_counter += 1
        logger.info(f"🧠 Step {self.steps_counter}: Thinking...")

        model = self.llm.class_name()

        if "remember" in self.tool_list and self.remembered_info:
            await ctx.set("remembered_info", self.remembered_info)
            chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)

        for context in self.required_context:
            if context == "screenshot" and model != "DeepSeek":
                screenshot = (await self.tools.take_screenshot())[1]
                ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))

                await ctx.set("screenshot", screenshot)
                chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)

            if context == "phone_state":
                chat_history = await chat_utils.add_phone_state_block(await self.tools.get_phone_state(), chat_history)

            if context == "ui_state":
                ui_state = await self.tools.get_clickables()
                await ctx.set("ui_state", ui_state)
                chat_history = await chat_utils.add_ui_text_block(
                    ui_state, chat_history
                )

            if context == "packages":
                chat_history = await chat_utils.add_packages_block(
                    await self.tools.list_packages(include_system_apps=True),
                    chat_history,
                )

        response = await self._get_llm_response(ctx, chat_history)
        if response is None:
            return TaskEndEvent(
                success=False, reason="LLM response is None. This is a critical error."
            )

        await self.chat_memory.aput(response.message)

        code, thoughts = chat_utils.extract_code_and_thought(response.message.content)

        event = TaskThinkingEvent(thoughts=thoughts, code=code)
        ctx.write_event_to_stream(event)
        return event

    @step
    async def handle_llm_output(
        self, ctx: Context, ev: TaskThinkingEvent
    ) -> Union[TaskExecutionEvent, TaskInputEvent]:
        """Handle LLM output."""
        logger.debug("⚙️ Handling LLM output...")
        code = ev.code
        thoughts = ev.thoughts

        if not thoughts:
            logger.warning(
                "🤔 LLM provided code without thoughts. Adding reminder prompt."
            )
            await self.chat_memory.aput(self.no_thoughts_prompt)
        else:
            logger.info(f"🤔 Reasoning: {thoughts}")

        if code:
            return TaskExecutionEvent(code=code)
        else:
            message = ChatMessage(
                role="user",
                content="No code was provided. If you want to mark task as complete (whether it failed or succeeded), use complete(success:bool, reason:str) function within a code block ```pythn\n```.",
            )
            await self.chat_memory.aput(message)
            return TaskInputEvent(input=self.chat_memory.get_all())

    @step
    async def execute_code(
        self, ctx: Context, ev: TaskExecutionEvent
    ) -> Union[TaskExecutionResultEvent, TaskEndEvent]:
        """Execute the code and return the result."""
        code = ev.code
        assert code, "Code cannot be empty."
        logger.info(f"⚡ Executing action...")
        logger.debug(f"Code to execute:\n```python\n{code}\n```")

        try:
            self.code_exec_counter += 1
            result = await self.executor.execute(ctx, code)
            logger.info(f"💡 Code execution successful. Result: {result}")

            if self.tools.finished == True:
                logger.debug(" - Task completed.")
                event = TaskEndEvent(
                    success=self.tools.success, reason=self.tools.reason
                )
                ctx.write_event_to_stream(event)
                return event

            self.remembered_info = self.tools.memory

            event = TaskExecutionResultEvent(output=str(result))
            ctx.write_event_to_stream(event)
            return event

        except Exception as e:
            logger.error(f"💥 Action failed: {e}")
            if self.debug:
                logger.error("Exception details:", exc_info=True)
            error_message = f"Error during execution: {e}"

            event = TaskExecutionResultEvent(output=error_message)
            ctx.write_event_to_stream(event)
            return event

    @step
    async def handle_execution_result(
        self, ctx: Context, ev: TaskExecutionResultEvent
    ) -> TaskInputEvent:
        """Handle the execution result. Currently it just returns InputEvent."""
        logger.debug("📊 Handling execution result...")
        # Get the output from the event
        output = ev.output
        if output is None:
            output = "Code executed, but produced no output."
            logger.warning(" - Execution produced no output.")
        else:
            logger.debug(
                f" - Execution output: {output[:100]}..."
                if len(output) > 100
                else f" - Execution output: {output}"
            )
        # Add the output to memory as an user message (observation)
        observation_message = ChatMessage(
            role="user", content=f"Execution Result:\n```\n{output}\n```"
        )
        await self.chat_memory.aput(observation_message)

        return TaskInputEvent(input=self.chat_memory.get_all())

    @step
    async def finalize(self, ev: TaskEndEvent, ctx: Context) -> StopEvent:
        """Finalize the workflow."""
        self.tools.finished = False
        await ctx.set("chat_memory", self.chat_memory)

        # Add final state observation to episodic memory
        await self._add_final_state_observation(ctx)

        result = {}
        result.update(
            {
                "success": ev.success,
                "reason": ev.reason,
                "codeact_steps": self.steps_counter,
                "code_executions": self.code_exec_counter,
            }
        )

        ctx.write_event_to_stream(
            EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
        )

        return StopEvent(result=result)

    async def _get_llm_response(
        self, ctx: Context, chat_history: List[ChatMessage]
    ) -> ChatResponse | None:
        logger.debug("🔍 Getting LLM response...")
        messages_to_send = [self.system_prompt] + chat_history
        messages_to_send = [chat_utils.message_copy(msg) for msg in messages_to_send]
        try:
            response = await self.llm.achat(messages=messages_to_send)
            logger.debug("🔍 Received LLM response.")

            filtered_chat_history = []
            for msg in chat_history:
                filtered_msg = chat_utils.message_copy(msg)
                if hasattr(filtered_msg, "blocks") and filtered_msg.blocks:
                    filtered_msg.blocks = [
                        block
                        for block in filtered_msg.blocks
                        if not isinstance(block, chat_utils.ImageBlock)
                    ]
                filtered_chat_history.append(filtered_msg)

            # Convert chat history and response to JSON strings
            chat_history_str = json.dumps(
                [
                    {"role": msg.role, "content": msg.content}
                    for msg in filtered_chat_history
                ]
            )
            response_str = json.dumps(
                {"role": response.message.role, "content": response.message.content}
            )

            step = EpisodicMemoryStep(
                chat_history=chat_history_str,
                response=response_str,
                timestamp=time.time(),
                screenshot=(await ctx.get("screenshot", None))
            )

            self.episodic_memory.steps.append(step)

            assert hasattr(
                response, "message"
            ), f"LLM response does not have a message attribute.\nResponse: {response}"
        except Exception as e:
            if (
                self.llm.class_name() == "Gemini_LLM"
                and "You exceeded your current quota" in str(e)
            ):
                s = str(e._details[2])
                match = re.search(r"seconds:\s*(\d+)", s)
                if match:
                    seconds = int(match.group(1)) + 1
                    logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
                    time.sleep(seconds)
                else:
                    logger.error(f"Rate limit error. Retrying in 5 seconds...")
                    time.sleep(40)
                logger.debug("🔍 Retrying call to LLM...")
                response = await self.llm.achat(messages=messages_to_send)
            else:
                logger.error(f"Could not get an answer from LLM: {repr(e)}")
                raise e
        logger.debug(" - Received response from LLM.")
        return response

    async def _add_final_state_observation(self, ctx: Context) -> None:
        """Add the current UI state and screenshot as the final observation step."""
        try:
            # Get current screenshot and UI state
            screenshot = None
            ui_state = None

            try:
                _, screenshot_bytes = await self.tools.take_screenshot()
                screenshot = screenshot_bytes
            except Exception as e:
                logger.warning(f"Failed to capture final screenshot: {e}")

            try:
                ui_state = await self.tools.get_clickables()
            except Exception as e:
                logger.warning(f"Failed to capture final UI state: {e}")

            # Create final observation chat history and response
            final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
            final_response = {
                "role": "user",
                "content": f"Final State Observation:\nUI State: {ui_state}\nScreenshot: {'Available' if screenshot else 'Not available'}"
            }

            # Create final episodic memory step
            final_step = EpisodicMemoryStep(
                chat_history=json.dumps(final_chat_history),
                response=json.dumps(final_response),
                timestamp=time.time(),
                screenshot=screenshot
            )

            self.episodic_memory.steps.append(final_step)
            logger.info("Added final state observation to episodic memory")

        except Exception as e:
            logger.error(f"Failed to add final state observation: {e}")
````
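The `@step` methods are chained by their event types: `StartEvent` feeds `prepare_chat`, whose `TaskInputEvent` drives `handle_llm_input`, and the loop `TaskThinkingEvent → TaskExecutionEvent → TaskExecutionResultEvent → TaskInputEvent` repeats until a `TaskEndEvent` reaches `finalize`. A hedged sketch of driving the agent; the `llm`, `persona`, `tools`, and `tool_fns` values are placeholders, but the `run()` keywords mirror what `prepare_chat` reads off the `StartEvent`:

```python
# Sketch only: llm, persona, tools and tool_fns are assumed to be built
# elsewhere (e.g. via load_llm, an AgentPersona, and AdbTools).
async def run_task(llm, persona, tools, tool_fns) -> dict:
    agent = CodeActAgent(
        llm=llm,                  # any llama_index LLM
        persona=persona,          # filtered against persona.allowed_tools above
        tools_instance=tools,     # e.g. an AdbTools bound to a device
        all_tools_list=tool_fns,  # {"tap": tools.tap, ...} name -> callable
        max_steps=10,
        timeout=300,              # Workflow kwarg, forwarded via **kwargs
    )
    # prepare_chat reads ev.get("input") and ev.remembered_info, so both are
    # supplied as run() keywords. The result dict comes from finalize():
    # {"success", "reason", "codeact_steps", "code_executions"}.
    return await agent.run(input="Open the Settings app", remembered_info=None)
```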
droidrun/agent/codeact/events.py
ADDED

```python
from llama_index.core.llms import ChatMessage
from llama_index.core.workflow import Event
from typing import Optional
from ..context.episodic_memory import EpisodicMemory

class TaskInputEvent(Event):
    input: list[ChatMessage]



class TaskThinkingEvent(Event):
    thoughts: Optional[str] = None
    code: Optional[str] = None

class TaskExecutionEvent(Event):
    code: str
    globals: dict[str, str] = {}
    locals: dict[str, str] = {}

class TaskExecutionResultEvent(Event):
    output: str

class TaskEndEvent(Event):
    success: bool
    reason: str

class EpisodicMemoryEvent(Event):
    episodic_memory: EpisodicMemory
```
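Every `ctx.write_event_to_stream(...)` call in `codeact_agent.py` emits one of these events, so callers can observe progress while the workflow runs. A sketch, assuming the standard llama_index `Workflow` streaming API (`handler.stream_events()`):

```python
# Assumes `agent` is a constructed CodeActAgent (see the sketch above).
async def watch(agent) -> None:
    handler = agent.run(input="Open the Settings app", remembered_info=None)
    async for ev in handler.stream_events():
        if isinstance(ev, TaskThinkingEvent):
            print("thoughts:", ev.thoughts)
        elif isinstance(ev, TaskExecutionResultEvent):
            print("output:", ev.output)
        elif isinstance(ev, TaskEndEvent):
            print("done:", ev.success, ev.reason)
    result = await handler  # the final result dict from finalize()
```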
droidrun/agent/codeact/prompts.py
ADDED

````python
"""
Prompt templates for the CodeActAgent.

This module contains all the prompts used by the CodeActAgent,
separated from the workflow logic for better maintainability.
"""


# User prompt template that presents the current request and prompts for reasoning
DEFAULT_CODE_ACT_USER_PROMPT = """**Current Request:**
{goal}

**Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed."""""

# Prompt to remind the agent to provide thoughts before code
DEFAULT_NO_THOUGHTS_PROMPT = """Your previous response provided code without explaining your reasoning first. Remember to always describe your thought process and plan *before* providing the code block.

The code you provided will be executed below.

Now, describe the next step you will take to address the original goal: {goal}"""

# Export all prompts
__all__ = [
    "DEFAULT_CODE_ACT_USER_PROMPT",
    "DEFAULT_NO_THOUGHTS_PROMPT"
]
````
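`CodeActAgent.prepare_chat` renders these templates via `PromptTemplate(...).format(goal=...)`; for example:

```python
from llama_index.core.prompts import PromptTemplate

# Mirrors what prepare_chat does with the default user prompt.
user_content = PromptTemplate(DEFAULT_CODE_ACT_USER_PROMPT).format(
    goal="Open the Settings app"
)
# user_content starts with: "**Current Request:**\nOpen the Settings app"
```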
droidrun/agent/context/__init__.py
ADDED

```python
"""
Agent Context Module - Provides specialized agent personas and context injection management.

This module contains:
- AgentPersona: Dataclass for defining specialized agent configurations
- ContextInjectionManager: Manager for handling different agent personas and their contexts
"""

from .agent_persona import AgentPersona
from .context_injection_manager import ContextInjectionManager
from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
from .reflection import Reflection
from .task_manager import TaskManager, Task

__all__ = [
    "AgentPersona",
    "ContextInjectionManager",
    "EpisodicMemory",
    "EpisodicMemoryStep",
    "Reflection",
    "TaskManager",
    "Task"
]
```
droidrun/agent/context/agent_persona.py
ADDED

```python
from typing import Dict, List, Callable, Any, Optional
from dataclasses import dataclass

@dataclass
class AgentPersona:
    """Represents a specialized agent persona with its configuration."""
    name: str
    system_prompt: str
    user_prompt: str
    description: str
    allowed_tools: List[str]
    required_context: List[str]
    expertise_areas: List[str]

AppAgent = AgentPersona
```