droidrun 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. droidrun/__init__.py +22 -10
  2. droidrun/__main__.py +1 -2
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +2 -2
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +5 -15
  7. droidrun/agent/codeact/__init__.py +11 -0
  8. droidrun/agent/codeact/codeact_agent.py +420 -0
  9. droidrun/agent/codeact/events.py +28 -0
  10. droidrun/agent/codeact/prompts.py +26 -0
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +13 -0
  26. droidrun/agent/droid/droid_agent.py +357 -0
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +13 -0
  30. droidrun/agent/planner/events.py +16 -0
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +124 -0
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +17 -0
  35. droidrun/agent/utils/chat_utils.py +312 -0
  36. droidrun/agent/utils/executer.py +132 -0
  37. droidrun/agent/utils/llm_picker.py +147 -0
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +358 -149
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +4 -30
  44. droidrun/tools/adb.py +879 -0
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/llm_reasoning.py +0 -567
  50. droidrun/agent/react_agent.py +0 -556
  51. droidrun/llm/__init__.py +0 -24
  52. droidrun/tools/actions.py +0 -854
  53. droidrun/tools/device.py +0 -29
  54. droidrun-0.1.0.dist-info/METADATA +0 -276
  55. droidrun-0.1.0.dist-info/RECORD +0 -20
  56. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  57. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  58. {droidrun-0.1.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py CHANGED
@@ -2,18 +2,30 @@
  DroidRun - A framework for controlling Android devices through LLM agents.
  """

- __version__ = "0.1.0"
+ __version__ = "0.3.0"

  # Import main classes for easier access
- from droidrun.agent.react_agent import ReActAgent as Agent
- from droidrun.agent.react_agent import ReActStep, ReActStepType
- from droidrun.llm import OpenAILLM, AnthropicLLM
+ from droidrun.agent.codeact.codeact_agent import CodeActAgent
+ from droidrun.agent.planner.planner_agent import PlannerAgent
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
+ from droidrun.agent.utils.llm_picker import load_llm
+ from droidrun.adb.manager import DeviceManager
+ from droidrun.tools.tools import Tools
+ from droidrun.tools.adb import AdbTools
+ from droidrun.tools.ios import IOSTools
+ from droidrun.agent.droid import DroidAgent
+

  # Make main components available at package level
  __all__ = [
-     "Agent",
-     "ReActStep",
-     "ReActStepType",
-     "OpenAILLM",
-     "AnthropicLLM",
- ]
+     "DroidAgent",
+     "CodeActAgent",
+     "PlannerAgent",
+     "DeviceManager",
+     "Tools",
+     "load_llm",
+     "SimpleCodeExecutor",
+     "AdbTools",
+     "IOSTools",
+ ]
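
The package root now exposes the full 0.3.0 agent stack in place of the old ReAct exports. For orientation, a minimal end-to-end sketch of how this surface is typically driven; the DroidAgent constructor arguments and the provider/model names are assumptions for illustration, not taken from this diff:

import asyncio
from droidrun import AdbTools, DroidAgent, load_llm

async def main():
    # load_llm resolves a llama_index LLM by provider class name (see
    # droidrun/agent/utils/llm_picker.py, added in this release); the
    # provider/model strings here are illustrative.
    llm = load_llm("OpenAI", model="gpt-4o")
    tools = AdbTools()  # exact constructor arguments may differ
    # Assumed DroidAgent wiring; the real signature lives in
    # droidrun/agent/droid/droid_agent.py, also added in this release.
    agent = DroidAgent(goal="Open the Settings app", llm=llm, tools=tools)
    result = await agent.run()
    print(result)

asyncio.run(main())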
droidrun/__main__.py CHANGED
@@ -1,8 +1,7 @@
  """
  DroidRun main entry point
  """
-
  from droidrun.cli.main import cli

  if __name__ == '__main__':
-     cli()
+     cli()
droidrun/adb/__init__.py CHANGED
@@ -2,9 +2,9 @@
  ADB Package - Android Debug Bridge functionality.
  """

- from .device import Device
- from .manager import DeviceManager
- from .wrapper import ADBWrapper
+ from droidrun.adb.device import Device
+ from droidrun.adb.manager import DeviceManager
+ from droidrun.adb.wrapper import ADBWrapper

  __all__ = [
      'Device',
droidrun/adb/device.py CHANGED
@@ -8,7 +8,7 @@ import time
  import random
  import string
  from typing import Dict, Optional, Tuple, List
- from .wrapper import ADBWrapper
+ from droidrun.adb.wrapper import ADBWrapper

  class Device:
      """High-level representation of an Android device."""
@@ -264,7 +264,7 @@ class Device:

          import logging
          logger = logging.getLogger("droidrun")
-         logger.info(
+         logger.debug(
              f"Screenshot compressed successfully: {png_size:.1f}KB → {jpg_size:.1f}KB ({reduction:.1f}% reduction)"
          )

droidrun/adb/manager.py CHANGED
@@ -3,8 +3,8 @@ Device Manager - Manages Android device connections.
  """

  from typing import Dict, List, Optional
- from .wrapper import ADBWrapper
- from .device import Device
+ from droidrun.adb.wrapper import ADBWrapper
+ from droidrun.adb.device import Device

  class DeviceManager:
      """Manages Android device connections."""
droidrun/agent/__init__.py CHANGED
@@ -1,16 +1,6 @@
- """
- Droidrun Agent Module.
+ #import logging

- This module provides a ReAct agent for automating Android devices using reasoning and acting.
- """
-
- from .react_agent import ReActAgent, ReActStep, ReActStepType, run_agent
- from .llm_reasoning import LLMReasoner
-
- __all__ = [
-     "ReActAgent",
-     "ReActStep",
-     "ReActStepType",
-     "run_agent",
-     "LLMReasoner",
- ]
+ #logger = logging.getLogger("droidrun")
+ #logger.propagate = False  # Don't send to root logger
+ #logger.handlers = []  # No handlers by default
+ #logger.setLevel(logging.INFO)  # Or WARNING
droidrun/agent/codeact/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from droidrun.agent.codeact.codeact_agent import CodeActAgent
+ from droidrun.agent.codeact.prompts import (
+     DEFAULT_CODE_ACT_USER_PROMPT,
+     DEFAULT_NO_THOUGHTS_PROMPT
+ )
+
+ __all__ = [
+     "CodeActAgent",
+     "DEFAULT_CODE_ACT_USER_PROMPT",
+     "DEFAULT_NO_THOUGHTS_PROMPT"
+ ]
droidrun/agent/codeact/codeact_agent.py ADDED
@@ -0,0 +1,420 @@
+ import logging
+ import re
+ import time
+ import asyncio
+ import json
+ import os
+ from typing import List, Optional, Tuple, Union
+ from llama_index.core.base.llms.types import ChatMessage, ChatResponse
+ from llama_index.core.prompts import PromptTemplate
+ from llama_index.core.llms.llm import LLM
+ from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
+ from llama_index.core.memory import Memory
+ from droidrun.agent.codeact.events import (
+     TaskInputEvent,
+     TaskEndEvent,
+     TaskExecutionEvent,
+     TaskExecutionResultEvent,
+     TaskThinkingEvent,
+     EpisodicMemoryEvent,
+ )
+ from droidrun.agent.common.events import ScreenshotEvent
+ from droidrun.agent.utils import chat_utils
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
+ from droidrun.agent.codeact.prompts import (
+     DEFAULT_CODE_ACT_USER_PROMPT,
+     DEFAULT_NO_THOUGHTS_PROMPT,
+ )
+
+ from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
+ from droidrun.tools import Tools
+ from typing import Optional, Dict, Tuple, List, Any, Callable
+ from droidrun.agent.context.agent_persona import AgentPersona
+
+ logger = logging.getLogger("droidrun")
+
+
+ class CodeActAgent(Workflow):
+     """
+     An agent that uses a ReAct-like cycle (Thought -> Code -> Observation)
+     to solve problems requiring code execution. It extracts code from
+     Markdown blocks and uses specific step types for tracking.
+     """
+
+     def __init__(
+         self,
+         llm: LLM,
+         persona: AgentPersona,
+         tools_instance: "Tools",
+         all_tools_list: Dict[str, Callable[..., Any]],
+         max_steps: int = 5,
+         debug: bool = False,
+         *args,
+         **kwargs,
+     ):
+         # Fail fast: an LLM instance is required
+         assert llm, "llm must be provided."
+         super().__init__(*args, **kwargs)
+
+         self.llm = llm
+         self.max_steps = max_steps
+
+         self.user_prompt = persona.user_prompt
+         self.no_thoughts_prompt = None
+
+         self.chat_memory = None
+         self.episodic_memory = EpisodicMemory(persona=persona)
+         self.remembered_info = None
+
+         self.goal = None
+         self.steps_counter = 0
+         self.code_exec_counter = 0
+         self.debug = debug
+
+         self.tools = tools_instance
+
+         self.tool_list = {}
+
+         for tool_name in persona.allowed_tools:
+             if tool_name in all_tools_list:
+                 self.tool_list[tool_name] = all_tools_list[tool_name]
+
+         self.tool_descriptions = chat_utils.parse_tool_descriptions(self.tool_list)
+
+         self.system_prompt_content = persona.system_prompt.format(
+             tool_descriptions=self.tool_descriptions
+         )
+         self.system_prompt = ChatMessage(
+             role="system", content=self.system_prompt_content
+         )
+
+         self.required_context = persona.required_context
+
+         self.executor = SimpleCodeExecutor(
+             loop=asyncio.get_event_loop(),
+             locals={},
+             tools=self.tool_list,
+             globals={"__builtins__": __builtins__},
+         )
+
+         logger.info("✅ CodeActAgent initialized successfully.")
+
+     @step
+     async def prepare_chat(self, ctx: Context, ev: StartEvent) -> TaskInputEvent:
+         """Prepare chat history from user input."""
+         logger.info("💬 Preparing chat for task execution...")
+
+         self.chat_memory: Memory = await ctx.get(
+             "chat_memory", default=Memory.from_defaults()
+         )
+
+         user_input = ev.get("input", default=None)
+         assert user_input, "User input cannot be empty."
+
+         if ev.remembered_info:
+             self.remembered_info = ev.remembered_info
+
+         logger.debug("  - Adding goal to memory.")
+         goal = user_input
+         self.user_message = ChatMessage(
+             role="user",
+             content=PromptTemplate(
+                 self.user_prompt or DEFAULT_CODE_ACT_USER_PROMPT
+             ).format(goal=goal),
+         )
+         self.no_thoughts_prompt = ChatMessage(
+             role="user",
+             content=PromptTemplate(DEFAULT_NO_THOUGHTS_PROMPT).format(goal=goal),
+         )
+
+         await self.chat_memory.aput(self.user_message)
+
+         await ctx.set("chat_memory", self.chat_memory)
+         input_messages = self.chat_memory.get_all()
+         return TaskInputEvent(input=input_messages)
+
+     @step
+     async def handle_llm_input(
+         self, ctx: Context, ev: TaskInputEvent
+     ) -> TaskThinkingEvent | TaskEndEvent:
+         """Handle LLM input."""
+         chat_history = ev.input
+         assert len(chat_history) > 0, "Chat history cannot be empty."
+         ctx.write_event_to_stream(ev)
+
+         if self.steps_counter >= self.max_steps:
+             ev = TaskEndEvent(
+                 success=False,
+                 reason=f"Reached max step count of {self.max_steps} steps",
+             )
+             ctx.write_event_to_stream(ev)
+             return ev
+
+         self.steps_counter += 1
+         logger.info(f"🧠 Step {self.steps_counter}: Thinking...")
+
+         model = self.llm.class_name()
+
+         if "remember" in self.tool_list and self.remembered_info:
+             await ctx.set("remembered_info", self.remembered_info)
+             chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
+
+         for context in self.required_context:
+             if context == "screenshot" and model != "DeepSeek":
+                 screenshot = (await self.tools.take_screenshot())[1]
+                 ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
+
+                 await ctx.set("screenshot", screenshot)
+                 chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
+
+             if context == "phone_state":
+                 chat_history = await chat_utils.add_phone_state_block(await self.tools.get_phone_state(), chat_history)
+
+             if context == "ui_state":
+                 ui_state = await self.tools.get_clickables()
+                 await ctx.set("ui_state", ui_state)
+                 chat_history = await chat_utils.add_ui_text_block(
+                     ui_state, chat_history
+                 )
+
+             if context == "packages":
+                 chat_history = await chat_utils.add_packages_block(
+                     await self.tools.list_packages(include_system_apps=True),
+                     chat_history,
+                 )
+
+         response = await self._get_llm_response(ctx, chat_history)
+         if response is None:
+             return TaskEndEvent(
+                 success=False, reason="LLM response is None. This is a critical error."
+             )
+
+         await self.chat_memory.aput(response.message)
+
+         code, thoughts = chat_utils.extract_code_and_thought(response.message.content)
+
+         event = TaskThinkingEvent(thoughts=thoughts, code=code)
+         ctx.write_event_to_stream(event)
+         return event
+
+     @step
+     async def handle_llm_output(
+         self, ctx: Context, ev: TaskThinkingEvent
+     ) -> Union[TaskExecutionEvent, TaskInputEvent]:
+         """Handle LLM output."""
+         logger.debug("⚙️ Handling LLM output...")
+         code = ev.code
+         thoughts = ev.thoughts
+
+         if not thoughts:
+             logger.warning(
+                 "🤔 LLM provided code without thoughts. Adding reminder prompt."
+             )
+             await self.chat_memory.aput(self.no_thoughts_prompt)
+         else:
+             logger.info(f"🤔 Reasoning: {thoughts}")
+
+         if code:
+             return TaskExecutionEvent(code=code)
+         else:
+             message = ChatMessage(
+                 role="user",
+                 content="No code was provided. If you want to mark the task as complete (whether it failed or succeeded), use the complete(success: bool, reason: str) function within a code block ```python\n```.",
+             )
+             await self.chat_memory.aput(message)
+             return TaskInputEvent(input=self.chat_memory.get_all())
+
+     @step
+     async def execute_code(
+         self, ctx: Context, ev: TaskExecutionEvent
+     ) -> Union[TaskExecutionResultEvent, TaskEndEvent]:
+         """Execute the code and return the result."""
+         code = ev.code
+         assert code, "Code cannot be empty."
+         logger.info("⚡ Executing action...")
+         logger.debug(f"Code to execute:\n```python\n{code}\n```")
+
+         try:
+             self.code_exec_counter += 1
+             result = await self.executor.execute(ctx, code)
+             logger.info(f"💡 Code execution successful. Result: {result}")
+
+             if self.tools.finished:
+                 logger.debug("  - Task completed.")
+                 event = TaskEndEvent(
+                     success=self.tools.success, reason=self.tools.reason
+                 )
+                 ctx.write_event_to_stream(event)
+                 return event
+
+             self.remembered_info = self.tools.memory
+
+             event = TaskExecutionResultEvent(output=str(result))
+             ctx.write_event_to_stream(event)
+             return event
+
+         except Exception as e:
+             logger.error(f"💥 Action failed: {e}")
+             if self.debug:
+                 logger.error("Exception details:", exc_info=True)
+             error_message = f"Error during execution: {e}"
+
+             event = TaskExecutionResultEvent(output=error_message)
+             ctx.write_event_to_stream(event)
+             return event
+
+     @step
+     async def handle_execution_result(
+         self, ctx: Context, ev: TaskExecutionResultEvent
+     ) -> TaskInputEvent:
+         """Handle the execution result. Currently it just returns an InputEvent."""
+         logger.debug("📊 Handling execution result...")
+         # Get the output from the event
+         output = ev.output
+         if output is None:
+             output = "Code executed, but produced no output."
+             logger.warning("  - Execution produced no output.")
+         else:
+             logger.debug(
+                 f"  - Execution output: {output[:100]}..."
+                 if len(output) > 100
+                 else f"  - Execution output: {output}"
+             )
+         # Add the output to memory as a user message (observation)
+         observation_message = ChatMessage(
+             role="user", content=f"Execution Result:\n```\n{output}\n```"
+         )
+         await self.chat_memory.aput(observation_message)
+
+         return TaskInputEvent(input=self.chat_memory.get_all())
+
+     @step
+     async def finalize(self, ev: TaskEndEvent, ctx: Context) -> StopEvent:
+         """Finalize the workflow."""
+         self.tools.finished = False
+         await ctx.set("chat_memory", self.chat_memory)
+
+         # Add final state observation to episodic memory
+         await self._add_final_state_observation(ctx)
+
+         result = {}
+         result.update(
+             {
+                 "success": ev.success,
+                 "reason": ev.reason,
+                 "codeact_steps": self.steps_counter,
+                 "code_executions": self.code_exec_counter,
+             }
+         )
+
+         ctx.write_event_to_stream(
+             EpisodicMemoryEvent(episodic_memory=self.episodic_memory)
+         )
+
+         return StopEvent(result=result)
+
+     async def _get_llm_response(
+         self, ctx: Context, chat_history: List[ChatMessage]
+     ) -> ChatResponse | None:
+         logger.debug("🔍 Getting LLM response...")
+         messages_to_send = [self.system_prompt] + chat_history
+         messages_to_send = [chat_utils.message_copy(msg) for msg in messages_to_send]
+         try:
+             response = await self.llm.achat(messages=messages_to_send)
+             logger.debug("🔍 Received LLM response.")
+
+             filtered_chat_history = []
+             for msg in chat_history:
+                 filtered_msg = chat_utils.message_copy(msg)
+                 if hasattr(filtered_msg, "blocks") and filtered_msg.blocks:
+                     filtered_msg.blocks = [
+                         block
+                         for block in filtered_msg.blocks
+                         if not isinstance(block, chat_utils.ImageBlock)
+                     ]
+                 filtered_chat_history.append(filtered_msg)
+
+             # Convert chat history and response to JSON strings
+             chat_history_str = json.dumps(
+                 [
+                     {"role": msg.role, "content": msg.content}
+                     for msg in filtered_chat_history
+                 ]
+             )
+             response_str = json.dumps(
+                 {"role": response.message.role, "content": response.message.content}
+             )
+
+             step = EpisodicMemoryStep(
+                 chat_history=chat_history_str,
+                 response=response_str,
+                 timestamp=time.time(),
+                 screenshot=(await ctx.get("screenshot", None))
+             )
+
+             self.episodic_memory.steps.append(step)
+
+             assert hasattr(
+                 response, "message"
+             ), f"LLM response does not have a message attribute.\nResponse: {response}"
+         except Exception as e:
+             if (
+                 self.llm.class_name() == "Gemini_LLM"
+                 and "You exceeded your current quota" in str(e)
+             ):
+                 s = str(e._details[2])
+                 match = re.search(r"seconds:\s*(\d+)", s)
+                 if match:
+                     seconds = int(match.group(1)) + 1
+                     logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
+                     time.sleep(seconds)
+                 else:
+                     logger.error("Rate limit error. Retrying in 40 seconds...")
+                     time.sleep(40)
+                 logger.debug("🔍 Retrying call to LLM...")
+                 response = await self.llm.achat(messages=messages_to_send)
+             else:
+                 logger.error(f"Could not get an answer from LLM: {repr(e)}")
+                 raise e
+         logger.debug("  - Received response from LLM.")
+         return response
+
+     async def _add_final_state_observation(self, ctx: Context) -> None:
+         """Add the current UI state and screenshot as the final observation step."""
+         try:
+             # Get current screenshot and UI state
+             screenshot = None
+             ui_state = None
+
+             try:
+                 _, screenshot_bytes = await self.tools.take_screenshot()
+                 screenshot = screenshot_bytes
+             except Exception as e:
+                 logger.warning(f"Failed to capture final screenshot: {e}")
+
+             try:
+                 ui_state = await self.tools.get_clickables()
+             except Exception as e:
+                 logger.warning(f"Failed to capture final UI state: {e}")
+
+             # Create final observation chat history and response
+             final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
+             final_response = {
+                 "role": "user",
+                 "content": f"Final State Observation:\nUI State: {ui_state}\nScreenshot: {'Available' if screenshot else 'Not available'}"
+             }
+
+             # Create final episodic memory step
+             final_step = EpisodicMemoryStep(
+                 chat_history=json.dumps(final_chat_history),
+                 response=json.dumps(final_response),
+                 timestamp=time.time(),
+                 screenshot=screenshot
+             )
+
+             self.episodic_memory.steps.append(final_step)
+             logger.info("Added final state observation to episodic memory")
+
+         except Exception as e:
+             logger.error(f"Failed to add final state observation: {e}")
droidrun/agent/codeact/events.py ADDED
@@ -0,0 +1,28 @@
+ from llama_index.core.llms import ChatMessage
+ from llama_index.core.workflow import Event
+ from typing import Optional
+ from ..context.episodic_memory import EpisodicMemory
+
+ class TaskInputEvent(Event):
+     input: list[ChatMessage]
+
+
+
+ class TaskThinkingEvent(Event):
+     thoughts: Optional[str] = None
+     code: Optional[str] = None
+
+ class TaskExecutionEvent(Event):
+     code: str
+     globals: dict[str, str] = {}
+     locals: dict[str, str] = {}
+
+ class TaskExecutionResultEvent(Event):
+     output: str
+
+ class TaskEndEvent(Event):
+     success: bool
+     reason: str
+
+ class EpisodicMemoryEvent(Event):
+     episodic_memory: EpisodicMemory
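
Each of these events is also written to the workflow's event stream (ctx.write_event_to_stream in codeact_agent.py), so a caller can observe a run as it happens through llama_index's standard streaming handler. A minimal sketch; the print handling is illustrative:

from droidrun.agent.codeact.events import (
    TaskEndEvent,
    TaskExecutionResultEvent,
    TaskThinkingEvent,
)

async def run_and_watch(agent, goal: str):
    # `agent` is assumed to be a configured CodeActAgent. Calling run()
    # without awaiting returns a handler whose stream_events() yields
    # everything the steps wrote to the stream.
    handler = agent.run(input=goal, remembered_info=None)
    async for ev in handler.stream_events():
        if isinstance(ev, TaskThinkingEvent):
            print("thought:", ev.thoughts)
        elif isinstance(ev, TaskExecutionResultEvent):
            print("observation:", ev.output)
        elif isinstance(ev, TaskEndEvent):
            print("finished:", ev.success, ev.reason)
    return await handler  # the result dict built in finalize()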
droidrun/agent/codeact/prompts.py ADDED
@@ -0,0 +1,26 @@
+ """
+ Prompt templates for the CodeActAgent.
+
+ This module contains all the prompts used by the CodeActAgent,
+ separated from the workflow logic for better maintainability.
+ """
+
+
+ # User prompt template that presents the current request and prompts for reasoning
+ DEFAULT_CODE_ACT_USER_PROMPT = """**Current Request:**
+ {goal}
+
+ **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed."""
+
+ # Prompt to remind the agent to provide thoughts before code
+ DEFAULT_NO_THOUGHTS_PROMPT = """Your previous response provided code without explaining your reasoning first. Remember to always describe your thought process and plan *before* providing the code block.
+
+ The code you provided will be executed below.
+
+ Now, describe the next step you will take to address the original goal: {goal}"""
+
+ # Export all prompts
+ __all__ = [
+     "DEFAULT_CODE_ACT_USER_PROMPT",
+     "DEFAULT_NO_THOUGHTS_PROMPT"
+ ]
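
Both templates carry a single {goal} placeholder and are rendered with llama_index's PromptTemplate, exactly as prepare_chat() does in codeact_agent.py above; the goal text here is illustrative:

from llama_index.core.prompts import PromptTemplate

from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT

rendered = PromptTemplate(DEFAULT_CODE_ACT_USER_PROMPT).format(
    goal="Open the Settings app and enable dark mode"
)
print(rendered)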
droidrun/agent/common/default.py ADDED
@@ -0,0 +1,5 @@
+ from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
+ class MockWorkflow(Workflow):
+     @step()
+     async def sub_start(self, ctx: Context, ev: StartEvent) -> StopEvent:
+         return StopEvent(result="This is a mock Workflow")
droidrun/agent/common/events.py ADDED
@@ -0,0 +1,4 @@
+ from llama_index.core.workflow import Event
+
+ class ScreenshotEvent(Event):
+     screenshot: bytes
droidrun/agent/context/__init__.py ADDED
@@ -0,0 +1,23 @@
+ """
+ Agent Context Module - Provides specialized agent personas and context injection management.
+
+ This module contains:
+ - AgentPersona: Dataclass for defining specialized agent configurations
+ - ContextInjectionManager: Manager for handling different agent personas and their contexts
+ """
+
+ from .agent_persona import AgentPersona
+ from .context_injection_manager import ContextInjectionManager
+ from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
+ from .reflection import Reflection
+ from .task_manager import TaskManager, Task
+
+ __all__ = [
+     "AgentPersona",
+     "ContextInjectionManager",
+     "EpisodicMemory",
+     "EpisodicMemoryStep",
+     "Reflection",
+     "TaskManager",
+     "Task"
+ ]
droidrun/agent/context/agent_persona.py ADDED
@@ -0,0 +1,15 @@
+ from typing import Dict, List, Callable, Any, Optional
+ from dataclasses import dataclass
+
+ @dataclass
+ class AgentPersona:
+     """Represents a specialized agent persona with its configuration."""
+     name: str
+     system_prompt: str
+     user_prompt: str
+     description: str
+     allowed_tools: List[str]
+     required_context: List[str]
+     expertise_areas: List[str]
+
+ AppAgent = AgentPersona
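
Each field maps onto a hook in CodeActAgent: system_prompt is formatted with the rendered tool_descriptions, allowed_tools filters all_tools_list down to self.tool_list, and required_context selects which observation blocks (screenshot, ui_state, phone_state, packages) get attached to the chat history. A hypothetical persona with illustrative values; the shipped ones live in droidrun/agent/context/personas/:

from droidrun.agent.context.agent_persona import AgentPersona

APP_STARTER = AgentPersona(  # name and every value below are assumptions
    name="AppStarter",
    description="Launches apps and confirms they reach the foreground.",
    # {tool_descriptions} is mandatory: CodeActAgent.__init__ fills it in.
    system_prompt=(
        "You are an Android agent that starts apps.\n"
        "Available tools:\n{tool_descriptions}"
    ),
    user_prompt=None,  # None falls back to DEFAULT_CODE_ACT_USER_PROMPT
    allowed_tools=["start_app", "list_packages", "complete"],  # assumed names
    required_context=["phone_state", "packages"],
    expertise_areas=["app_launching"],
)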