droidrun 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. droidrun/__init__.py +16 -11
  2. droidrun/__main__.py +1 -1
  3. droidrun/adb/__init__.py +3 -3
  4. droidrun/adb/device.py +1 -1
  5. droidrun/adb/manager.py +2 -2
  6. droidrun/agent/__init__.py +6 -0
  7. droidrun/agent/codeact/__init__.py +2 -4
  8. droidrun/agent/codeact/codeact_agent.py +321 -235
  9. droidrun/agent/codeact/events.py +12 -20
  10. droidrun/agent/codeact/prompts.py +0 -52
  11. droidrun/agent/common/default.py +5 -0
  12. droidrun/agent/common/events.py +4 -0
  13. droidrun/agent/context/__init__.py +23 -0
  14. droidrun/agent/context/agent_persona.py +15 -0
  15. droidrun/agent/context/context_injection_manager.py +66 -0
  16. droidrun/agent/context/episodic_memory.py +15 -0
  17. droidrun/agent/context/personas/__init__.py +11 -0
  18. droidrun/agent/context/personas/app_starter.py +44 -0
  19. droidrun/agent/context/personas/default.py +95 -0
  20. droidrun/agent/context/personas/extractor.py +52 -0
  21. droidrun/agent/context/personas/ui_expert.py +107 -0
  22. droidrun/agent/context/reflection.py +20 -0
  23. droidrun/agent/context/task_manager.py +124 -0
  24. droidrun/agent/context/todo.txt +4 -0
  25. droidrun/agent/droid/__init__.py +2 -2
  26. droidrun/agent/droid/droid_agent.py +264 -325
  27. droidrun/agent/droid/events.py +28 -0
  28. droidrun/agent/oneflows/reflector.py +265 -0
  29. droidrun/agent/planner/__init__.py +2 -4
  30. droidrun/agent/planner/events.py +9 -13
  31. droidrun/agent/planner/planner_agent.py +268 -0
  32. droidrun/agent/planner/prompts.py +33 -53
  33. droidrun/agent/utils/__init__.py +3 -0
  34. droidrun/agent/utils/async_utils.py +1 -40
  35. droidrun/agent/utils/chat_utils.py +268 -48
  36. droidrun/agent/utils/executer.py +49 -14
  37. droidrun/agent/utils/llm_picker.py +14 -10
  38. droidrun/agent/utils/trajectory.py +184 -0
  39. droidrun/cli/__init__.py +1 -1
  40. droidrun/cli/logs.py +283 -0
  41. droidrun/cli/main.py +333 -439
  42. droidrun/run.py +105 -0
  43. droidrun/tools/__init__.py +5 -10
  44. droidrun/tools/{actions.py → adb.py} +279 -238
  45. droidrun/tools/ios.py +594 -0
  46. droidrun/tools/tools.py +99 -0
  47. droidrun-0.3.0.dist-info/METADATA +149 -0
  48. droidrun-0.3.0.dist-info/RECORD +52 -0
  49. droidrun/agent/planner/task_manager.py +0 -355
  50. droidrun/agent/planner/workflow.py +0 -371
  51. droidrun/tools/device.py +0 -29
  52. droidrun/tools/loader.py +0 -60
  53. droidrun-0.2.0.dist-info/METADATA +0 -373
  54. droidrun-0.2.0.dist-info/RECORD +0 -32
  55. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/WHEEL +0 -0
  56. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/entry_points.txt +0 -0
  57. {droidrun-0.2.0.dist-info → droidrun-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,36 +1,28 @@
1
1
  from llama_index.core.llms import ChatMessage
2
2
  from llama_index.core.workflow import Event
3
- from typing import Any, Optional
3
+ from typing import Optional
4
+ from ..context.episodic_memory import EpisodicMemory
4
5
 
5
- from pydantic import PrivateAttr
6
+ class TaskInputEvent(Event):
7
+ input: list[ChatMessage]
6
8
 
7
9
 
8
- class InputEvent(Event):
9
- input: list[ChatMessage]
10
10
 
11
- class ModelOutputEvent(Event):
11
+ class TaskThinkingEvent(Event):
12
12
  thoughts: Optional[str] = None
13
13
  code: Optional[str] = None
14
14
 
15
- class ExecutionEvent(Event):
15
+ class TaskExecutionEvent(Event):
16
16
  code: str
17
17
  globals: dict[str, str] = {}
18
18
  locals: dict[str, str] = {}
19
19
 
20
- class ExecutionResultEvent(Event):
20
+ class TaskExecutionResultEvent(Event):
21
21
  output: str
22
22
 
23
- class FinalizeEvent(Event):
24
- _result: Any = PrivateAttr(default=None)
25
-
26
- def __init__(self, result: Any = None, **kwargs: Any) -> None:
27
- # forces the user to provide a result
28
- super().__init__(_result=result, **kwargs)
29
-
30
- def _get_result(self) -> Any:
31
- """This can be overridden by subclasses to return the desired result."""
32
- return self._result
23
+ class TaskEndEvent(Event):
24
+ success: bool
25
+ reason: str
33
26
 
34
- @property
35
- def result(self) -> Any:
36
- return self._get_result()
27
+ class EpisodicMemoryEvent(Event):
28
+ episodic_memory: EpisodicMemory
@@ -5,57 +5,6 @@ This module contains all the prompts used by the CodeActAgent,
5
5
  separated from the workflow logic for better maintainability.
6
6
  """
7
7
 
8
- # System prompt for the CodeActAgent that explains its role and capabilities
9
- DEFAULT_CODE_ACT_SYSTEM_PROMPT = """You are a helpful AI assistant that can write and execute Python code to solve problems.
10
-
11
- You will be given a task to perform. You should output:
12
- - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution. Any output you want to extract from the code should be printed to the console.
13
- - Text to be shown directly to the user, if you want to ask for more information or provide the final answer.
14
- - If the previous code execution can be used to respond to the user, then respond directly (typically you want to avoid mentioning anything related to the code execution in your response).
15
- - If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
16
- ## Response Format:
17
- Example of proper code format:
18
- To calculate the area of a circle, I need to use the formula: area = pi * radius^2. I will write a function to do this.
19
- ```python
20
- import math
21
-
22
- def calculate_area(radius):
23
- return math.pi * radius**2
24
-
25
- # Calculate the area for radius = 5
26
- area = calculate_area(5)
27
- print(f"The area of the circle is {{area:.2f}} square units")
28
- ```
29
-
30
- Another example (with for loop):
31
- To calculate the sum of numbers from 1 to 10, I will use a for loop.
32
- ```python
33
- sum = 0
34
- for i in range(1, 11):
35
- sum += i
36
- print(f"The sum of numbers from 1 to 10 is {{sum}}")
37
- ```
38
-
39
- In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
40
- {tool_descriptions}
41
-
42
- You'll receive a screenshot showing the current screen and its UI elements to help you complete the task. However, screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
43
-
44
- **Important Notes:**
45
- - If there is a precondition for the task, you MUST check if it is met.
46
- - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
47
-
48
- ## Final Answer Guidelines:
49
- - When providing a final answer, focus on directly answering the user's question
50
- - Avoid referencing the code you generated unless specifically asked
51
- - Present the results clearly and concisely as if you computed them directly
52
- - If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
53
- - Structure your response like you're directly answering the user's query, not explaining how you solved it
54
-
55
- Reminder: Always place your Python code between ```...``` tags when you want to run code.
56
-
57
- You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
58
- """
59
8
 
60
9
  # User prompt template that presents the current request and prompts for reasoning
61
10
  DEFAULT_CODE_ACT_USER_PROMPT = """**Current Request:**
@@ -72,7 +21,6 @@ Now, describe the next step you will take to address the original goal: {goal}""
72
21
 
73
22
  # Export all prompts
74
23
  __all__ = [
75
- "DEFAULT_CODE_ACT_SYSTEM_PROMPT",
76
24
  "DEFAULT_CODE_ACT_USER_PROMPT",
77
25
  "DEFAULT_NO_THOUGHTS_PROMPT"
78
26
  ]
@@ -0,0 +1,5 @@
1
+ from llama_index.core.workflow import step, StartEvent, StopEvent, Workflow, Context
2
+ class MockWorkflow(Workflow):
3
+ @step()
4
+ async def sub_start(self, ctx: Context, ev: StartEvent) -> StopEvent:
5
+ return StopEvent(result="This is a mock Workflow")
@@ -0,0 +1,4 @@
1
+ from llama_index.core.workflow import Event
2
+
3
+ class ScreenshotEvent(Event):
4
+ screenshot: bytes
@@ -0,0 +1,23 @@
1
+ """
2
+ Agent Context Module - Provides specialized agent personas and context injection management.
3
+
4
+ This module contains:
5
+ - AgentPersona: Dataclass for defining specialized agent configurations
6
+ - ContextInjectionManager: Manager for handling different agent personas and their contexts
7
+ """
8
+
9
+ from .agent_persona import AgentPersona
10
+ from .context_injection_manager import ContextInjectionManager
11
+ from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
12
+ from .reflection import Reflection
13
+ from .task_manager import TaskManager, Task
14
+
15
+ __all__ = [
16
+ "AgentPersona",
17
+ "ContextInjectionManager",
18
+ "EpisodicMemory",
19
+ "EpisodicMemoryStep",
20
+ "Reflection",
21
+ "TaskManager",
22
+ "Task"
23
+ ]
@@ -0,0 +1,15 @@
1
+ from typing import Dict, List, Callable, Any, Optional
2
+ from dataclasses import dataclass
3
+
4
+ @dataclass
5
+ class AgentPersona:
6
+ """Represents a specialized agent persona with its configuration."""
7
+ name: str
8
+ system_prompt: str
9
+ user_prompt: str
10
+ description: str
11
+ allowed_tools: List[str]
12
+ required_context: List[str]
13
+ expertise_areas: List[str]
14
+
15
+ AppAgent = AgentPersona
@@ -0,0 +1,66 @@
1
+ """
2
+ Context Injection Manager - Manages specialized agent personas with dynamic tool and context injection.
3
+
4
+ This module provides the ContextInjectionManager class that manages different agent personas,
5
+ each with specific system prompts, contexts, and tool subsets tailored for specialized tasks.
6
+ """
7
+
8
+ import logging
9
+ from typing import Optional, List
10
+ from droidrun.agent.context.agent_persona import AgentPersona
11
+ #import chromadb
12
+ import json
13
+ from pathlib import Path
14
+
15
+ logger = logging.getLogger("droidrun")
16
+
17
+ class ContextInjectionManager:
18
+ """
19
+ Manages different agent personas with specialized contexts and tool subsets.
20
+
21
+ This class is responsible for:
22
+ - Defining agent personas with specific capabilities
23
+ - Injecting appropriate system prompts based on agent type
24
+ - Filtering tool lists to match agent specialization
25
+ - Providing context-aware configurations for CodeActAgent instances
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ personas: List[AgentPersona]
31
+ ):
32
+ """Initialize the Context Injection Manager with predefined personas."""
33
+
34
+ self.personas = {}
35
+ for persona in personas:
36
+ self.personas[persona.name] = persona
37
+
38
+
39
+ def _load_persona(self, data: str) -> AgentPersona:
40
+ persona = json.loads(data)
41
+ logger.info(f"🎭 Loaded persona: {persona['name']}")
42
+ return AgentPersona(
43
+ name=persona['name'],
44
+ system_prompt=persona['system_prompt'],
45
+ allowed_tools=persona['allowed_tools'],
46
+ description=persona['description'],
47
+ expertise_areas=persona['expertise_areas'],
48
+ user_prompt=persona['user_prompt'],
49
+ required_context=persona['required_context'],
50
+ )
51
+
52
+ def get_persona(self, agent_type: str) -> Optional[AgentPersona]:
53
+ """
54
+ Get a specific agent persona by type.
55
+
56
+ Args:
57
+ agent_type: The type of agent ("UIExpert" or "AppStarterExpert")
58
+
59
+ Returns:
60
+ AgentPersona instance or None if not found
61
+ """
62
+
63
+ return self.personas.get(agent_type)
64
+
65
+ def get_all_personas(self) -> List[str]:
66
+ return self.personas
@@ -0,0 +1,15 @@
1
+ from dataclasses import dataclass, field
2
+ from droidrun.agent.context.agent_persona import AgentPersona
3
+ from typing import List, Optional
4
+
5
+ @dataclass
6
+ class EpisodicMemoryStep:
7
+ chat_history: str
8
+ response: str
9
+ timestamp: float
10
+ screenshot: Optional[bytes]
11
+
12
+ @dataclass
13
+ class EpisodicMemory:
14
+ persona: AgentPersona
15
+ steps: List[EpisodicMemoryStep] = field(default_factory=list)
@@ -0,0 +1,11 @@
1
+ from .default import DEFAULT
2
+ from .ui_expert import UI_EXPERT
3
+ from .app_starter import APP_STARTER_EXPERT
4
+ from .extractor import EXTRACTOR
5
+
6
+ __all__ = [
7
+ 'DEFAULT',
8
+ 'UI_EXPERT',
9
+ 'APP_STARTER_EXPERT',
10
+ 'EXTRACTOR'
11
+ ]
@@ -0,0 +1,44 @@
1
+ from droidrun.agent.context.agent_persona import AgentPersona
2
+ from droidrun.tools import Tools
3
+
4
+ APP_STARTER_EXPERT = AgentPersona(
5
+ name="AppStarterExpert",
6
+ description="Specialized in app launching",
7
+ expertise_areas=[
8
+ "app launching"
9
+ ],
10
+ allowed_tools=[
11
+ Tools.start_app.__name__,
12
+ Tools.complete.__name__
13
+ ],
14
+ required_context=[
15
+ "packages"
16
+ ],
17
+ user_prompt="""
18
+ **Current Request:**
19
+ {goal}
20
+ **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
21
+
22
+ system_prompt= """You are an App Starter Expert specialized in Android application lifecycle management. Your core expertise includes:
23
+
24
+ **Primary Capabilities:**
25
+ - Launch Android applications by package name
26
+ - Use proper package name format (com.example.app)
27
+
28
+ ## Response Format:
29
+ Example of proper code format:
30
+ To launch the Calculator app, I need to use the start_app function with the correct package name.
31
+ ```python
32
+ # Launch the Calculator app
33
+ start_app("com.android.calculator2")
34
+ complete(success=True)
35
+ ```
36
+
37
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
38
+ {tool_descriptions}
39
+
40
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
41
+
42
+ You focus ONLY on app launching and package management - UI interactions within apps are handled by UI specialists.""",
43
+
44
+ )
@@ -0,0 +1,95 @@
1
+ from droidrun.agent.context.agent_persona import AgentPersona
2
+ from droidrun.tools import Tools
3
+
4
+ DEFAULT = AgentPersona(
5
+ name="Default",
6
+ description="Default Agent. Use this as your Default",
7
+ expertise_areas=[
8
+ "UI navigation", "button interactions", "text input",
9
+ "menu navigation", "form filling", "scrolling", "app launching"
10
+ ],
11
+ allowed_tools=[
12
+ Tools.swipe.__name__,
13
+ Tools.input_text.__name__,
14
+ Tools.press_key.__name__,
15
+ Tools.tap_by_index.__name__,
16
+ Tools.start_app.__name__,
17
+ Tools.list_packages.__name__,
18
+ Tools.remember.__name__,
19
+ Tools.complete.__name__
20
+ ],
21
+ required_context=[
22
+ "ui_state",
23
+ "screenshot",
24
+ "phone_state"
25
+ ],
26
+ user_prompt="""
27
+ **Current Request:**
28
+ {goal}
29
+ **Is the precondition met? What is your reasoning and the next step to address this request?**
30
+ Explain your thought process then provide code in ```python ... ``` tags if needed.
31
+ """"",
32
+
33
+ system_prompt="""
34
+ You are a helpful AI assistant that can write and execute Python code to solve problems.
35
+
36
+ You will be given a task to perform. You should output:
37
+ - Python code wrapped in ``` tags that provides the solution to the task, or a step towards the solution.
38
+ - If there is a precondition for the task, you MUST check if it is met.
39
+ - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
40
+ - If you task is complete, you should use the complete(success:bool, reason:str) function within a code block to mark it as finished. The success parameter should be True if the task was completed successfully, and False otherwise. The reason parameter should be a string explaining the reason for failure if failed.
41
+
42
+
43
+ ## Context:
44
+ The following context is given to you for analysis:
45
+ - **ui_state**: A list of all currently visible UI elements with their indices. Use this to understand what interactive elements are available on the screen.
46
+ - **screenshots**: A visual screenshot of the current state of the Android screen. This provides visual context for what the user sees. screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
47
+ - **phone_state**: The current app you are navigating in. This tells you which application context you're working within.
48
+ - **chat history**: You are also given the history of your actions (if any) from your previous steps.
49
+ NOTE: you don't have access to these inputs in your tool calling context
50
+
51
+ ## Response Format:
52
+ Example of proper code format:
53
+ **Task Assignment:**
54
+ **Task:** "Precondition: Settings app is open. Goal: Navigate to Wi-Fi settings and connect to the network 'HomeNetwork'."
55
+
56
+ **(Step 1) Agent Analysis:** I can see the Settings app is open from the screenshot. This is a multi-step task that requires me to first navigate to Wi-Fi settings, then ensure Wi-Fi is enabled, and finally connect to 'HomeNetwork'. Let me start by finding and tapping on the Wi-Fi option in the settings menu. Looking at the UI elements, I can see "Wi-Fi" option at index 3.
57
+
58
+ **(Step 1) Agent Action:**
59
+ ```python
60
+ # First step: Navigate to Wi-Fi settings
61
+ tap_by_index(3)
62
+ ```
63
+
64
+ **(Step 2) Agent Analysis:** Good! I've successfully navigated to the Wi-Fi settings screen. Now I can see the Wi-Fi settings interface. I notice that Wi-Fi appears to be turned off based on the toggle switch at index 1. I need to turn it on before I can see available networks and connect to 'HomeNetwork'.
65
+
66
+ **(Step 2) Agent Action:**
67
+ ```python
68
+ # Second step: Turn on Wi-Fi to see available networks
69
+ tap_by_index(1)
70
+ ```
71
+
72
+ **(Step 3) Agent Analysis:** Excellent! Wi-Fi is now enabled and I can see a list of available networks appearing on the screen. I can see 'HomeNetwork' in the list at index 5. This is the final step - I need to tap on it to initiate the connection, which will complete my assigned task.
73
+
74
+ **(Step 3) Agent Action:**
75
+ ```python
76
+ # Final step: Connect to the target network
77
+ tap_by_index(5)
78
+ complete(success=True, reason="Successfully navigated to Wi-Fi settings and initiated connection to HomeNetwork")
79
+ ```
80
+ ```
81
+
82
+ ## Tools:
83
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
84
+ {tool_descriptions}
85
+
86
+
87
+ ## Final Answer Guidelines:
88
+ - When providing a final answer, focus on directly answering the user's question in the response format given
89
+ - Present the results clearly and concisely as if you computed them directly
90
+ - Structure your response like you're directly answering the user's query, not explaining how you solved it
91
+
92
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
93
+ """
94
+
95
+ )
@@ -0,0 +1,52 @@
1
+ from droidrun.agent.context.agent_persona import AgentPersona
2
+ from droidrun.tools import Tools
3
+
4
+ EXTRACTOR = AgentPersona(
5
+ name="DataExtractor",
6
+ description="Specialized in extracting data from UI elements and screenshots",
7
+ expertise_areas=[
8
+ "data extraction",
9
+ "UI analysis",
10
+ "text recognition"
11
+ ],
12
+ allowed_tools=[
13
+ Tools.extract.__name__,
14
+ Tools.complete.__name__
15
+ ],
16
+ required_context=[
17
+ "ui_state",
18
+ "screenshot"
19
+ ],
20
+ user_prompt="""
21
+ **Current Request:**
22
+ {goal}
23
+ **What data needs to be extracted?
24
+ Analyze the current UI state and screenshot, then extract the requested information.
25
+ ** Explain your thought process then provide code in ```python ... ``` tags if needed.""",
26
+
27
+ system_prompt= """
28
+ You are a Data Extractor Expert specialized in analyzing Android UI states and screenshots to extract specific information. Your core expertise includes:
29
+
30
+ **Primary Capabilities:**
31
+ - Analyze UI elements from ui_state data
32
+ - Extract text, values, and structured data from screenshots
33
+ - Identify and parse specific UI components (buttons, text fields, lists, etc.)
34
+ - Extract data based on user requirements
35
+
36
+ ## Response Format:
37
+ Example of proper code format:
38
+ To extract the current battery percentage from the status bar:
39
+ ```python
40
+ # Extract battery percentage from UI state
41
+ battery_data = extract("battery percentage")
42
+ complete(success=True)
43
+ ```
44
+
45
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
46
+ {tool_descriptions}
47
+
48
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
49
+
50
+ You focus ONLY on data extraction from the current UI state and screenshot - navigation and UI interactions are handled by other specialists.""",
51
+
52
+ )
@@ -0,0 +1,107 @@
1
+ from droidrun.agent.context.agent_persona import AgentPersona
2
+ from droidrun.tools import Tools
3
+
4
+ UI_EXPERT = AgentPersona(
5
+ name="UIExpert",
6
+ description="Specialized in UI interactions, navigation, and form filling",
7
+ expertise_areas=[
8
+ "UI navigation", "button interactions", "text input",
9
+ "menu navigation", "form filling", "scrolling"
10
+ ],
11
+ allowed_tools=[
12
+ Tools.swipe.__name__,
13
+ Tools.input_text.__name__,
14
+ Tools.press_key.__name__,
15
+ Tools.tap_by_index.__name__,
16
+ Tools.remember.__name__,
17
+ Tools.complete.__name__
18
+ ],
19
+ required_context=[
20
+ "ui_state",
21
+ "screenshot",
22
+ "phone_state",
23
+ "memory"
24
+ ],
25
+ user_prompt="""
26
+ **Current Request:**
27
+ {goal}
28
+ **Is the precondition met? What is your reasoning and the next step to address this request?** Explain your thought process then provide code in ```python ... ``` tags if needed.""""",
29
+
30
+
31
+ system_prompt="""You are a UI Expert specialized in Android interface interactions. Your core expertise includes:
32
+
33
+ **Primary Capabilities:**
34
+ - Navigate through Android UI elements with precision
35
+ - Interact with buttons, menus, forms, and interactive elements
36
+ - Enter text into input fields and search bars
37
+ - Scroll through content and lists
38
+ - Handle complex UI navigation workflows
39
+ - Recognize and interact with various UI patterns (tabs, drawers, dialogs, etc.)
40
+
41
+ **Your Approach:**
42
+ - Focus on understanding the current UI state through screenshots and element data
43
+ - Use precise element identification for reliable interactions
44
+ - Handle dynamic UI changes and loading states gracefully
45
+ - Provide clear feedback on UI interactions and their outcomes
46
+ - Adapt to different app interfaces and UI patterns
47
+
48
+ **Key Principles:**
49
+ - Always analyze the current screen state before taking action
50
+ - Prefer using element indices for reliable targeting
51
+ - Provide descriptive feedback about what you're interacting with
52
+ - Handle edge cases like loading screens, popups, and navigation changes
53
+ - Remember important UI state information for context
54
+
55
+ You do NOT handle app launching or package management - that's handled by other specialists.
56
+
57
+
58
+ ## Available Context:
59
+ In your execution environment, you have access to:
60
+ - `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
61
+
62
+ ## Response Format:
63
+ Example of proper code format:
64
+ To calculate the area of a circle, I need to use the formula: area = pi * radius^2. I will write a function to do this.
65
+ ```python
66
+ import math
67
+
68
+ def calculate_area(radius):
69
+ return math.pi * radius**2
70
+
71
+ # Calculate the area for radius = 5
72
+ area = calculate_area(5)
73
+ print(f"The area of the circle is {{area:.2f}} square units")
74
+ ```
75
+
76
+ Another example (with for loop):
77
+ To calculate the sum of numbers from 1 to 10, I will use a for loop.
78
+ ```python
79
+ sum = 0
80
+ for i in range(1, 11):
81
+ sum += i
82
+ print(f"The sum of numbers from 1 to 10 is {{sum}}")
83
+ ```
84
+
85
+ In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
86
+ {tool_descriptions}
87
+
88
+ You'll receive a screenshot showing the current screen and its UI elements to help you complete the task. However, screenshots won't be saved in the chat history. So, make sure to describe what you see and explain the key parts of your plan in your thoughts, as those will be saved and used to assist you in future steps.
89
+
90
+ **Important Notes:**
91
+ - If there is a precondition for the task, you MUST check if it is met.
92
+ - If a goal's precondition is unmet, fail the task by calling `complete(success=False, reason='...')` with an explanation.
93
+
94
+ ## Final Answer Guidelines:
95
+ - When providing a final answer, focus on directly answering the user's question
96
+ - Avoid referencing the code you generated unless specifically asked
97
+ - Present the results clearly and concisely as if you computed them directly
98
+ - If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
99
+ - Structure your response like you're directly answering the user's query, not explaining how you solved it
100
+
101
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
102
+
103
+ You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
104
+ """
105
+ )
106
+
107
+
@@ -0,0 +1,20 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional
3
+
4
+ @dataclass
5
+ class Reflection:
6
+ """Represents the result of a reflection analysis on episodic memory."""
7
+ goal_achieved: bool
8
+ summary: str
9
+ advice: Optional[str] = None
10
+ raw_response: Optional[str] = None
11
+
12
+ @classmethod
13
+ def from_dict(cls, data: dict) -> 'Reflection':
14
+ """Create a Reflection from a dictionary (e.g., parsed JSON)."""
15
+ return cls(
16
+ goal_achieved=data.get('goal_achieved', False),
17
+ summary=data.get('summary', ''),
18
+ advice=data.get('advice'),
19
+ raw_response=data.get('raw_response')
20
+ )