droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +2 -3
- droidrun/__main__.py +1 -1
- droidrun/agent/__init__.py +1 -1
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +66 -40
- droidrun/agent/codeact/events.py +6 -3
- droidrun/agent/codeact/prompts.py +2 -2
- droidrun/agent/common/events.py +4 -2
- droidrun/agent/context/__init__.py +1 -3
- droidrun/agent/context/agent_persona.py +2 -1
- droidrun/agent/context/context_injection_manager.py +6 -6
- droidrun/agent/context/episodic_memory.py +5 -3
- droidrun/agent/context/personas/__init__.py +3 -3
- droidrun/agent/context/personas/app_starter.py +3 -3
- droidrun/agent/context/personas/big_agent.py +3 -3
- droidrun/agent/context/personas/default.py +3 -3
- droidrun/agent/context/personas/ui_expert.py +5 -5
- droidrun/agent/context/task_manager.py +15 -17
- droidrun/agent/droid/__init__.py +1 -1
- droidrun/agent/droid/droid_agent.py +327 -180
- droidrun/agent/droid/events.py +91 -9
- droidrun/agent/executor/__init__.py +13 -0
- droidrun/agent/executor/events.py +24 -0
- droidrun/agent/executor/executor_agent.py +327 -0
- droidrun/agent/executor/prompts.py +136 -0
- droidrun/agent/manager/__init__.py +18 -0
- droidrun/agent/manager/events.py +20 -0
- droidrun/agent/manager/manager_agent.py +459 -0
- droidrun/agent/manager/prompts.py +223 -0
- droidrun/agent/oneflows/app_starter_workflow.py +118 -0
- droidrun/agent/oneflows/text_manipulator.py +204 -0
- droidrun/agent/planner/__init__.py +3 -3
- droidrun/agent/planner/events.py +6 -3
- droidrun/agent/planner/planner_agent.py +27 -42
- droidrun/agent/planner/prompts.py +2 -2
- droidrun/agent/usage.py +11 -11
- droidrun/agent/utils/__init__.py +11 -1
- droidrun/agent/utils/async_utils.py +2 -1
- droidrun/agent/utils/chat_utils.py +48 -60
- droidrun/agent/utils/device_state_formatter.py +177 -0
- droidrun/agent/utils/executer.py +12 -11
- droidrun/agent/utils/inference.py +114 -0
- droidrun/agent/utils/llm_picker.py +2 -0
- droidrun/agent/utils/message_utils.py +85 -0
- droidrun/agent/utils/tools.py +220 -0
- droidrun/agent/utils/trajectory.py +8 -7
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +29 -28
- droidrun/cli/main.py +279 -143
- droidrun/config_manager/__init__.py +25 -0
- droidrun/config_manager/config_manager.py +583 -0
- droidrun/macro/__init__.py +2 -2
- droidrun/macro/__main__.py +1 -1
- droidrun/macro/cli.py +36 -34
- droidrun/macro/replay.py +7 -9
- droidrun/portal.py +1 -1
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -4
- droidrun/telemetry/phoenix.py +173 -0
- droidrun/telemetry/tracker.py +7 -5
- droidrun/tools/__init__.py +1 -1
- droidrun/tools/adb.py +210 -82
- droidrun/tools/ios.py +7 -5
- droidrun/tools/tools.py +25 -8
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/METADATA +5 -3
- droidrun-0.3.10.dev3.dist-info/RECORD +70 -0
- droidrun/agent/common/default.py +0 -5
- droidrun/agent/context/reflection.py +0 -20
- droidrun/agent/oneflows/reflector.py +0 -265
- droidrun-0.3.9.dist-info/RECORD +0 -56
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/WHEEL +0 -0
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py
CHANGED
@@ -5,13 +5,12 @@ DroidRun - A framework for controlling Android devices through LLM agents.
|
|
5
5
|
__version__ = "0.3.0"
|
6
6
|
|
7
7
|
# Import main classes for easier access
|
8
|
-
from droidrun.agent.utils.llm_picker import load_llm
|
9
|
-
from droidrun.tools import Tools, AdbTools, IOSTools
|
10
8
|
from droidrun.agent.droid import DroidAgent
|
9
|
+
from droidrun.agent.utils.llm_picker import load_llm
|
11
10
|
|
12
11
|
# Import macro functionality
|
13
12
|
from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
|
14
|
-
|
13
|
+
from droidrun.tools import AdbTools, IOSTools, Tools
|
15
14
|
|
16
15
|
# Make main components available at package level
|
17
16
|
__all__ = [
|
droidrun/__main__.py
CHANGED
droidrun/agent/__init__.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
from droidrun.agent.codeact.codeact_agent import CodeActAgent
|
2
|
-
from droidrun.agent.codeact.prompts import
|
3
|
-
DEFAULT_CODE_ACT_USER_PROMPT,
|
4
|
-
DEFAULT_NO_THOUGHTS_PROMPT
|
5
|
-
)
|
2
|
+
from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT
|
6
3
|
|
7
4
|
__all__ = [
|
8
5
|
"CodeActAgent",
|
@@ -1,37 +1,37 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
1
3
|
import logging
|
2
4
|
import re
|
3
5
|
import time
|
4
|
-
import
|
5
|
-
|
6
|
-
import os
|
7
|
-
from typing import List, Optional, Tuple, Union
|
6
|
+
from typing import List, Union
|
7
|
+
|
8
8
|
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
9
|
-
from llama_index.core.prompts import PromptTemplate
|
10
9
|
from llama_index.core.llms.llm import LLM
|
11
|
-
from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
|
12
10
|
from llama_index.core.memory import Memory
|
11
|
+
from llama_index.core.prompts import PromptTemplate
|
12
|
+
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
|
13
|
+
|
13
14
|
from droidrun.agent.codeact.events import (
|
14
|
-
|
15
|
+
EpisodicMemoryEvent,
|
15
16
|
TaskEndEvent,
|
16
17
|
TaskExecutionEvent,
|
17
18
|
TaskExecutionResultEvent,
|
19
|
+
TaskInputEvent,
|
18
20
|
TaskThinkingEvent,
|
19
|
-
EpisodicMemoryEvent,
|
20
21
|
)
|
21
|
-
from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
|
22
|
-
from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
23
|
-
from droidrun.agent.usage import get_usage_from_response
|
24
|
-
from droidrun.agent.utils import chat_utils
|
25
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
26
22
|
from droidrun.agent.codeact.prompts import (
|
27
23
|
DEFAULT_CODE_ACT_USER_PROMPT,
|
28
24
|
DEFAULT_NO_THOUGHTS_PROMPT,
|
29
25
|
)
|
30
|
-
|
26
|
+
from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
|
27
|
+
from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
28
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
31
29
|
from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
30
|
+
from droidrun.agent.usage import get_usage_from_response
|
31
|
+
from droidrun.agent.utils import chat_utils
|
32
|
+
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
33
|
+
from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, get_atomic_tool_descriptions, build_custom_tool_descriptions
|
32
34
|
from droidrun.tools import Tools
|
33
|
-
from typing import Optional, Dict, Tuple, List, Any, Callable
|
34
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
35
35
|
|
36
36
|
logger = logging.getLogger("droidrun")
|
37
37
|
|
@@ -49,8 +49,8 @@ class CodeActAgent(Workflow):
|
|
49
49
|
persona: AgentPersona,
|
50
50
|
vision: bool,
|
51
51
|
tools_instance: "Tools",
|
52
|
-
all_tools_list: Dict[str, Callable[..., Any]],
|
53
52
|
max_steps: int = 5,
|
53
|
+
custom_tools: dict = None,
|
54
54
|
debug: bool = False,
|
55
55
|
*args,
|
56
56
|
**kwargs,
|
@@ -78,13 +78,40 @@ class CodeActAgent(Workflow):
|
|
78
78
|
|
79
79
|
self.tools = tools_instance
|
80
80
|
|
81
|
+
# Merge custom_tools with ATOMIC_ACTION_SIGNATURES
|
82
|
+
# Custom tools are treated the same as atomic actions by CodeAct
|
83
|
+
merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
|
84
|
+
|
85
|
+
# Build tool_list from merged signatures
|
81
86
|
self.tool_list = {}
|
87
|
+
for action_name, signature in merged_signatures.items():
|
88
|
+
func = signature["function"]
|
89
|
+
# Create bound function (curry tools_instance as first argument)
|
90
|
+
# Handle both sync and async functions
|
91
|
+
if asyncio.iscoroutinefunction(func):
|
92
|
+
async def make_async_bound(f, ti):
|
93
|
+
async def bound_func(*args, **kwargs):
|
94
|
+
return await f(ti, *args, **kwargs)
|
95
|
+
return bound_func
|
96
|
+
self.tool_list[action_name] = asyncio.run(make_async_bound(func, tools_instance))
|
97
|
+
else:
|
98
|
+
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)
|
99
|
+
|
100
|
+
# Add non-atomic tools (remember, complete) from tools_instance
|
101
|
+
self.tool_list["remember"] = tools_instance.remember
|
102
|
+
self.tool_list["complete"] = tools_instance.complete
|
82
103
|
|
83
|
-
|
84
|
-
|
85
|
-
self.tool_list[tool_name] = all_tools_list[tool_name]
|
104
|
+
# Get tool descriptions from ATOMIC_ACTION_SIGNATURES and custom_tools
|
105
|
+
self.tool_descriptions = get_atomic_tool_descriptions()
|
86
106
|
|
87
|
-
|
107
|
+
# Add custom tool descriptions if provided
|
108
|
+
custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
|
109
|
+
if custom_descriptions:
|
110
|
+
self.tool_descriptions += "\n" + custom_descriptions
|
111
|
+
|
112
|
+
# Add descriptions for remember/complete
|
113
|
+
self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
|
114
|
+
self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"
|
88
115
|
|
89
116
|
self.system_prompt_content = persona.system_prompt.format(
|
90
117
|
tool_descriptions=self.tool_descriptions
|
@@ -162,7 +189,7 @@ class CodeActAgent(Workflow):
|
|
162
189
|
logger.info(f"🧠 Step {self.steps_counter}: Thinking...")
|
163
190
|
|
164
191
|
model = self.llm.class_name()
|
165
|
-
|
192
|
+
|
166
193
|
if "remember" in self.tool_list and self.remembered_info:
|
167
194
|
await ctx.store.set("remembered_info", self.remembered_info)
|
168
195
|
chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
|
@@ -178,7 +205,7 @@ class CodeActAgent(Workflow):
|
|
178
205
|
logger.warning(
|
179
206
|
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
180
207
|
)
|
181
|
-
elif self.vision
|
208
|
+
elif self.vision: # if vision is enabled, add screenshot to chat history
|
182
209
|
chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
|
183
210
|
|
184
211
|
if context == "ui_state":
|
@@ -190,8 +217,8 @@ class CodeActAgent(Workflow):
|
|
190
217
|
state["a11y_tree"], chat_history
|
191
218
|
)
|
192
219
|
chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
|
193
|
-
except Exception
|
194
|
-
logger.warning(
|
220
|
+
except Exception:
|
221
|
+
logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
|
195
222
|
|
196
223
|
|
197
224
|
if context == "packages":
|
@@ -254,7 +281,7 @@ class CodeActAgent(Workflow):
|
|
254
281
|
"""Execute the code and return the result."""
|
255
282
|
code = ev.code
|
256
283
|
assert code, "Code cannot be empty."
|
257
|
-
logger.info(
|
284
|
+
logger.info("⚡ Executing action...")
|
258
285
|
logger.info(f"Code to execute:\n```python\n{code}\n```")
|
259
286
|
|
260
287
|
try:
|
@@ -269,16 +296,16 @@ class CodeActAgent(Workflow):
|
|
269
296
|
for ui_state in ui_states[:-1]:
|
270
297
|
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
|
271
298
|
|
272
|
-
if self.tools.finished
|
299
|
+
if self.tools.finished:
|
273
300
|
logger.debug(" - Task completed.")
|
274
301
|
event = TaskEndEvent(
|
275
302
|
success=self.tools.success, reason=self.tools.reason
|
276
303
|
)
|
277
304
|
ctx.write_event_to_stream(event)
|
278
305
|
return event
|
279
|
-
|
306
|
+
|
280
307
|
self.remembered_info = self.tools.memory
|
281
|
-
|
308
|
+
|
282
309
|
event = TaskExecutionResultEvent(output=str(result['output']))
|
283
310
|
ctx.write_event_to_stream(event)
|
284
311
|
return event
|
@@ -323,11 +350,11 @@ class CodeActAgent(Workflow):
|
|
323
350
|
"""Finalize the workflow."""
|
324
351
|
self.tools.finished = False
|
325
352
|
await ctx.store.set("chat_memory", self.chat_memory)
|
326
|
-
|
353
|
+
|
327
354
|
# Add final state observation to episodic memory
|
328
355
|
if self.vision:
|
329
356
|
await self._add_final_state_observation(ctx)
|
330
|
-
|
357
|
+
|
331
358
|
result = {}
|
332
359
|
result.update(
|
333
360
|
{
|
@@ -403,7 +430,7 @@ class CodeActAgent(Workflow):
|
|
403
430
|
logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
|
404
431
|
time.sleep(seconds)
|
405
432
|
else:
|
406
|
-
logger.error(
|
433
|
+
logger.error("Rate limit error. Retrying in 5 seconds...")
|
407
434
|
time.sleep(40)
|
408
435
|
logger.debug("🔍 Retrying call to LLM...")
|
409
436
|
response = await self.llm.achat(messages=messages_to_send)
|
@@ -452,26 +479,25 @@ class CodeActAgent(Workflow):
|
|
452
479
|
try:
|
453
480
|
# Get current screenshot and UI state
|
454
481
|
screenshot = None
|
455
|
-
|
456
|
-
|
482
|
+
|
457
483
|
try:
|
458
484
|
_, screenshot_bytes = self.tools.take_screenshot()
|
459
485
|
screenshot = screenshot_bytes
|
460
486
|
except Exception as e:
|
461
487
|
logger.warning(f"Failed to capture final screenshot: {e}")
|
462
|
-
|
488
|
+
|
463
489
|
try:
|
464
490
|
(a11y_tree, phone_state) = self.tools.get_state()
|
465
491
|
except Exception as e:
|
466
492
|
logger.warning(f"Failed to capture final UI state: {e}")
|
467
|
-
|
493
|
+
|
468
494
|
# Create final observation chat history and response
|
469
495
|
final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
|
470
496
|
final_response = {
|
471
|
-
"role": "user",
|
497
|
+
"role": "user",
|
472
498
|
"content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
|
473
499
|
}
|
474
|
-
|
500
|
+
|
475
501
|
# Create final episodic memory step
|
476
502
|
final_step = EpisodicMemoryStep(
|
477
503
|
chat_history=json.dumps(final_chat_history),
|
@@ -479,9 +505,9 @@ class CodeActAgent(Workflow):
|
|
479
505
|
timestamp=time.time(),
|
480
506
|
screenshot=screenshot
|
481
507
|
)
|
482
|
-
|
508
|
+
|
483
509
|
self.episodic_memory.steps.append(final_step)
|
484
510
|
logger.info("Added final state observation to episodic memory")
|
485
|
-
|
511
|
+
|
486
512
|
except Exception as e:
|
487
513
|
logger.error(f"Failed to add final state observation: {e}")
|
droidrun/agent/codeact/events.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
from llama_index.core.llms import ChatMessage
|
2
4
|
from llama_index.core.workflow import Event
|
3
|
-
from typing import Optional
|
4
5
|
|
5
6
|
from droidrun.agent.usage import UsageResult
|
7
|
+
|
6
8
|
from ..context.episodic_memory import EpisodicMemory
|
7
9
|
|
10
|
+
|
8
11
|
class TaskInputEvent(Event):
|
9
12
|
input: list[ChatMessage]
|
10
13
|
|
@@ -12,7 +15,7 @@ class TaskInputEvent(Event):
|
|
12
15
|
|
13
16
|
class TaskThinkingEvent(Event):
|
14
17
|
thoughts: Optional[str] = None
|
15
|
-
code: Optional[str] = None
|
18
|
+
code: Optional[str] = None
|
16
19
|
usage: Optional[UsageResult] = None
|
17
20
|
|
18
21
|
class TaskExecutionEvent(Event):
|
@@ -28,4 +31,4 @@ class TaskEndEvent(Event):
|
|
28
31
|
reason: str
|
29
32
|
|
30
33
|
class EpisodicMemoryEvent(Event):
|
31
|
-
episodic_memory: EpisodicMemory
|
34
|
+
episodic_memory: EpisodicMemory
|
droidrun/agent/common/events.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
from typing import Any, Dict
|
2
|
+
|
1
3
|
from llama_index.core.workflow import Event
|
2
|
-
|
4
|
+
|
3
5
|
|
4
6
|
class ScreenshotEvent(Event):
|
5
7
|
screenshot: bytes
|
@@ -16,7 +18,7 @@ class TapActionEvent(MacroEvent):
|
|
16
18
|
element_index: int = None
|
17
19
|
element_text: str = ""
|
18
20
|
element_bounds: str = ""
|
19
|
-
|
21
|
+
|
20
22
|
class SwipeActionEvent(MacroEvent):
|
21
23
|
"""Event for swipe actions with coordinates"""
|
22
24
|
start_x: int
|
@@ -9,15 +9,13 @@ This module contains:
|
|
9
9
|
from .agent_persona import AgentPersona
|
10
10
|
from .context_injection_manager import ContextInjectionManager
|
11
11
|
from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
12
|
-
from .
|
13
|
-
from .task_manager import TaskManager, Task
|
12
|
+
from .task_manager import Task, TaskManager
|
14
13
|
|
15
14
|
__all__ = [
|
16
15
|
"AgentPersona",
|
17
16
|
"ContextInjectionManager",
|
18
17
|
"EpisodicMemory",
|
19
18
|
"EpisodicMemoryStep",
|
20
|
-
"Reflection",
|
21
19
|
"TaskManager",
|
22
20
|
"Task"
|
23
21
|
]
|
@@ -5,12 +5,12 @@ This module provides the ContextInjectionManager class that manages different ag
|
|
5
5
|
each with specific system prompts, contexts, and tool subsets tailored for specialized tasks.
|
6
6
|
"""
|
7
7
|
|
8
|
-
import logging
|
9
|
-
from typing import Optional, List
|
10
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
11
8
|
#import chromadb
|
12
9
|
import json
|
13
|
-
|
10
|
+
import logging
|
11
|
+
from typing import List, Optional
|
12
|
+
|
13
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
14
14
|
|
15
15
|
logger = logging.getLogger("droidrun")
|
16
16
|
|
@@ -59,8 +59,8 @@ class ContextInjectionManager:
|
|
59
59
|
Returns:
|
60
60
|
AgentPersona instance or None if not found
|
61
61
|
"""
|
62
|
-
|
62
|
+
|
63
63
|
return self.personas.get(agent_type)
|
64
|
-
|
64
|
+
|
65
65
|
def get_all_personas(self) -> List[str]:
|
66
66
|
return self.personas
|
@@ -1,7 +1,9 @@
|
|
1
1
|
from dataclasses import dataclass, field
|
2
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
3
2
|
from typing import List, Optional
|
4
3
|
|
4
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
5
|
+
|
6
|
+
|
5
7
|
@dataclass
|
6
8
|
class EpisodicMemoryStep:
|
7
9
|
chat_history: str
|
@@ -9,7 +11,7 @@ class EpisodicMemoryStep:
|
|
9
11
|
timestamp: float
|
10
12
|
screenshot: Optional[bytes]
|
11
13
|
|
12
|
-
@dataclass
|
14
|
+
@dataclass
|
13
15
|
class EpisodicMemory:
|
14
16
|
persona: AgentPersona
|
15
|
-
steps: List[EpisodicMemoryStep] = field(default_factory=list)
|
17
|
+
steps: List[EpisodicMemoryStep] = field(default_factory=list)
|
@@ -1,11 +1,11 @@
|
|
1
|
-
from .default import DEFAULT
|
2
|
-
from .ui_expert import UI_EXPERT
|
3
1
|
from .app_starter import APP_STARTER_EXPERT
|
4
2
|
from .big_agent import BIG_AGENT
|
3
|
+
from .default import DEFAULT
|
4
|
+
from .ui_expert import UI_EXPERT
|
5
5
|
|
6
6
|
__all__ = [
|
7
7
|
'DEFAULT',
|
8
8
|
'UI_EXPERT',
|
9
9
|
'APP_STARTER_EXPERT',
|
10
10
|
'BIG_AGENT',
|
11
|
-
]
|
11
|
+
]
|
@@ -2,7 +2,7 @@ from droidrun.agent.context.agent_persona import AgentPersona
|
|
2
2
|
from droidrun.tools import Tools
|
3
3
|
|
4
4
|
APP_STARTER_EXPERT = AgentPersona(
|
5
|
-
name="AppStarterExpert",
|
5
|
+
name="AppStarterExpert",
|
6
6
|
description="Specialized in app launching",
|
7
7
|
expertise_areas=[
|
8
8
|
"app launching"
|
@@ -37,8 +37,8 @@ APP_STARTER_EXPERT = AgentPersona(
|
|
37
37
|
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
38
38
|
{tool_descriptions}
|
39
39
|
|
40
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
40
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
41
41
|
|
42
42
|
You focus ONLY on app launching and package management - UI interactions within apps are handled by UI specialists.""",
|
43
43
|
|
44
|
-
)
|
44
|
+
)
|
@@ -5,7 +5,7 @@ BIG_AGENT = AgentPersona(
|
|
5
5
|
name="Big Agent",
|
6
6
|
description="Big Agent. Use this as your Big Agent",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling", "app launching"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
@@ -90,7 +90,7 @@ BIG_AGENT = AgentPersona(
|
|
90
90
|
- Present the results clearly and concisely as if you computed them directly
|
91
91
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
92
92
|
|
93
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
93
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
94
94
|
"""
|
95
95
|
|
96
|
-
)
|
96
|
+
)
|
@@ -5,7 +5,7 @@ DEFAULT = AgentPersona(
|
|
5
5
|
name="Default",
|
6
6
|
description="Default Agent. Use this as your Default",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling", "app launching"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
@@ -89,7 +89,7 @@ DEFAULT = AgentPersona(
|
|
89
89
|
- Present the results clearly and concisely as if you computed them directly
|
90
90
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
91
91
|
|
92
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
92
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
93
93
|
"""
|
94
94
|
|
95
|
-
)
|
95
|
+
)
|
@@ -5,14 +5,14 @@ UI_EXPERT = AgentPersona(
|
|
5
5
|
name="UIExpert",
|
6
6
|
description="Specialized in UI interactions, navigation, and form filling",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
12
12
|
Tools.swipe.__name__,
|
13
13
|
Tools.input_text.__name__,
|
14
14
|
Tools.press_key.__name__,
|
15
|
-
Tools.tap_by_index.__name__,
|
15
|
+
Tools.tap_by_index.__name__,
|
16
16
|
Tools.drag.__name__,
|
17
17
|
Tools.remember.__name__,
|
18
18
|
Tools.complete.__name__
|
@@ -54,8 +54,8 @@ UI_EXPERT = AgentPersona(
|
|
54
54
|
- Remember important UI state information for context
|
55
55
|
|
56
56
|
You do NOT handle app launching or package management - that's handled by other specialists.
|
57
|
-
|
58
|
-
|
57
|
+
|
58
|
+
|
59
59
|
## Available Context:
|
60
60
|
In your execution environment, you have access to:
|
61
61
|
- `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
|
@@ -99,7 +99,7 @@ UI_EXPERT = AgentPersona(
|
|
99
99
|
- If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
|
100
100
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
101
101
|
|
102
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
102
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
103
103
|
|
104
104
|
You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
|
105
105
|
"""
|
@@ -1,7 +1,8 @@
|
|
1
|
+
import copy
|
1
2
|
import os
|
2
|
-
from typing import List, Dict, Optional
|
3
3
|
from dataclasses import dataclass
|
4
|
-
import
|
4
|
+
from typing import Dict, List, Optional
|
5
|
+
|
5
6
|
|
6
7
|
@dataclass
|
7
8
|
class Task:
|
@@ -14,13 +15,13 @@ class Task:
|
|
14
15
|
# Optional fields to carry success/failure context back to the planner
|
15
16
|
message: Optional[str] = None
|
16
17
|
failure_reason: Optional[str] = None
|
17
|
-
|
18
|
+
|
18
19
|
|
19
20
|
class TaskManager:
|
20
21
|
"""
|
21
22
|
Manages a list of tasks for an agent, each with a status and assigned specialized agent.
|
22
23
|
"""
|
23
|
-
STATUS_PENDING = "pending"
|
24
|
+
STATUS_PENDING = "pending"
|
24
25
|
STATUS_COMPLETED = "completed"
|
25
26
|
STATUS_FAILED = "failed"
|
26
27
|
|
@@ -32,14 +33,14 @@ class TaskManager:
|
|
32
33
|
def __init__(self):
|
33
34
|
"""Initializes an empty task list."""
|
34
35
|
self.tasks: List[Task] = []
|
35
|
-
self.goal_completed = False
|
36
|
+
self.goal_completed = False
|
36
37
|
self.message = None
|
37
|
-
self.task_history = []
|
38
|
+
self.task_history = []
|
38
39
|
self.file_path = os.path.join(os.path.dirname(__file__), "todo.txt")
|
39
40
|
|
40
41
|
def get_all_tasks(self) -> List[Task]:
|
41
42
|
return self.tasks
|
42
|
-
|
43
|
+
|
43
44
|
def get_task_history(self):
|
44
45
|
return self.task_history
|
45
46
|
|
@@ -67,9 +68,6 @@ class TaskManager:
|
|
67
68
|
|
68
69
|
def get_failed_tasks(self) -> list[dict]:
|
69
70
|
return [task for task in self.task_history if task.status == self.STATUS_FAILED]
|
70
|
-
|
71
|
-
def get_task_history(self) -> list[dict]:
|
72
|
-
return self.task_history
|
73
71
|
|
74
72
|
|
75
73
|
def save_to_file(self):
|
@@ -89,12 +87,12 @@ class TaskManager:
|
|
89
87
|
def set_tasks_with_agents(self, task_assignments: List[Dict[str, str]]):
|
90
88
|
"""
|
91
89
|
Clears the current task list and sets new tasks with their assigned agents.
|
92
|
-
|
90
|
+
|
93
91
|
Args:
|
94
92
|
task_assignments: A list of dictionaries, each containing:
|
95
93
|
- 'task': The task description string
|
96
94
|
- 'agent': The agent type
|
97
|
-
|
95
|
+
|
98
96
|
Example:
|
99
97
|
task_manager.set_tasks_with_agents([
|
100
98
|
{'task': 'Open Gmail app', 'agent': 'AppStarterExpert'},
|
@@ -106,21 +104,21 @@ class TaskManager:
|
|
106
104
|
for i, assignment in enumerate(task_assignments):
|
107
105
|
if not isinstance(assignment, dict) or 'task' not in assignment:
|
108
106
|
raise ValueError(f"Each task assignment must be a dictionary with 'task' key at index {i}.")
|
109
|
-
|
107
|
+
|
110
108
|
task_description = assignment['task']
|
111
109
|
if not isinstance(task_description, str) or not task_description.strip():
|
112
110
|
raise ValueError(f"Task description must be a non-empty string at index {i}.")
|
113
|
-
|
111
|
+
|
114
112
|
agent_type = assignment.get('agent', 'Default')
|
115
|
-
|
113
|
+
|
116
114
|
task_obj = Task(
|
117
115
|
description=task_description.strip(),
|
118
116
|
status=self.STATUS_PENDING,
|
119
117
|
agent_type=agent_type
|
120
118
|
)
|
121
|
-
|
119
|
+
|
122
120
|
self.tasks.append(task_obj)
|
123
|
-
|
121
|
+
|
124
122
|
print(f"Tasks set with agents: {len(self.tasks)} tasks added.")
|
125
123
|
self.save_to_file()
|
126
124
|
except Exception as e:
|
droidrun/agent/droid/__init__.py
CHANGED