droidrun-0.3.8-py3-none-any.whl → droidrun-0.3.10.dev2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +2 -3
- droidrun/__main__.py +1 -1
- droidrun/agent/__init__.py +1 -1
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +112 -48
- droidrun/agent/codeact/events.py +6 -3
- droidrun/agent/codeact/prompts.py +2 -2
- droidrun/agent/common/constants.py +2 -0
- droidrun/agent/common/events.py +5 -3
- droidrun/agent/context/__init__.py +1 -3
- droidrun/agent/context/agent_persona.py +2 -1
- droidrun/agent/context/context_injection_manager.py +6 -6
- droidrun/agent/context/episodic_memory.py +5 -3
- droidrun/agent/context/personas/__init__.py +3 -3
- droidrun/agent/context/personas/app_starter.py +3 -3
- droidrun/agent/context/personas/big_agent.py +3 -3
- droidrun/agent/context/personas/default.py +3 -3
- droidrun/agent/context/personas/ui_expert.py +5 -5
- droidrun/agent/context/task_manager.py +15 -17
- droidrun/agent/droid/__init__.py +1 -1
- droidrun/agent/droid/droid_agent.py +327 -182
- droidrun/agent/droid/events.py +91 -9
- droidrun/agent/executor/__init__.py +13 -0
- droidrun/agent/executor/events.py +24 -0
- droidrun/agent/executor/executor_agent.py +327 -0
- droidrun/agent/executor/prompts.py +136 -0
- droidrun/agent/manager/__init__.py +18 -0
- droidrun/agent/manager/events.py +20 -0
- droidrun/agent/manager/manager_agent.py +459 -0
- droidrun/agent/manager/prompts.py +223 -0
- droidrun/agent/oneflows/app_starter_workflow.py +118 -0
- droidrun/agent/oneflows/text_manipulator.py +204 -0
- droidrun/agent/planner/__init__.py +3 -3
- droidrun/agent/planner/events.py +6 -3
- droidrun/agent/planner/planner_agent.py +60 -53
- droidrun/agent/planner/prompts.py +2 -2
- droidrun/agent/usage.py +15 -13
- droidrun/agent/utils/__init__.py +11 -1
- droidrun/agent/utils/async_utils.py +2 -1
- droidrun/agent/utils/chat_utils.py +48 -60
- droidrun/agent/utils/device_state_formatter.py +177 -0
- droidrun/agent/utils/executer.py +13 -12
- droidrun/agent/utils/inference.py +114 -0
- droidrun/agent/utils/llm_picker.py +2 -0
- droidrun/agent/utils/message_utils.py +85 -0
- droidrun/agent/utils/tools.py +220 -0
- droidrun/agent/utils/trajectory.py +8 -7
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +29 -28
- droidrun/cli/main.py +279 -143
- droidrun/config_manager/__init__.py +25 -0
- droidrun/config_manager/config_manager.py +583 -0
- droidrun/macro/__init__.py +2 -2
- droidrun/macro/__main__.py +1 -1
- droidrun/macro/cli.py +36 -34
- droidrun/macro/replay.py +7 -9
- droidrun/portal.py +1 -1
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -4
- droidrun/telemetry/phoenix.py +173 -0
- droidrun/telemetry/tracker.py +7 -5
- droidrun/tools/__init__.py +1 -1
- droidrun/tools/adb.py +210 -82
- droidrun/tools/ios.py +7 -5
- droidrun/tools/tools.py +25 -8
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
- droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
- droidrun/agent/common/default.py +0 -5
- droidrun/agent/context/reflection.py +0 -20
- droidrun/agent/oneflows/reflector.py +0 -265
- droidrun-0.3.8.dist-info/RECORD +0 -55
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py
CHANGED
@@ -5,13 +5,12 @@ DroidRun - A framework for controlling Android devices through LLM agents.
|
|
5
5
|
__version__ = "0.3.0"
|
6
6
|
|
7
7
|
# Import main classes for easier access
|
8
|
-
from droidrun.agent.utils.llm_picker import load_llm
|
9
|
-
from droidrun.tools import Tools, AdbTools, IOSTools
|
10
8
|
from droidrun.agent.droid import DroidAgent
|
9
|
+
from droidrun.agent.utils.llm_picker import load_llm
|
11
10
|
|
12
11
|
# Import macro functionality
|
13
12
|
from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
|
14
|
-
|
13
|
+
from droidrun.tools import AdbTools, IOSTools, Tools
|
15
14
|
|
16
15
|
# Make main components available at package level
|
17
16
|
__all__ = [
|
droidrun/__main__.py
CHANGED
droidrun/agent/__init__.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
from droidrun.agent.codeact.codeact_agent import CodeActAgent
|
2
|
-
from droidrun.agent.codeact.prompts import
|
3
|
-
DEFAULT_CODE_ACT_USER_PROMPT,
|
4
|
-
DEFAULT_NO_THOUGHTS_PROMPT
|
5
|
-
)
|
2
|
+
from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT
|
6
3
|
|
7
4
|
__all__ = [
|
8
5
|
"CodeActAgent",
|
@@ -1,36 +1,37 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
1
3
|
import logging
|
2
4
|
import re
|
3
5
|
import time
|
4
|
-
import
|
5
|
-
|
6
|
-
import os
|
7
|
-
from typing import List, Optional, Tuple, Union
|
6
|
+
from typing import List, Union
|
7
|
+
|
8
8
|
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
9
|
-
from llama_index.core.prompts import PromptTemplate
|
10
9
|
from llama_index.core.llms.llm import LLM
|
11
|
-
from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
|
12
10
|
from llama_index.core.memory import Memory
|
11
|
+
from llama_index.core.prompts import PromptTemplate
|
12
|
+
from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
|
13
|
+
|
13
14
|
from droidrun.agent.codeact.events import (
|
14
|
-
|
15
|
+
EpisodicMemoryEvent,
|
15
16
|
TaskEndEvent,
|
16
17
|
TaskExecutionEvent,
|
17
18
|
TaskExecutionResultEvent,
|
19
|
+
TaskInputEvent,
|
18
20
|
TaskThinkingEvent,
|
19
|
-
EpisodicMemoryEvent,
|
20
21
|
)
|
21
|
-
from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
|
22
|
-
from droidrun.agent.usage import get_usage_from_response
|
23
|
-
from droidrun.agent.utils import chat_utils
|
24
|
-
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
25
22
|
from droidrun.agent.codeact.prompts import (
|
26
23
|
DEFAULT_CODE_ACT_USER_PROMPT,
|
27
24
|
DEFAULT_NO_THOUGHTS_PROMPT,
|
28
25
|
)
|
29
|
-
|
26
|
+
from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
|
27
|
+
from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
|
28
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
30
29
|
from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
30
|
+
from droidrun.agent.usage import get_usage_from_response
|
31
|
+
from droidrun.agent.utils import chat_utils
|
32
|
+
from droidrun.agent.utils.executer import SimpleCodeExecutor
|
33
|
+
from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, get_atomic_tool_descriptions, build_custom_tool_descriptions
|
31
34
|
from droidrun.tools import Tools
|
32
|
-
from typing import Optional, Dict, Tuple, List, Any, Callable
|
33
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
34
35
|
|
35
36
|
logger = logging.getLogger("droidrun")
|
36
37
|
|
@@ -48,8 +49,8 @@ class CodeActAgent(Workflow):
|
|
48
49
|
persona: AgentPersona,
|
49
50
|
vision: bool,
|
50
51
|
tools_instance: "Tools",
|
51
|
-
all_tools_list: Dict[str, Callable[..., Any]],
|
52
52
|
max_steps: int = 5,
|
53
|
+
custom_tools: dict = None,
|
53
54
|
debug: bool = False,
|
54
55
|
*args,
|
55
56
|
**kwargs,
|
@@ -77,13 +78,40 @@ class CodeActAgent(Workflow):
|
|
77
78
|
|
78
79
|
self.tools = tools_instance
|
79
80
|
|
81
|
+
# Merge custom_tools with ATOMIC_ACTION_SIGNATURES
|
82
|
+
# Custom tools are treated the same as atomic actions by CodeAct
|
83
|
+
merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
|
84
|
+
|
85
|
+
# Build tool_list from merged signatures
|
80
86
|
self.tool_list = {}
|
87
|
+
for action_name, signature in merged_signatures.items():
|
88
|
+
func = signature["function"]
|
89
|
+
# Create bound function (curry tools_instance as first argument)
|
90
|
+
# Handle both sync and async functions
|
91
|
+
if asyncio.iscoroutinefunction(func):
|
92
|
+
async def make_async_bound(f, ti):
|
93
|
+
async def bound_func(*args, **kwargs):
|
94
|
+
return await f(ti, *args, **kwargs)
|
95
|
+
return bound_func
|
96
|
+
self.tool_list[action_name] = asyncio.run(make_async_bound(func, tools_instance))
|
97
|
+
else:
|
98
|
+
self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)
|
81
99
|
|
82
|
-
|
83
|
-
|
84
|
-
|
100
|
+
# Add non-atomic tools (remember, complete) from tools_instance
|
101
|
+
self.tool_list["remember"] = tools_instance.remember
|
102
|
+
self.tool_list["complete"] = tools_instance.complete
|
85
103
|
|
86
|
-
|
104
|
+
# Get tool descriptions from ATOMIC_ACTION_SIGNATURES and custom_tools
|
105
|
+
self.tool_descriptions = get_atomic_tool_descriptions()
|
106
|
+
|
107
|
+
# Add custom tool descriptions if provided
|
108
|
+
custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
|
109
|
+
if custom_descriptions:
|
110
|
+
self.tool_descriptions += "\n" + custom_descriptions
|
111
|
+
|
112
|
+
# Add descriptions for remember/complete
|
113
|
+
self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
|
114
|
+
self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"
|
87
115
|
|
88
116
|
self.system_prompt_content = persona.system_prompt.format(
|
89
117
|
tool_descriptions=self.tool_descriptions
|
@@ -109,7 +137,8 @@ class CodeActAgent(Workflow):
|
|
109
137
|
"""Prepare chat history from user input."""
|
110
138
|
logger.info("💬 Preparing chat for task execution...")
|
111
139
|
|
112
|
-
|
140
|
+
|
141
|
+
self.chat_memory: Memory = await ctx.store.get(
|
113
142
|
"chat_memory", default=Memory.from_defaults()
|
114
143
|
)
|
115
144
|
|
@@ -135,7 +164,7 @@ class CodeActAgent(Workflow):
|
|
135
164
|
|
136
165
|
await self.chat_memory.aput(self.user_message)
|
137
166
|
|
138
|
-
await ctx.set("chat_memory", self.chat_memory)
|
167
|
+
await ctx.store.set("chat_memory", self.chat_memory)
|
139
168
|
input_messages = self.chat_memory.get_all()
|
140
169
|
return TaskInputEvent(input=input_messages)
|
141
170
|
|
@@ -160,9 +189,9 @@ class CodeActAgent(Workflow):
|
|
160
189
|
logger.info(f"🧠 Step {self.steps_counter}: Thinking...")
|
161
190
|
|
162
191
|
model = self.llm.class_name()
|
163
|
-
|
192
|
+
|
164
193
|
if "remember" in self.tool_list and self.remembered_info:
|
165
|
-
await ctx.set("remembered_info", self.remembered_info)
|
194
|
+
await ctx.store.set("remembered_info", self.remembered_info)
|
166
195
|
chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)
|
167
196
|
|
168
197
|
for context in self.required_context:
|
@@ -171,25 +200,25 @@ class CodeActAgent(Workflow):
|
|
171
200
|
screenshot = (self.tools.take_screenshot())[1]
|
172
201
|
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
173
202
|
|
174
|
-
await ctx.set("screenshot", screenshot)
|
203
|
+
await ctx.store.set("screenshot", screenshot)
|
175
204
|
if model == "DeepSeek":
|
176
205
|
logger.warning(
|
177
206
|
"[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
|
178
207
|
)
|
179
|
-
elif self.vision
|
208
|
+
elif self.vision: # if vision is enabled, add screenshot to chat history
|
180
209
|
chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)
|
181
210
|
|
182
211
|
if context == "ui_state":
|
183
212
|
try:
|
184
213
|
state = self.tools.get_state()
|
185
|
-
await ctx.set("ui_state", state["a11y_tree"])
|
214
|
+
await ctx.store.set("ui_state", state["a11y_tree"])
|
186
215
|
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
|
187
216
|
chat_history = await chat_utils.add_ui_text_block(
|
188
217
|
state["a11y_tree"], chat_history
|
189
218
|
)
|
190
219
|
chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
|
191
|
-
except Exception
|
192
|
-
logger.warning(
|
220
|
+
except Exception:
|
221
|
+
logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
|
193
222
|
|
194
223
|
|
195
224
|
if context == "packages":
|
@@ -252,7 +281,7 @@ class CodeActAgent(Workflow):
|
|
252
281
|
"""Execute the code and return the result."""
|
253
282
|
code = ev.code
|
254
283
|
assert code, "Code cannot be empty."
|
255
|
-
logger.info(
|
284
|
+
logger.info("⚡ Executing action...")
|
256
285
|
logger.info(f"Code to execute:\n```python\n{code}\n```")
|
257
286
|
|
258
287
|
try:
|
@@ -267,16 +296,16 @@ class CodeActAgent(Workflow):
|
|
267
296
|
for ui_state in ui_states[:-1]:
|
268
297
|
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))
|
269
298
|
|
270
|
-
if self.tools.finished
|
299
|
+
if self.tools.finished:
|
271
300
|
logger.debug(" - Task completed.")
|
272
301
|
event = TaskEndEvent(
|
273
302
|
success=self.tools.success, reason=self.tools.reason
|
274
303
|
)
|
275
304
|
ctx.write_event_to_stream(event)
|
276
305
|
return event
|
277
|
-
|
306
|
+
|
278
307
|
self.remembered_info = self.tools.memory
|
279
|
-
|
308
|
+
|
280
309
|
event = TaskExecutionResultEvent(output=str(result['output']))
|
281
310
|
ctx.write_event_to_stream(event)
|
282
311
|
return event
|
@@ -320,12 +349,12 @@ class CodeActAgent(Workflow):
|
|
320
349
|
async def finalize(self, ev: TaskEndEvent, ctx: Context) -> StopEvent:
|
321
350
|
"""Finalize the workflow."""
|
322
351
|
self.tools.finished = False
|
323
|
-
await ctx.set("chat_memory", self.chat_memory)
|
324
|
-
|
352
|
+
await ctx.store.set("chat_memory", self.chat_memory)
|
353
|
+
|
325
354
|
# Add final state observation to episodic memory
|
326
355
|
if self.vision:
|
327
356
|
await self._add_final_state_observation(ctx)
|
328
|
-
|
357
|
+
|
329
358
|
result = {}
|
330
359
|
result.update(
|
331
360
|
{
|
@@ -347,14 +376,15 @@ class CodeActAgent(Workflow):
|
|
347
376
|
self, ctx: Context, chat_history: List[ChatMessage]
|
348
377
|
) -> ChatResponse | None:
|
349
378
|
logger.debug("🔍 Getting LLM response...")
|
350
|
-
|
379
|
+
limited_history = self._limit_history(chat_history)
|
380
|
+
messages_to_send = [self.system_prompt] + limited_history
|
351
381
|
messages_to_send = [chat_utils.message_copy(msg) for msg in messages_to_send]
|
352
382
|
try:
|
353
383
|
response = await self.llm.achat(messages=messages_to_send)
|
354
384
|
logger.debug("🔍 Received LLM response.")
|
355
385
|
|
356
386
|
filtered_chat_history = []
|
357
|
-
for msg in
|
387
|
+
for msg in limited_history:
|
358
388
|
filtered_msg = chat_utils.message_copy(msg)
|
359
389
|
if hasattr(filtered_msg, "blocks") and filtered_msg.blocks:
|
360
390
|
filtered_msg.blocks = [
|
@@ -379,9 +409,10 @@ class CodeActAgent(Workflow):
|
|
379
409
|
chat_history=chat_history_str,
|
380
410
|
response=response_str,
|
381
411
|
timestamp=time.time(),
|
382
|
-
screenshot=(await ctx.get("screenshot", None))
|
412
|
+
screenshot=(await ctx.store.get("screenshot", None))
|
383
413
|
)
|
384
414
|
|
415
|
+
|
385
416
|
self.episodic_memory.steps.append(step)
|
386
417
|
|
387
418
|
assert hasattr(
|
@@ -399,41 +430,74 @@ class CodeActAgent(Workflow):
|
|
399
430
|
logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
|
400
431
|
time.sleep(seconds)
|
401
432
|
else:
|
402
|
-
logger.error(
|
433
|
+
logger.error("Rate limit error. Retrying in 5 seconds...")
|
403
434
|
time.sleep(40)
|
404
435
|
logger.debug("🔍 Retrying call to LLM...")
|
405
436
|
response = await self.llm.achat(messages=messages_to_send)
|
437
|
+
elif (
|
438
|
+
self.llm.class_name() == "Anthropic_LLM"
|
439
|
+
and "overloaded_error" in str(e)
|
440
|
+
):
|
441
|
+
# Use exponential backoff for Anthropic errors
|
442
|
+
if not hasattr(self, '_anthropic_retry_count'):
|
443
|
+
self._anthropic_retry_count = 0
|
444
|
+
self._anthropic_retry_count += 1
|
445
|
+
seconds = min(2 ** self._anthropic_retry_count, 60) # Cap at 60 seconds
|
446
|
+
logger.error(f"Anthropic overload error. Retrying in {seconds} seconds... (attempt {self._anthropic_retry_count})")
|
447
|
+
time.sleep(seconds)
|
448
|
+
logger.debug("🔍 Retrying call to LLM...")
|
449
|
+
response = await self.llm.achat(messages=messages_to_send)
|
450
|
+
self._anthropic_retry_count = 0 # Reset on success
|
406
451
|
else:
|
407
452
|
logger.error(f"Could not get an answer from LLM: {repr(e)}")
|
408
453
|
raise e
|
409
454
|
logger.debug(" - Received response from LLM.")
|
410
455
|
return response
|
411
456
|
|
457
|
+
def _limit_history(
|
458
|
+
self, chat_history: List[ChatMessage]
|
459
|
+
) -> List[ChatMessage]:
|
460
|
+
if LLM_HISTORY_LIMIT <= 0:
|
461
|
+
return chat_history
|
462
|
+
|
463
|
+
max_messages = LLM_HISTORY_LIMIT * 2
|
464
|
+
if len(chat_history) <= max_messages:
|
465
|
+
return chat_history
|
466
|
+
|
467
|
+
preserved_head: List[ChatMessage] = []
|
468
|
+
if chat_history and chat_history[0].role == "user":
|
469
|
+
preserved_head = [chat_history[0]]
|
470
|
+
|
471
|
+
tail = chat_history[-max_messages:]
|
472
|
+
if preserved_head and preserved_head[0] in tail:
|
473
|
+
preserved_head = []
|
474
|
+
|
475
|
+
return preserved_head + tail
|
476
|
+
|
412
477
|
async def _add_final_state_observation(self, ctx: Context) -> None:
|
413
478
|
"""Add the current UI state and screenshot as the final observation step."""
|
414
479
|
try:
|
415
480
|
# Get current screenshot and UI state
|
416
481
|
screenshot = None
|
417
|
-
|
418
|
-
|
482
|
+
|
419
483
|
try:
|
420
484
|
_, screenshot_bytes = self.tools.take_screenshot()
|
421
485
|
screenshot = screenshot_bytes
|
422
486
|
except Exception as e:
|
423
487
|
logger.warning(f"Failed to capture final screenshot: {e}")
|
424
|
-
|
488
|
+
|
425
489
|
try:
|
426
490
|
(a11y_tree, phone_state) = self.tools.get_state()
|
427
491
|
except Exception as e:
|
428
492
|
logger.warning(f"Failed to capture final UI state: {e}")
|
429
|
-
|
493
|
+
|
430
494
|
# Create final observation chat history and response
|
431
495
|
final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
|
432
496
|
final_response = {
|
433
|
-
"role": "user",
|
497
|
+
"role": "user",
|
434
498
|
"content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
|
435
499
|
}
|
436
|
-
|
500
|
+
|
437
501
|
# Create final episodic memory step
|
438
502
|
final_step = EpisodicMemoryStep(
|
439
503
|
chat_history=json.dumps(final_chat_history),
|
@@ -441,9 +505,9 @@ class CodeActAgent(Workflow):
|
|
441
505
|
timestamp=time.time(),
|
442
506
|
screenshot=screenshot
|
443
507
|
)
|
444
|
-
|
508
|
+
|
445
509
|
self.episodic_memory.steps.append(final_step)
|
446
510
|
logger.info("Added final state observation to episodic memory")
|
447
|
-
|
511
|
+
|
448
512
|
except Exception as e:
|
449
513
|
logger.error(f"Failed to add final state observation: {e}")
|
droidrun/agent/codeact/events.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
from llama_index.core.llms import ChatMessage
|
2
4
|
from llama_index.core.workflow import Event
|
3
|
-
from typing import Optional
|
4
5
|
|
5
6
|
from droidrun.agent.usage import UsageResult
|
7
|
+
|
6
8
|
from ..context.episodic_memory import EpisodicMemory
|
7
9
|
|
10
|
+
|
8
11
|
class TaskInputEvent(Event):
|
9
12
|
input: list[ChatMessage]
|
10
13
|
|
@@ -12,7 +15,7 @@ class TaskInputEvent(Event):
|
|
12
15
|
|
13
16
|
class TaskThinkingEvent(Event):
|
14
17
|
thoughts: Optional[str] = None
|
15
|
-
code: Optional[str] = None
|
18
|
+
code: Optional[str] = None
|
16
19
|
usage: Optional[UsageResult] = None
|
17
20
|
|
18
21
|
class TaskExecutionEvent(Event):
|
@@ -28,4 +31,4 @@ class TaskEndEvent(Event):
|
|
28
31
|
reason: str
|
29
32
|
|
30
33
|
class EpisodicMemoryEvent(Event):
|
31
|
-
episodic_memory: EpisodicMemory
|
34
|
+
episodic_memory: EpisodicMemory
|
droidrun/agent/common/events.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
from typing import Any, Dict
|
2
|
+
|
1
3
|
from llama_index.core.workflow import Event
|
2
|
-
|
4
|
+
|
3
5
|
|
4
6
|
class ScreenshotEvent(Event):
|
5
7
|
screenshot: bytes
|
@@ -16,7 +18,7 @@ class TapActionEvent(MacroEvent):
|
|
16
18
|
element_index: int = None
|
17
19
|
element_text: str = ""
|
18
20
|
element_bounds: str = ""
|
19
|
-
|
21
|
+
|
20
22
|
class SwipeActionEvent(MacroEvent):
|
21
23
|
"""Event for swipe actions with coordinates"""
|
22
24
|
start_x: int
|
@@ -48,4 +50,4 @@ class StartAppEvent(MacroEvent):
|
|
48
50
|
activity: str = None
|
49
51
|
|
50
52
|
class RecordUIStateEvent(Event):
|
51
|
-
ui_state: list[Dict[str, Any]]
|
53
|
+
ui_state: list[Dict[str, Any]]
|
@@ -9,15 +9,13 @@ This module contains:
|
|
9
9
|
from .agent_persona import AgentPersona
|
10
10
|
from .context_injection_manager import ContextInjectionManager
|
11
11
|
from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
|
12
|
-
from .
|
13
|
-
from .task_manager import TaskManager, Task
|
12
|
+
from .task_manager import Task, TaskManager
|
14
13
|
|
15
14
|
__all__ = [
|
16
15
|
"AgentPersona",
|
17
16
|
"ContextInjectionManager",
|
18
17
|
"EpisodicMemory",
|
19
18
|
"EpisodicMemoryStep",
|
20
|
-
"Reflection",
|
21
19
|
"TaskManager",
|
22
20
|
"Task"
|
23
21
|
]
|
@@ -5,12 +5,12 @@ This module provides the ContextInjectionManager class that manages different ag
|
|
5
5
|
each with specific system prompts, contexts, and tool subsets tailored for specialized tasks.
|
6
6
|
"""
|
7
7
|
|
8
|
-
import logging
|
9
|
-
from typing import Optional, List
|
10
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
11
8
|
#import chromadb
|
12
9
|
import json
|
13
|
-
|
10
|
+
import logging
|
11
|
+
from typing import List, Optional
|
12
|
+
|
13
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
14
14
|
|
15
15
|
logger = logging.getLogger("droidrun")
|
16
16
|
|
@@ -59,8 +59,8 @@ class ContextInjectionManager:
|
|
59
59
|
Returns:
|
60
60
|
AgentPersona instance or None if not found
|
61
61
|
"""
|
62
|
-
|
62
|
+
|
63
63
|
return self.personas.get(agent_type)
|
64
|
-
|
64
|
+
|
65
65
|
def get_all_personas(self) -> List[str]:
|
66
66
|
return self.personas
|
@@ -1,7 +1,9 @@
|
|
1
1
|
from dataclasses import dataclass, field
|
2
|
-
from droidrun.agent.context.agent_persona import AgentPersona
|
3
2
|
from typing import List, Optional
|
4
3
|
|
4
|
+
from droidrun.agent.context.agent_persona import AgentPersona
|
5
|
+
|
6
|
+
|
5
7
|
@dataclass
|
6
8
|
class EpisodicMemoryStep:
|
7
9
|
chat_history: str
|
@@ -9,7 +11,7 @@ class EpisodicMemoryStep:
|
|
9
11
|
timestamp: float
|
10
12
|
screenshot: Optional[bytes]
|
11
13
|
|
12
|
-
@dataclass
|
14
|
+
@dataclass
|
13
15
|
class EpisodicMemory:
|
14
16
|
persona: AgentPersona
|
15
|
-
steps: List[EpisodicMemoryStep] = field(default_factory=list)
|
17
|
+
steps: List[EpisodicMemoryStep] = field(default_factory=list)
|
@@ -1,11 +1,11 @@
|
|
1
|
-
from .default import DEFAULT
|
2
|
-
from .ui_expert import UI_EXPERT
|
3
1
|
from .app_starter import APP_STARTER_EXPERT
|
4
2
|
from .big_agent import BIG_AGENT
|
3
|
+
from .default import DEFAULT
|
4
|
+
from .ui_expert import UI_EXPERT
|
5
5
|
|
6
6
|
__all__ = [
|
7
7
|
'DEFAULT',
|
8
8
|
'UI_EXPERT',
|
9
9
|
'APP_STARTER_EXPERT',
|
10
10
|
'BIG_AGENT',
|
11
|
-
]
|
11
|
+
]
|
@@ -2,7 +2,7 @@ from droidrun.agent.context.agent_persona import AgentPersona
|
|
2
2
|
from droidrun.tools import Tools
|
3
3
|
|
4
4
|
APP_STARTER_EXPERT = AgentPersona(
|
5
|
-
name="AppStarterExpert",
|
5
|
+
name="AppStarterExpert",
|
6
6
|
description="Specialized in app launching",
|
7
7
|
expertise_areas=[
|
8
8
|
"app launching"
|
@@ -37,8 +37,8 @@ APP_STARTER_EXPERT = AgentPersona(
|
|
37
37
|
In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
|
38
38
|
{tool_descriptions}
|
39
39
|
|
40
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
40
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
41
41
|
|
42
42
|
You focus ONLY on app launching and package management - UI interactions within apps are handled by UI specialists.""",
|
43
43
|
|
44
|
-
)
|
44
|
+
)
|
@@ -5,7 +5,7 @@ BIG_AGENT = AgentPersona(
|
|
5
5
|
name="Big Agent",
|
6
6
|
description="Big Agent. Use this as your Big Agent",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling", "app launching"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
@@ -90,7 +90,7 @@ BIG_AGENT = AgentPersona(
|
|
90
90
|
- Present the results clearly and concisely as if you computed them directly
|
91
91
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
92
92
|
|
93
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
93
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
94
94
|
"""
|
95
95
|
|
96
|
-
)
|
96
|
+
)
|
@@ -5,7 +5,7 @@ DEFAULT = AgentPersona(
|
|
5
5
|
name="Default",
|
6
6
|
description="Default Agent. Use this as your Default",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling", "app launching"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
@@ -89,7 +89,7 @@ DEFAULT = AgentPersona(
|
|
89
89
|
- Present the results clearly and concisely as if you computed them directly
|
90
90
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
91
91
|
|
92
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
92
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
93
93
|
"""
|
94
94
|
|
95
|
-
)
|
95
|
+
)
|
@@ -5,14 +5,14 @@ UI_EXPERT = AgentPersona(
|
|
5
5
|
name="UIExpert",
|
6
6
|
description="Specialized in UI interactions, navigation, and form filling",
|
7
7
|
expertise_areas=[
|
8
|
-
"UI navigation", "button interactions", "text input",
|
8
|
+
"UI navigation", "button interactions", "text input",
|
9
9
|
"menu navigation", "form filling", "scrolling"
|
10
10
|
],
|
11
11
|
allowed_tools=[
|
12
12
|
Tools.swipe.__name__,
|
13
13
|
Tools.input_text.__name__,
|
14
14
|
Tools.press_key.__name__,
|
15
|
-
Tools.tap_by_index.__name__,
|
15
|
+
Tools.tap_by_index.__name__,
|
16
16
|
Tools.drag.__name__,
|
17
17
|
Tools.remember.__name__,
|
18
18
|
Tools.complete.__name__
|
@@ -54,8 +54,8 @@ UI_EXPERT = AgentPersona(
|
|
54
54
|
- Remember important UI state information for context
|
55
55
|
|
56
56
|
You do NOT handle app launching or package management - that's handled by other specialists.
|
57
|
-
|
58
|
-
|
57
|
+
|
58
|
+
|
59
59
|
## Available Context:
|
60
60
|
In your execution environment, you have access to:
|
61
61
|
- `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
|
@@ -99,7 +99,7 @@ UI_EXPERT = AgentPersona(
|
|
99
99
|
- If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
|
100
100
|
- Structure your response like you're directly answering the user's query, not explaining how you solved it
|
101
101
|
|
102
|
-
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
102
|
+
Reminder: Always place your Python code between ```...``` tags when you want to run code.
|
103
103
|
|
104
104
|
You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
|
105
105
|
"""
|