droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. droidrun/__init__.py +2 -3
  2. droidrun/__main__.py +1 -1
  3. droidrun/agent/__init__.py +1 -1
  4. droidrun/agent/codeact/__init__.py +1 -4
  5. droidrun/agent/codeact/codeact_agent.py +112 -48
  6. droidrun/agent/codeact/events.py +6 -3
  7. droidrun/agent/codeact/prompts.py +2 -2
  8. droidrun/agent/common/constants.py +2 -0
  9. droidrun/agent/common/events.py +5 -3
  10. droidrun/agent/context/__init__.py +1 -3
  11. droidrun/agent/context/agent_persona.py +2 -1
  12. droidrun/agent/context/context_injection_manager.py +6 -6
  13. droidrun/agent/context/episodic_memory.py +5 -3
  14. droidrun/agent/context/personas/__init__.py +3 -3
  15. droidrun/agent/context/personas/app_starter.py +3 -3
  16. droidrun/agent/context/personas/big_agent.py +3 -3
  17. droidrun/agent/context/personas/default.py +3 -3
  18. droidrun/agent/context/personas/ui_expert.py +5 -5
  19. droidrun/agent/context/task_manager.py +15 -17
  20. droidrun/agent/droid/__init__.py +1 -1
  21. droidrun/agent/droid/droid_agent.py +327 -182
  22. droidrun/agent/droid/events.py +91 -9
  23. droidrun/agent/executor/__init__.py +13 -0
  24. droidrun/agent/executor/events.py +24 -0
  25. droidrun/agent/executor/executor_agent.py +327 -0
  26. droidrun/agent/executor/prompts.py +136 -0
  27. droidrun/agent/manager/__init__.py +18 -0
  28. droidrun/agent/manager/events.py +20 -0
  29. droidrun/agent/manager/manager_agent.py +459 -0
  30. droidrun/agent/manager/prompts.py +223 -0
  31. droidrun/agent/oneflows/app_starter_workflow.py +118 -0
  32. droidrun/agent/oneflows/text_manipulator.py +204 -0
  33. droidrun/agent/planner/__init__.py +3 -3
  34. droidrun/agent/planner/events.py +6 -3
  35. droidrun/agent/planner/planner_agent.py +60 -53
  36. droidrun/agent/planner/prompts.py +2 -2
  37. droidrun/agent/usage.py +15 -13
  38. droidrun/agent/utils/__init__.py +11 -1
  39. droidrun/agent/utils/async_utils.py +2 -1
  40. droidrun/agent/utils/chat_utils.py +48 -60
  41. droidrun/agent/utils/device_state_formatter.py +177 -0
  42. droidrun/agent/utils/executer.py +13 -12
  43. droidrun/agent/utils/inference.py +114 -0
  44. droidrun/agent/utils/llm_picker.py +2 -0
  45. droidrun/agent/utils/message_utils.py +85 -0
  46. droidrun/agent/utils/tools.py +220 -0
  47. droidrun/agent/utils/trajectory.py +8 -7
  48. droidrun/cli/__init__.py +1 -1
  49. droidrun/cli/logs.py +29 -28
  50. droidrun/cli/main.py +279 -143
  51. droidrun/config_manager/__init__.py +25 -0
  52. droidrun/config_manager/config_manager.py +583 -0
  53. droidrun/macro/__init__.py +2 -2
  54. droidrun/macro/__main__.py +1 -1
  55. droidrun/macro/cli.py +36 -34
  56. droidrun/macro/replay.py +7 -9
  57. droidrun/portal.py +1 -1
  58. droidrun/telemetry/__init__.py +2 -2
  59. droidrun/telemetry/events.py +3 -4
  60. droidrun/telemetry/phoenix.py +173 -0
  61. droidrun/telemetry/tracker.py +7 -5
  62. droidrun/tools/__init__.py +1 -1
  63. droidrun/tools/adb.py +210 -82
  64. droidrun/tools/ios.py +7 -5
  65. droidrun/tools/tools.py +25 -8
  66. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
  67. droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
  68. droidrun/agent/common/default.py +0 -5
  69. droidrun/agent/context/reflection.py +0 -20
  70. droidrun/agent/oneflows/reflector.py +0 -265
  71. droidrun-0.3.8.dist-info/RECORD +0 -55
  72. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
  73. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
  74. {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
droidrun/__init__.py CHANGED
@@ -5,13 +5,12 @@ DroidRun - A framework for controlling Android devices through LLM agents.
  __version__ = "0.3.0"

  # Import main classes for easier access
- from droidrun.agent.utils.llm_picker import load_llm
- from droidrun.tools import Tools, AdbTools, IOSTools
  from droidrun.agent.droid import DroidAgent
+ from droidrun.agent.utils.llm_picker import load_llm

  # Import macro functionality
  from droidrun.macro import MacroPlayer, replay_macro_file, replay_macro_folder
-
+ from droidrun.tools import AdbTools, IOSTools, Tools

  # Make main components available at package level
  __all__ = [
droidrun/__main__.py CHANGED
@@ -4,4 +4,4 @@ DroidRun main entry point
  from droidrun.cli.main import cli

  if __name__ == '__main__':
-     cli()
+     cli()
droidrun/agent/__init__.py CHANGED
@@ -3,4 +3,4 @@
  #logger = logging.getLogger("droidrun")
  #logger.propagate = False # Don't send to root logger
  #logger.handlers = [] # No handlers by default
- #logger.setLevel(logging.INFO) # Or WARNING
+ #logger.setLevel(logging.INFO) # Or WARNING
droidrun/agent/codeact/__init__.py CHANGED
@@ -1,8 +1,5 @@
  from droidrun.agent.codeact.codeact_agent import CodeActAgent
- from droidrun.agent.codeact.prompts import (
-     DEFAULT_CODE_ACT_USER_PROMPT,
-     DEFAULT_NO_THOUGHTS_PROMPT
- )
+ from droidrun.agent.codeact.prompts import DEFAULT_CODE_ACT_USER_PROMPT, DEFAULT_NO_THOUGHTS_PROMPT

  __all__ = [
      "CodeActAgent",
droidrun/agent/codeact/codeact_agent.py CHANGED
@@ -1,36 +1,37 @@
+ import asyncio
+ import json
  import logging
  import re
  import time
- import asyncio
- import json
- import os
- from typing import List, Optional, Tuple, Union
+ from typing import List, Union
+
  from llama_index.core.base.llms.types import ChatMessage, ChatResponse
- from llama_index.core.prompts import PromptTemplate
  from llama_index.core.llms.llm import LLM
- from llama_index.core.workflow import Workflow, StartEvent, StopEvent, Context, step
  from llama_index.core.memory import Memory
+ from llama_index.core.prompts import PromptTemplate
+ from llama_index.core.workflow import Context, StartEvent, StopEvent, Workflow, step
+
  from droidrun.agent.codeact.events import (
-     TaskInputEvent,
+     EpisodicMemoryEvent,
      TaskEndEvent,
      TaskExecutionEvent,
      TaskExecutionResultEvent,
+     TaskInputEvent,
      TaskThinkingEvent,
-     EpisodicMemoryEvent,
  )
- from droidrun.agent.common.events import ScreenshotEvent, RecordUIStateEvent
- from droidrun.agent.usage import get_usage_from_response
- from droidrun.agent.utils import chat_utils
- from droidrun.agent.utils.executer import SimpleCodeExecutor
  from droidrun.agent.codeact.prompts import (
      DEFAULT_CODE_ACT_USER_PROMPT,
      DEFAULT_NO_THOUGHTS_PROMPT,
  )
-
+ from droidrun.agent.common.constants import LLM_HISTORY_LIMIT
+ from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent
+ from droidrun.agent.context.agent_persona import AgentPersona
  from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
+ from droidrun.agent.usage import get_usage_from_response
+ from droidrun.agent.utils import chat_utils
+ from droidrun.agent.utils.executer import SimpleCodeExecutor
+ from droidrun.agent.utils.tools import ATOMIC_ACTION_SIGNATURES, get_atomic_tool_descriptions, build_custom_tool_descriptions
  from droidrun.tools import Tools
- from typing import Optional, Dict, Tuple, List, Any, Callable
- from droidrun.agent.context.agent_persona import AgentPersona

  logger = logging.getLogger("droidrun")

@@ -48,8 +49,8 @@ class CodeActAgent(Workflow):
          persona: AgentPersona,
          vision: bool,
          tools_instance: "Tools",
-         all_tools_list: Dict[str, Callable[..., Any]],
          max_steps: int = 5,
+         custom_tools: dict = None,
          debug: bool = False,
          *args,
          **kwargs,
@@ -77,13 +78,40 @@ class CodeActAgent(Workflow):

          self.tools = tools_instance

+         # Merge custom_tools with ATOMIC_ACTION_SIGNATURES
+         # Custom tools are treated the same as atomic actions by CodeAct
+         merged_signatures = {**ATOMIC_ACTION_SIGNATURES, **(custom_tools or {})}
+
+         # Build tool_list from merged signatures
          self.tool_list = {}
+         for action_name, signature in merged_signatures.items():
+             func = signature["function"]
+             # Create bound function (curry tools_instance as first argument)
+             # Handle both sync and async functions
+             if asyncio.iscoroutinefunction(func):
+                 async def make_async_bound(f, ti):
+                     async def bound_func(*args, **kwargs):
+                         return await f(ti, *args, **kwargs)
+                     return bound_func
+                 self.tool_list[action_name] = asyncio.run(make_async_bound(func, tools_instance))
+             else:
+                 self.tool_list[action_name] = lambda *args, f=func, ti=tools_instance: f(ti, *args)

-         for tool_name in persona.allowed_tools:
-             if tool_name in all_tools_list:
-                 self.tool_list[tool_name] = all_tools_list[tool_name]
+         # Add non-atomic tools (remember, complete) from tools_instance
+         self.tool_list["remember"] = tools_instance.remember
+         self.tool_list["complete"] = tools_instance.complete

-         self.tool_descriptions = chat_utils.parse_tool_descriptions(self.tool_list)
+         # Get tool descriptions from ATOMIC_ACTION_SIGNATURES and custom_tools
+         self.tool_descriptions = get_atomic_tool_descriptions()
+
+         # Add custom tool descriptions if provided
+         custom_descriptions = build_custom_tool_descriptions(custom_tools or {})
+         if custom_descriptions:
+             self.tool_descriptions += "\n" + custom_descriptions
+
+         # Add descriptions for remember/complete
+         self.tool_descriptions += "\n- remember(information: str): Remember information for later use"
+         self.tool_descriptions += "\n- complete(success: bool, reason: str): Mark task as complete"

          self.system_prompt_content = persona.system_prompt.format(
              tool_descriptions=self.tool_descriptions
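Note on the new tool-binding logic above: each entry in `ATOMIC_ACTION_SIGNATURES` (and in `custom_tools`) exposes a `"function"` callable that takes the `Tools` instance as its first argument, and the constructor curries that instance in before handing the callable to the code executor. A minimal standalone sketch of the same currying idea, with a hypothetical `FakeTools` class and signature table standing in for the package's real ones:

```python
import asyncio
import functools

class FakeTools:
    def tap_by_index(self, index: int) -> str:
        return f"tapped element {index}"

    async def swipe(self, x1: int, y1: int, x2: int, y2: int) -> str:
        return f"swiped ({x1},{y1}) -> ({x2},{y2})"

# Hypothetical signature table: every callable expects the tools instance first.
SIGNATURES = {
    "tap_by_index": {"function": FakeTools.tap_by_index},
    "swipe": {"function": FakeTools.swipe},
}

def bind_tools(tools: FakeTools) -> dict:
    """Curry the tools instance into every signature, sync or async."""
    bound = {}
    for name, sig in SIGNATURES.items():
        # functools.partial sidesteps the late-binding pitfall of a bare lambda in a loop.
        bound[name] = functools.partial(sig["function"], tools)
    return bound

tool_list = bind_tools(FakeTools())
print(tool_list["tap_by_index"](3))                 # sync tool: called directly
print(asyncio.run(tool_list["swipe"](0, 0, 5, 5)))  # async tool: call returns a coroutine
```

The diff itself reaches the same result with default-argument lambdas for sync functions and a coroutine-producing wrapper for async ones; `functools.partial` is shown here only as a compact equivalent.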
@@ -109,7 +137,8 @@ class CodeActAgent(Workflow):
          """Prepare chat history from user input."""
          logger.info("💬 Preparing chat for task execution...")

-         self.chat_memory: Memory = await ctx.get(
+
+         self.chat_memory: Memory = await ctx.store.get(
              "chat_memory", default=Memory.from_defaults()
          )

@@ -135,7 +164,7 @@ class CodeActAgent(Workflow):

          await self.chat_memory.aput(self.user_message)

-         await ctx.set("chat_memory", self.chat_memory)
+         await ctx.store.set("chat_memory", self.chat_memory)
          input_messages = self.chat_memory.get_all()
          return TaskInputEvent(input=input_messages)

@@ -160,9 +189,9 @@ class CodeActAgent(Workflow):
          logger.info(f"🧠 Step {self.steps_counter}: Thinking...")

          model = self.llm.class_name()
-
+
          if "remember" in self.tool_list and self.remembered_info:
-             await ctx.set("remembered_info", self.remembered_info)
+             await ctx.store.set("remembered_info", self.remembered_info)
              chat_history = await chat_utils.add_memory_block(self.remembered_info, chat_history)

          for context in self.required_context:
@@ -171,25 +200,25 @@ class CodeActAgent(Workflow):
                  screenshot = (self.tools.take_screenshot())[1]
                  ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))

-                 await ctx.set("screenshot", screenshot)
+                 await ctx.store.set("screenshot", screenshot)
                  if model == "DeepSeek":
                      logger.warning(
                          "[yellow]DeepSeek doesnt support images. Disabling screenshots[/]"
                      )
-                 elif self.vision == True: # if vision is enabled, add screenshot to chat history
+                 elif self.vision: # if vision is enabled, add screenshot to chat history
                      chat_history = await chat_utils.add_screenshot_image_block(screenshot, chat_history)

              if context == "ui_state":
                  try:
                      state = self.tools.get_state()
-                     await ctx.set("ui_state", state["a11y_tree"])
+                     await ctx.store.set("ui_state", state["a11y_tree"])
                      ctx.write_event_to_stream(RecordUIStateEvent(ui_state=state["a11y_tree"]))
                      chat_history = await chat_utils.add_ui_text_block(
                          state["a11y_tree"], chat_history
                      )
                      chat_history = await chat_utils.add_phone_state_block(state["phone_state"], chat_history)
-                 except Exception as e:
-                     logger.warning(f"⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")
+                 except Exception:
+                     logger.warning("⚠️ Error retrieving state from the connected device. Is the Accessibility Service enabled?")


              if context == "packages":
@@ -252,7 +281,7 @@ class CodeActAgent(Workflow):
          """Execute the code and return the result."""
          code = ev.code
          assert code, "Code cannot be empty."
-         logger.info(f"⚡ Executing action...")
+         logger.info("⚡ Executing action...")
          logger.info(f"Code to execute:\n```python\n{code}\n```")

          try:
@@ -267,16 +296,16 @@ class CodeActAgent(Workflow):
              for ui_state in ui_states[:-1]:
                  ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state['a11y_tree']))

-             if self.tools.finished == True:
+             if self.tools.finished:
                  logger.debug(" - Task completed.")
                  event = TaskEndEvent(
                      success=self.tools.success, reason=self.tools.reason
                  )
                  ctx.write_event_to_stream(event)
                  return event
-
+
              self.remembered_info = self.tools.memory
-
+
              event = TaskExecutionResultEvent(output=str(result['output']))
              ctx.write_event_to_stream(event)
              return event
@@ -320,12 +349,12 @@ class CodeActAgent(Workflow):
      async def finalize(self, ev: TaskEndEvent, ctx: Context) -> StopEvent:
          """Finalize the workflow."""
          self.tools.finished = False
-         await ctx.set("chat_memory", self.chat_memory)
-
+         await ctx.store.set("chat_memory", self.chat_memory)
+
          # Add final state observation to episodic memory
          if self.vision:
              await self._add_final_state_observation(ctx)
-
+
          result = {}
          result.update(
              {
@@ -347,14 +376,15 @@ class CodeActAgent(Workflow):
          self, ctx: Context, chat_history: List[ChatMessage]
      ) -> ChatResponse | None:
          logger.debug("🔍 Getting LLM response...")
-         messages_to_send = [self.system_prompt] + chat_history
+         limited_history = self._limit_history(chat_history)
+         messages_to_send = [self.system_prompt] + limited_history
          messages_to_send = [chat_utils.message_copy(msg) for msg in messages_to_send]
          try:
              response = await self.llm.achat(messages=messages_to_send)
              logger.debug("🔍 Received LLM response.")

              filtered_chat_history = []
-             for msg in chat_history:
+             for msg in limited_history:
                  filtered_msg = chat_utils.message_copy(msg)
                  if hasattr(filtered_msg, "blocks") and filtered_msg.blocks:
                      filtered_msg.blocks = [
@@ -379,9 +409,10 @@ class CodeActAgent(Workflow):
                  chat_history=chat_history_str,
                  response=response_str,
                  timestamp=time.time(),
-                 screenshot=(await ctx.get("screenshot", None))
+                 screenshot=(await ctx.store.get("screenshot", None))
              )

+
              self.episodic_memory.steps.append(step)

              assert hasattr(
@@ -399,41 +430,74 @@ class CodeActAgent(Workflow):
                      logger.error(f"Rate limit error. Retrying in {seconds} seconds...")
                      time.sleep(seconds)
                  else:
-                     logger.error(f"Rate limit error. Retrying in 5 seconds...")
+                     logger.error("Rate limit error. Retrying in 5 seconds...")
                      time.sleep(40)
                  logger.debug("🔍 Retrying call to LLM...")
                  response = await self.llm.achat(messages=messages_to_send)
+             elif (
+                 self.llm.class_name() == "Anthropic_LLM"
+                 and "overloaded_error" in str(e)
+             ):
+                 # Use exponential backoff for Anthropic errors
+                 if not hasattr(self, '_anthropic_retry_count'):
+                     self._anthropic_retry_count = 0
+                 self._anthropic_retry_count += 1
+                 seconds = min(2 ** self._anthropic_retry_count, 60) # Cap at 60 seconds
+                 logger.error(f"Anthropic overload error. Retrying in {seconds} seconds... (attempt {self._anthropic_retry_count})")
+                 time.sleep(seconds)
+                 logger.debug("🔍 Retrying call to LLM...")
+                 response = await self.llm.achat(messages=messages_to_send)
+                 self._anthropic_retry_count = 0 # Reset on success
              else:
                  logger.error(f"Could not get an answer from LLM: {repr(e)}")
                  raise e
          logger.debug(" - Received response from LLM.")
          return response

+     def _limit_history(
+         self, chat_history: List[ChatMessage]
+     ) -> List[ChatMessage]:
+         if LLM_HISTORY_LIMIT <= 0:
+             return chat_history
+
+         max_messages = LLM_HISTORY_LIMIT * 2
+         if len(chat_history) <= max_messages:
+             return chat_history
+
+         preserved_head: List[ChatMessage] = []
+         if chat_history and chat_history[0].role == "user":
+             preserved_head = [chat_history[0]]
+
+         tail = chat_history[-max_messages:]
+         if preserved_head and preserved_head[0] in tail:
+             preserved_head = []
+
+         return preserved_head + tail
+
      async def _add_final_state_observation(self, ctx: Context) -> None:
          """Add the current UI state and screenshot as the final observation step."""
          try:
              # Get current screenshot and UI state
              screenshot = None
-             ui_state = None
-
+
              try:
                  _, screenshot_bytes = self.tools.take_screenshot()
                  screenshot = screenshot_bytes
              except Exception as e:
                  logger.warning(f"Failed to capture final screenshot: {e}")
-
+
              try:
                  (a11y_tree, phone_state) = self.tools.get_state()
              except Exception as e:
                  logger.warning(f"Failed to capture final UI state: {e}")
-
+
              # Create final observation chat history and response
              final_chat_history = [{"role": "system", "content": "Final state observation after task completion"}]
              final_response = {
-                 "role": "user", 
+                 "role": "user",
                  "content": f"Final State Observation:\nUI State: {a11y_tree}\nScreenshot: {'Available' if screenshot else 'Not available'}"
              }
-
+
              # Create final episodic memory step
              final_step = EpisodicMemoryStep(
                  chat_history=json.dumps(final_chat_history),
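The `_limit_history` helper added above keeps at most `LLM_HISTORY_LIMIT * 2` recent messages (roughly one user/assistant pair per step) and re-attaches the first user message, which carries the task, when it would otherwise fall out of the window. A small self-contained illustration of that windowing rule, using a toy message type in place of `ChatMessage`:

```python
from dataclasses import dataclass
from typing import List

LLM_HISTORY_LIMIT = 20  # mirrors droidrun.agent.common.constants


@dataclass
class Msg:
    role: str
    text: str


def limit_history(history: List[Msg]) -> List[Msg]:
    """Same windowing rule as CodeActAgent._limit_history, on toy messages."""
    if LLM_HISTORY_LIMIT <= 0:
        return history
    max_messages = LLM_HISTORY_LIMIT * 2
    if len(history) <= max_messages:
        return history
    preserved_head = [history[0]] if history and history[0].role == "user" else []
    tail = history[-max_messages:]
    if preserved_head and preserved_head[0] in tail:
        preserved_head = []
    return preserved_head + tail


# 61 messages: the initial goal message plus 30 user/assistant exchanges.
history = [Msg("user", "goal")] + [
    Msg("user" if i % 2 == 0 else "assistant", f"step {i}") for i in range(60)
]
trimmed = limit_history(history)
print(len(trimmed))     # 41: the preserved goal message plus the 40 most recent messages
print(trimmed[0].text)  # "goal"
```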
@@ -441,9 +505,9 @@ class CodeActAgent(Workflow):
                  timestamp=time.time(),
                  screenshot=screenshot
              )
-
+
              self.episodic_memory.steps.append(final_step)
              logger.info("Added final state observation to episodic memory")
-
+
          except Exception as e:
              logger.error(f"Failed to add final state observation: {e}")
droidrun/agent/codeact/events.py CHANGED
@@ -1,10 +1,13 @@
+ from typing import Optional
+
  from llama_index.core.llms import ChatMessage
  from llama_index.core.workflow import Event
- from typing import Optional

  from droidrun.agent.usage import UsageResult
+
  from ..context.episodic_memory import EpisodicMemory

+
  class TaskInputEvent(Event):
      input: list[ChatMessage]

@@ -12,7 +15,7 @@ class TaskInputEvent(Event):

  class TaskThinkingEvent(Event):
      thoughts: Optional[str] = None
-     code: Optional[str] = None
+     code: Optional[str] = None
      usage: Optional[UsageResult] = None

  class TaskExecutionEvent(Event):
@@ -28,4 +31,4 @@ class TaskEndEvent(Event):
      reason: str

  class EpisodicMemoryEvent(Event):
-     episodic_memory: EpisodicMemory
+     episodic_memory: EpisodicMemory
droidrun/agent/codeact/prompts.py CHANGED
@@ -21,6 +21,6 @@ Now, describe the next step you will take to address the original goal: {goal}""

  # Export all prompts
  __all__ = [
-     "DEFAULT_CODE_ACT_USER_PROMPT", 
+     "DEFAULT_CODE_ACT_USER_PROMPT",
      "DEFAULT_NO_THOUGHTS_PROMPT"
- ]
+ ]
droidrun/agent/common/constants.py ADDED
@@ -0,0 +1,2 @@
+ """Max number of recent conversation steps to include in LLM prompt"""
+ LLM_HISTORY_LIMIT = 20
droidrun/agent/common/events.py CHANGED
@@ -1,5 +1,7 @@
+ from typing import Any, Dict
+
  from llama_index.core.workflow import Event
- from typing import Dict, Any
+

  class ScreenshotEvent(Event):
      screenshot: bytes
@@ -16,7 +18,7 @@ class TapActionEvent(MacroEvent):
      element_index: int = None
      element_text: str = ""
      element_bounds: str = ""
-
+
  class SwipeActionEvent(MacroEvent):
      """Event for swipe actions with coordinates"""
      start_x: int
@@ -48,4 +50,4 @@ class StartAppEvent(MacroEvent):
      activity: str = None

  class RecordUIStateEvent(Event):
-     ui_state: list[Dict[str, Any]]
+     ui_state: list[Dict[str, Any]]
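Both `ScreenshotEvent` and `RecordUIStateEvent` are written to the workflow stream via `ctx.write_event_to_stream(...)` throughout this diff, so a caller can observe screenshots and UI snapshots while an agent runs. A hedged sketch of the consumer side, assuming the standard llama_index workflow streaming API; the way the agent itself is constructed and started (shown here as a bare `agent.run()`) is an assumption, not taken from this diff:

```python
import asyncio

from droidrun.agent.common.events import RecordUIStateEvent, ScreenshotEvent


async def watch(agent) -> None:
    """Print streamed device observations from a running droidrun workflow."""
    handler = agent.run()  # llama_index workflows return a handler that exposes the event stream
    async for event in handler.stream_events():
        if isinstance(event, ScreenshotEvent):
            print(f"screenshot: {len(event.screenshot)} bytes")
        elif isinstance(event, RecordUIStateEvent):
            print(f"ui state: {len(event.ui_state)} elements")
    print(await handler)  # final workflow result


# asyncio.run(watch(agent))  # 'agent' is assumed to be an already-configured workflow/agent
```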
droidrun/agent/context/__init__.py CHANGED
@@ -9,15 +9,13 @@ This module contains:
  from .agent_persona import AgentPersona
  from .context_injection_manager import ContextInjectionManager
  from .episodic_memory import EpisodicMemory, EpisodicMemoryStep
- from .reflection import Reflection
- from .task_manager import TaskManager, Task
+ from .task_manager import Task, TaskManager

  __all__ = [
      "AgentPersona",
      "ContextInjectionManager",
      "EpisodicMemory",
      "EpisodicMemoryStep",
-     "Reflection",
      "TaskManager",
      "Task"
  ]
droidrun/agent/context/agent_persona.py CHANGED
@@ -1,5 +1,6 @@
- from typing import Dict, List, Callable, Any, Optional
  from dataclasses import dataclass
+ from typing import List
+

  @dataclass
  class AgentPersona:
droidrun/agent/context/context_injection_manager.py CHANGED
@@ -5,12 +5,12 @@ This module provides the ContextInjectionManager class that manages different ag
  each with specific system prompts, contexts, and tool subsets tailored for specialized tasks.
  """

- import logging
- from typing import Optional, List
- from droidrun.agent.context.agent_persona import AgentPersona
  #import chromadb
  import json
- from pathlib import Path
+ import logging
+ from typing import List, Optional
+
+ from droidrun.agent.context.agent_persona import AgentPersona

  logger = logging.getLogger("droidrun")

@@ -59,8 +59,8 @@ class ContextInjectionManager:
          Returns:
              AgentPersona instance or None if not found
          """
-
+
          return self.personas.get(agent_type)
-
+
      def get_all_personas(self) -> List[str]:
          return self.personas
droidrun/agent/context/episodic_memory.py CHANGED
@@ -1,7 +1,9 @@
  from dataclasses import dataclass, field
- from droidrun.agent.context.agent_persona import AgentPersona
  from typing import List, Optional

+ from droidrun.agent.context.agent_persona import AgentPersona
+
+
  @dataclass
  class EpisodicMemoryStep:
      chat_history: str
@@ -9,7 +11,7 @@ class EpisodicMemoryStep:
      timestamp: float
      screenshot: Optional[bytes]

- @dataclass
+ @dataclass
  class EpisodicMemory:
      persona: AgentPersona
-     steps: List[EpisodicMemoryStep] = field(default_factory=list)
+     steps: List[EpisodicMemoryStep] = field(default_factory=list)
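These two dataclasses are plain containers: agents append one `EpisodicMemoryStep` per LLM turn (the `codeact_agent.py` hunks above fill `chat_history`, `response`, `timestamp`, and `screenshot`). A minimal sketch of how they compose, using the bundled `DEFAULT` persona; the message contents are illustrative only:

```python
import json
import time

from droidrun.agent.context.episodic_memory import EpisodicMemory, EpisodicMemoryStep
from droidrun.agent.context.personas import DEFAULT

memory = EpisodicMemory(persona=DEFAULT)

# One step per reasoning turn; screenshot stays None when vision is disabled.
memory.steps.append(
    EpisodicMemoryStep(
        chat_history=json.dumps([{"role": "user", "content": "Open the settings app"}]),
        response=json.dumps({"role": "assistant", "content": "Tapping the Settings icon"}),
        timestamp=time.time(),
        screenshot=None,
    )
)
print(len(memory.steps))  # 1
```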
droidrun/agent/context/personas/__init__.py CHANGED
@@ -1,11 +1,11 @@
- from .default import DEFAULT
- from .ui_expert import UI_EXPERT
  from .app_starter import APP_STARTER_EXPERT
  from .big_agent import BIG_AGENT
+ from .default import DEFAULT
+ from .ui_expert import UI_EXPERT

  __all__ = [
      'DEFAULT',
      'UI_EXPERT',
      'APP_STARTER_EXPERT',
      'BIG_AGENT',
- ]
+ ]
droidrun/agent/context/personas/app_starter.py CHANGED
@@ -2,7 +2,7 @@ from droidrun.agent.context.agent_persona import AgentPersona
  from droidrun.tools import Tools

  APP_STARTER_EXPERT = AgentPersona(
-     name="AppStarterExpert", 
+     name="AppStarterExpert",
      description="Specialized in app launching",
      expertise_areas=[
          "app launching"
@@ -37,8 +37,8 @@ APP_STARTER_EXPERT = AgentPersona(
  In addition to the Python Standard Library and any functions you have already written, you can use the following functions:
  {tool_descriptions}

- Reminder: Always place your Python code between ```...``` tags when you want to run code. 
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.

  You focus ONLY on app launching and package management - UI interactions within apps are handled by UI specialists.""",

- )
+ )
droidrun/agent/context/personas/big_agent.py CHANGED
@@ -5,7 +5,7 @@ BIG_AGENT = AgentPersona(
      name="Big Agent",
      description="Big Agent. Use this as your Big Agent",
      expertise_areas=[
-         "UI navigation", "button interactions", "text input", 
+         "UI navigation", "button interactions", "text input",
          "menu navigation", "form filling", "scrolling", "app launching"
      ],
      allowed_tools=[
@@ -90,7 +90,7 @@ BIG_AGENT = AgentPersona(
  - Present the results clearly and concisely as if you computed them directly
  - Structure your response like you're directly answering the user's query, not explaining how you solved it

- Reminder: Always place your Python code between ```...``` tags when you want to run code. 
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
  """

- )
+ )
droidrun/agent/context/personas/default.py CHANGED
@@ -5,7 +5,7 @@ DEFAULT = AgentPersona(
      name="Default",
      description="Default Agent. Use this as your Default",
      expertise_areas=[
-         "UI navigation", "button interactions", "text input", 
+         "UI navigation", "button interactions", "text input",
          "menu navigation", "form filling", "scrolling", "app launching"
      ],
      allowed_tools=[
@@ -89,7 +89,7 @@ DEFAULT = AgentPersona(
  - Present the results clearly and concisely as if you computed them directly
  - Structure your response like you're directly answering the user's query, not explaining how you solved it

- Reminder: Always place your Python code between ```...``` tags when you want to run code. 
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.
  """

- )
+ )
droidrun/agent/context/personas/ui_expert.py CHANGED
@@ -5,14 +5,14 @@ UI_EXPERT = AgentPersona(
      name="UIExpert",
      description="Specialized in UI interactions, navigation, and form filling",
      expertise_areas=[
-         "UI navigation", "button interactions", "text input", 
+         "UI navigation", "button interactions", "text input",
          "menu navigation", "form filling", "scrolling"
      ],
      allowed_tools=[
          Tools.swipe.__name__,
          Tools.input_text.__name__,
          Tools.press_key.__name__,
-         Tools.tap_by_index.__name__, 
+         Tools.tap_by_index.__name__,
          Tools.drag.__name__,
          Tools.remember.__name__,
          Tools.complete.__name__
@@ -54,8 +54,8 @@ UI_EXPERT = AgentPersona(
  - Remember important UI state information for context

  You do NOT handle app launching or package management - that's handled by other specialists.
-
-
+
+
  ## Available Context:
  In your execution environment, you have access to:
  - `ui_elements`: A global variable containing the current UI elements from the device. This is automatically updated before each code execution and contains the latest UI elements that were fetched.
@@ -99,7 +99,7 @@ UI_EXPERT = AgentPersona(
  - If relevant, you can briefly mention general methods used, but don't include code snippets in the final answer
  - Structure your response like you're directly answering the user's query, not explaining how you solved it

- Reminder: Always place your Python code between ```...``` tags when you want to run code. 
+ Reminder: Always place your Python code between ```...``` tags when you want to run code.

  You MUST ALWAYS to include your reasoning and thought process outside of the code block. You MUST DOUBLE CHECK that TASK IS COMPLETE with a SCREENSHOT.
  """