droidrun 0.3.8__py3-none-any.whl → 0.3.10.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- droidrun/__init__.py +2 -3
- droidrun/__main__.py +1 -1
- droidrun/agent/__init__.py +1 -1
- droidrun/agent/codeact/__init__.py +1 -4
- droidrun/agent/codeact/codeact_agent.py +112 -48
- droidrun/agent/codeact/events.py +6 -3
- droidrun/agent/codeact/prompts.py +2 -2
- droidrun/agent/common/constants.py +2 -0
- droidrun/agent/common/events.py +5 -3
- droidrun/agent/context/__init__.py +1 -3
- droidrun/agent/context/agent_persona.py +2 -1
- droidrun/agent/context/context_injection_manager.py +6 -6
- droidrun/agent/context/episodic_memory.py +5 -3
- droidrun/agent/context/personas/__init__.py +3 -3
- droidrun/agent/context/personas/app_starter.py +3 -3
- droidrun/agent/context/personas/big_agent.py +3 -3
- droidrun/agent/context/personas/default.py +3 -3
- droidrun/agent/context/personas/ui_expert.py +5 -5
- droidrun/agent/context/task_manager.py +15 -17
- droidrun/agent/droid/__init__.py +1 -1
- droidrun/agent/droid/droid_agent.py +327 -182
- droidrun/agent/droid/events.py +91 -9
- droidrun/agent/executor/__init__.py +13 -0
- droidrun/agent/executor/events.py +24 -0
- droidrun/agent/executor/executor_agent.py +327 -0
- droidrun/agent/executor/prompts.py +136 -0
- droidrun/agent/manager/__init__.py +18 -0
- droidrun/agent/manager/events.py +20 -0
- droidrun/agent/manager/manager_agent.py +459 -0
- droidrun/agent/manager/prompts.py +223 -0
- droidrun/agent/oneflows/app_starter_workflow.py +118 -0
- droidrun/agent/oneflows/text_manipulator.py +204 -0
- droidrun/agent/planner/__init__.py +3 -3
- droidrun/agent/planner/events.py +6 -3
- droidrun/agent/planner/planner_agent.py +60 -53
- droidrun/agent/planner/prompts.py +2 -2
- droidrun/agent/usage.py +15 -13
- droidrun/agent/utils/__init__.py +11 -1
- droidrun/agent/utils/async_utils.py +2 -1
- droidrun/agent/utils/chat_utils.py +48 -60
- droidrun/agent/utils/device_state_formatter.py +177 -0
- droidrun/agent/utils/executer.py +13 -12
- droidrun/agent/utils/inference.py +114 -0
- droidrun/agent/utils/llm_picker.py +2 -0
- droidrun/agent/utils/message_utils.py +85 -0
- droidrun/agent/utils/tools.py +220 -0
- droidrun/agent/utils/trajectory.py +8 -7
- droidrun/cli/__init__.py +1 -1
- droidrun/cli/logs.py +29 -28
- droidrun/cli/main.py +279 -143
- droidrun/config_manager/__init__.py +25 -0
- droidrun/config_manager/config_manager.py +583 -0
- droidrun/macro/__init__.py +2 -2
- droidrun/macro/__main__.py +1 -1
- droidrun/macro/cli.py +36 -34
- droidrun/macro/replay.py +7 -9
- droidrun/portal.py +1 -1
- droidrun/telemetry/__init__.py +2 -2
- droidrun/telemetry/events.py +3 -4
- droidrun/telemetry/phoenix.py +173 -0
- droidrun/telemetry/tracker.py +7 -5
- droidrun/tools/__init__.py +1 -1
- droidrun/tools/adb.py +210 -82
- droidrun/tools/ios.py +7 -5
- droidrun/tools/tools.py +25 -8
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/METADATA +13 -7
- droidrun-0.3.10.dev2.dist-info/RECORD +70 -0
- droidrun/agent/common/default.py +0 -5
- droidrun/agent/context/reflection.py +0 -20
- droidrun/agent/oneflows/reflector.py +0 -265
- droidrun-0.3.8.dist-info/RECORD +0 -55
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/WHEEL +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/entry_points.txt +0 -0
- {droidrun-0.3.8.dist-info → droidrun-0.3.10.dev2.dist-info}/licenses/LICENSE +0 -0
droidrun/agent/utils/__init__.py
CHANGED
@@ -1,3 +1,13 @@
|
|
1
1
|
"""
|
2
2
|
Utility modules for DroidRun agents.
|
3
|
-
"""
|
3
|
+
"""
|
4
|
+
|
5
|
+
from droidrun.agent.utils.message_utils import (
|
6
|
+
convert_messages_to_chatmessages,
|
7
|
+
image_to_image_bytes,
|
8
|
+
)
|
9
|
+
|
10
|
+
__all__ = [
|
11
|
+
"convert_messages_to_chatmessages",
|
12
|
+
"image_to_image_bytes",
|
13
|
+
]
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
|
3
|
+
|
3
4
|
def async_to_sync(func):
|
4
5
|
"""
|
5
6
|
Convert an async function to a sync function.
|
@@ -14,4 +15,4 @@ def async_to_sync(func):
|
|
14
15
|
def wrapper(*args, **kwargs):
|
15
16
|
return asyncio.run(func(*args, **kwargs))
|
16
17
|
|
17
|
-
return wrapper
|
18
|
+
return wrapper
|
@@ -1,16 +1,12 @@
|
|
1
|
-
import base64
|
2
|
-
import re
|
3
1
|
import inspect
|
4
|
-
|
5
|
-
|
6
2
|
import json
|
7
3
|
import logging
|
8
|
-
|
9
|
-
from
|
4
|
+
import re
|
5
|
+
from typing import List, Optional, Tuple
|
6
|
+
|
10
7
|
from llama_index.core.base.llms.types import ChatMessage, ImageBlock, TextBlock
|
11
8
|
|
12
|
-
|
13
|
-
from droidrun.tools import Tools
|
9
|
+
from droidrun.telemetry.phoenix import clean_span
|
14
10
|
|
15
11
|
logger = logging.getLogger("droidrun")
|
16
12
|
|
@@ -27,41 +23,23 @@ def message_copy(message: ChatMessage, deep = True) -> ChatMessage:
|
|
27
23
|
|
28
24
|
return copied_message
|
29
25
|
|
30
|
-
|
31
|
-
"""Add reflection summary and advice to help the planner understand what went wrong and what to do differently."""
|
32
|
-
|
33
|
-
reflection_text = "\n### The last task failed. You have additional information about what happenend. \nThe Reflection from Previous Attempt:\n"
|
34
|
-
|
35
|
-
if reflection.summary:
|
36
|
-
reflection_text += f"**What happened:** {reflection.summary}\n\n"
|
37
|
-
|
38
|
-
if reflection.advice:
|
39
|
-
reflection_text += f"**Recommended approach for this retry:** {reflection.advice}\n"
|
40
|
-
|
41
|
-
reflection_block = TextBlock(text=reflection_text)
|
42
|
-
|
43
|
-
# Copy chat_history and append reflection block to the last message
|
44
|
-
chat_history = chat_history.copy()
|
45
|
-
chat_history[-1] = message_copy(chat_history[-1])
|
46
|
-
chat_history[-1].blocks.append(reflection_block)
|
47
|
-
|
48
|
-
return chat_history
|
26
|
+
|
49
27
|
|
50
28
|
def _format_ui_elements(ui_data, level=0) -> str:
|
51
29
|
"""Format UI elements in natural language: index. className: resourceId, text - bounds"""
|
52
30
|
if not ui_data:
|
53
31
|
return ""
|
54
|
-
|
32
|
+
|
55
33
|
formatted_lines = []
|
56
34
|
indent = " " * level # Indentation for nested elements
|
57
|
-
|
35
|
+
|
58
36
|
# Handle both list and single element
|
59
37
|
elements = ui_data if isinstance(ui_data, list) else [ui_data]
|
60
|
-
|
38
|
+
|
61
39
|
for element in elements:
|
62
40
|
if not isinstance(element, dict):
|
63
41
|
continue
|
64
|
-
|
42
|
+
|
65
43
|
# Extract element properties
|
66
44
|
index = element.get('index', '')
|
67
45
|
class_name = element.get('className', '')
|
@@ -69,15 +47,15 @@ def _format_ui_elements(ui_data, level=0) -> str:
|
|
69
47
|
text = element.get('text', '')
|
70
48
|
bounds = element.get('bounds', '')
|
71
49
|
children = element.get('children', [])
|
72
|
-
|
73
|
-
|
50
|
+
|
51
|
+
|
74
52
|
# Format the line: index. className: resourceId, text - bounds
|
75
53
|
line_parts = []
|
76
54
|
if index != '':
|
77
55
|
line_parts.append(f"{index}.")
|
78
56
|
if class_name:
|
79
57
|
line_parts.append(class_name + ":")
|
80
|
-
|
58
|
+
|
81
59
|
details = []
|
82
60
|
if resource_id:
|
83
61
|
details.append(f'"{resource_id}"')
|
@@ -85,19 +63,19 @@ def _format_ui_elements(ui_data, level=0) -> str:
|
|
85
63
|
details.append(f'"{text}"')
|
86
64
|
if details:
|
87
65
|
line_parts.append(", ".join(details))
|
88
|
-
|
66
|
+
|
89
67
|
if bounds:
|
90
68
|
line_parts.append(f"- ({bounds})")
|
91
|
-
|
69
|
+
|
92
70
|
formatted_line = f"{indent}{' '.join(line_parts)}"
|
93
71
|
formatted_lines.append(formatted_line)
|
94
|
-
|
72
|
+
|
95
73
|
# Recursively format children with increased indentation
|
96
74
|
if children:
|
97
75
|
child_formatted = _format_ui_elements(children, level + 1)
|
98
76
|
if child_formatted:
|
99
77
|
formatted_lines.append(child_formatted)
|
100
|
-
|
78
|
+
|
101
79
|
return "\n".join(formatted_lines)
|
102
80
|
|
103
81
|
async def add_ui_text_block(ui_state: str, chat_history: List[ChatMessage], copy = True) -> List[ChatMessage]:
|
@@ -111,7 +89,7 @@ async def add_ui_text_block(ui_state: str, chat_history: List[ChatMessage], copy
|
|
111
89
|
except (json.JSONDecodeError, TypeError):
|
112
90
|
# Fallback to original format if parsing fails
|
113
91
|
ui_block = TextBlock(text="\nCurrent Clickable UI elements from the device using the custom TopViewService:\n```json\n" + json.dumps(ui_state) + "\n```\n")
|
114
|
-
|
92
|
+
|
115
93
|
if copy:
|
116
94
|
chat_history = chat_history.copy()
|
117
95
|
chat_history[-1] = message_copy(chat_history[-1])
|
@@ -129,27 +107,27 @@ async def add_screenshot_image_block(screenshot, chat_history: List[ChatMessage]
|
|
129
107
|
|
130
108
|
|
131
109
|
async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) -> List[ChatMessage]:
|
132
|
-
|
110
|
+
|
133
111
|
# Format the phone state data nicely
|
134
112
|
if isinstance(phone_state, dict) and 'error' not in phone_state:
|
135
113
|
current_app = phone_state.get('currentApp', '')
|
136
114
|
package_name = phone_state.get('packageName', 'Unknown')
|
137
115
|
keyboard_visible = phone_state.get('keyboardVisible', False)
|
138
116
|
focused_element = phone_state.get('focusedElement')
|
139
|
-
|
117
|
+
|
140
118
|
# Format the focused element
|
141
119
|
if focused_element:
|
142
120
|
element_text = focused_element.get('text', '')
|
143
121
|
element_class = focused_element.get('className', '')
|
144
122
|
element_resource_id = focused_element.get('resourceId', '')
|
145
|
-
|
123
|
+
|
146
124
|
# Build focused element description
|
147
125
|
focused_desc = f"'{element_text}' {element_class}"
|
148
126
|
if element_resource_id:
|
149
127
|
focused_desc += f" | ID: {element_resource_id}"
|
150
128
|
else:
|
151
129
|
focused_desc = "None"
|
152
|
-
|
130
|
+
|
153
131
|
phone_state_text = f"""
|
154
132
|
**Current Phone State:**
|
155
133
|
• **App:** {current_app} ({package_name})
|
@@ -162,7 +140,7 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
|
|
162
140
|
phone_state_text = f"\n📱 **Phone State Error:** {phone_state.get('message', 'Unknown error')}\n"
|
163
141
|
else:
|
164
142
|
phone_state_text = f"\n📱 **Phone State:** {phone_state}\n"
|
165
|
-
|
143
|
+
|
166
144
|
ui_block = TextBlock(text=phone_state_text)
|
167
145
|
chat_history = chat_history.copy()
|
168
146
|
chat_history[-1] = message_copy(chat_history[-1])
|
@@ -170,7 +148,7 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
|
|
170
148
|
return chat_history
|
171
149
|
|
172
150
|
async def add_packages_block(packages, chat_history: List[ChatMessage]) -> List[ChatMessage]:
|
173
|
-
|
151
|
+
|
174
152
|
ui_block = TextBlock(text=f"\nInstalled packages: {packages}\n```\n")
|
175
153
|
chat_history = chat_history.copy()
|
176
154
|
chat_history[-1] = message_copy(chat_history[-1])
|
@@ -181,7 +159,7 @@ async def add_memory_block(memory: List[str], chat_history: List[ChatMessage]) -
|
|
181
159
|
memory_block = "\n### Remembered Information:\n"
|
182
160
|
for idx, item in enumerate(memory, 1):
|
183
161
|
memory_block += f"{idx}. {item}\n"
|
184
|
-
|
162
|
+
|
185
163
|
for i, msg in enumerate(chat_history):
|
186
164
|
if msg.role == "user":
|
187
165
|
if isinstance(msg.content, str):
|
@@ -194,13 +172,6 @@ async def add_memory_block(memory: List[str], chat_history: List[ChatMessage]) -
|
|
194
172
|
break
|
195
173
|
return chat_history
|
196
174
|
|
197
|
-
async def get_reflection_block(reflections: List[Reflection]) -> ChatMessage:
|
198
|
-
reflection_block = "\n### You also have additional Knowledge to help you guide your current task from previous expierences:\n"
|
199
|
-
for reflection in reflections:
|
200
|
-
reflection_block += f"**{reflection.advice}\n"
|
201
|
-
|
202
|
-
return ChatMessage(role="user", content=reflection_block)
|
203
|
-
|
204
175
|
async def add_task_history_block(all_tasks: list[dict], chat_history: List[ChatMessage]) -> List[ChatMessage]:
|
205
176
|
"""Experimental task history with all previous tasks."""
|
206
177
|
if not all_tasks:
|
@@ -212,8 +183,8 @@ async def add_task_history_block(all_tasks: list[dict], chat_history: List[ChatM
|
|
212
183
|
status_value: str
|
213
184
|
|
214
185
|
if hasattr(task, "description") and hasattr(task, "status"):
|
215
|
-
description =
|
216
|
-
status_value =
|
186
|
+
description = task.description
|
187
|
+
status_value = task.status or "unknown"
|
217
188
|
elif isinstance(task, dict):
|
218
189
|
description = str(task.get("description", task))
|
219
190
|
status_value = str(task.get("status", "unknown"))
|
@@ -236,7 +207,7 @@ def parse_tool_descriptions(tool_list) -> str:
|
|
236
207
|
"""Parses the available tools and their descriptions for the system prompt."""
|
237
208
|
logger.info("🛠️ Parsing tool descriptions...")
|
238
209
|
tool_descriptions = []
|
239
|
-
|
210
|
+
|
240
211
|
for tool in tool_list.values():
|
241
212
|
assert callable(tool), f"Tool {tool} is not callable."
|
242
213
|
tool_name = tool.__name__
|
@@ -253,11 +224,11 @@ def parse_tool_descriptions(tool_list) -> str:
|
|
253
224
|
def parse_persona_description(personas) -> str:
|
254
225
|
"""Parses the available agent personas and their descriptions for the system prompt."""
|
255
226
|
logger.debug("👥 Parsing agent persona descriptions for Planner Agent...")
|
256
|
-
|
227
|
+
|
257
228
|
if not personas:
|
258
229
|
logger.warning("No agent personas provided to Planner Agent")
|
259
230
|
return "No specialized agents available."
|
260
|
-
|
231
|
+
|
261
232
|
persona_descriptions = []
|
262
233
|
for persona in personas:
|
263
234
|
# Format each persona with name, description, and expertise areas
|
@@ -265,7 +236,7 @@ def parse_persona_description(personas) -> str:
|
|
265
236
|
formatted_persona = f"- **{persona.name}**: {persona.description}\n Expertise: {expertise_list}"
|
266
237
|
persona_descriptions.append(formatted_persona)
|
267
238
|
logger.debug(f" - Parsed persona: {persona.name}")
|
268
|
-
|
239
|
+
|
269
240
|
# Join all persona descriptions into a single string
|
270
241
|
descriptions = "\n".join(persona_descriptions)
|
271
242
|
logger.debug(f"👤 Found {len(persona_descriptions)} agent personas.")
|
@@ -308,4 +279,21 @@ def extract_code_and_thought(response_text: str) -> Tuple[Optional[str], str]:
|
|
308
279
|
thought_preview = (thought_text[:100] + '...') if len(thought_text) > 100 else thought_text
|
309
280
|
logger.debug(f" - Extracted thought: {thought_preview}")
|
310
281
|
|
311
|
-
return extracted_code, thought_text
|
282
|
+
return extracted_code, thought_text
|
283
|
+
|
284
|
+
|
285
|
+
def has_non_empty_content(msg):
    """
    Report whether a message dict carries any non-blank text.

    Args:
        msg: Mapping read via ``msg.get('content', [])``. The content may be
            a plain string or an iterable whose items are either strings or
            dicts with a ``'text'`` entry.

    Returns:
        True if the content itself (when it is a string) or at least one of
        its items contains text that is non-empty after stripping whitespace,
        otherwise False.
    """
    content = msg.get('content', [])
    if not content:  # Empty list, empty string or None
        return False

    # A bare string is the text itself; no need to walk it character by character.
    if isinstance(content, str):
        return bool(content.strip())

    for item in content:
        text = item.get('text', '') if isinstance(item, dict) else item
        # Items without usable text (e.g. {'text': None} or non-text blocks)
        # are skipped instead of raising AttributeError on .strip().
        if isinstance(text, str) and text.strip():
            return True
    return False
|
296
|
+
|
297
|
+
@clean_span("remove_empty_messages")
def remove_empty_messages(messages):
    """Return a new list holding only the messages that pass has_non_empty_content."""
    return list(filter(has_non_empty_content, messages))
|
@@ -0,0 +1,177 @@
|
|
1
|
+
from typing import Any, Dict, List, Tuple
|
2
|
+
|
3
|
+
|
4
|
+
def format_phone_state(phone_state: Dict[str, Any]) -> str:
    """
    Format phone state data into a readable text block.

    Args:
        phone_state: Dictionary containing phone state information. The keys
            read are 'currentApp', 'packageName', 'focusedElement' and
            'isEditable'; a dictionary containing 'error' is reported using
            its 'message' entry. Non-dict values are rendered as-is.

    Returns:
        Formatted phone state text
    """
    # Malformed (non-dict) input: show whatever was passed in.
    if not isinstance(phone_state, dict):
        return f"📱 **Phone State:** {phone_state}"

    # Error payload reported by the caller.
    if 'error' in phone_state:
        return f"📱 **Phone State Error:** {phone_state.get('message', 'Unknown error')}"

    current_app = phone_state.get('currentApp', '')
    package_name = phone_state.get('packageName', 'Unknown')
    focused_element = phone_state.get('focusedElement')
    is_editable = phone_state.get('isEditable', False)

    # Only the text of the focused element is shown. A focused element that is
    # not a dict is treated like "no focus" instead of raising AttributeError.
    focused_text = focused_element.get('text') if isinstance(focused_element, dict) else None
    focused_desc = f"'{focused_text}'" if focused_text else "''"

    # The keyboard line is derived from the 'isEditable' flag.
    keyboard = 'Visible' if is_editable else 'Hidden'

    return "\n".join(
        [
            "**Current Phone State:**",
            f"• **App:** {current_app} ({package_name})",
            f"• **Keyboard:** {keyboard}",
            f"• **Focused Element:** {focused_desc}",
        ]
    )
|
38
|
+
|
39
|
+
|
40
|
+
def format_ui_elements(ui_data: List[Dict[str, Any]], level: int = 0) -> str:
    """
    Render UI elements as lines of the form
    index. className: "resourceId", "text" - (bounds)

    Args:
        ui_data: List of UI element dictionaries (a single dictionary is
            also accepted); entries that are not dictionaries are ignored
        level: Nesting depth, used to indent the lines of child elements

    Returns:
        The rendered lines joined with newlines, or "" for empty input
    """
    if not ui_data:
        return ""

    prefix = " " * level  # one indent unit per nesting level
    nodes = ui_data if isinstance(ui_data, list) else [ui_data]
    rendered = []

    for node in nodes:
        if not isinstance(node, dict):
            continue

        position = node.get('index', '')
        widget_class = node.get('className', '')
        box = node.get('bounds', '')
        nested = node.get('children', [])

        # resourceId and text are each quoted and shown only when present
        quoted = [
            f'"{value}"'
            for value in (node.get('resourceId', ''), node.get('text', ''))
            if value
        ]

        pieces = []
        if position != '':
            pieces.append(f"{position}.")
        if widget_class:
            pieces.append(widget_class + ":")
        if quoted:
            pieces.append(", ".join(quoted))
        if box:
            pieces.append(f"- ({box})")

        rendered.append(prefix + " ".join(pieces))

        # Children are rendered one level deeper, directly below their parent
        if nested:
            subtree = format_ui_elements(nested, level + 1)
            if subtree:
                rendered.append(subtree)

    return "\n".join(rendered)
|
102
|
+
|
103
|
+
|
104
|
+
def get_device_state_exact_format(state: Dict[str, Any]) -> Tuple[str, str]:
    """
    Get device state in exactly the format requested:

    **Current Phone State:**
    • **App:** App Name (package.name)
    • **Keyboard:** Hidden/Visible
    • **Focused Element:** 'text'

    Current Clickable UI elements from the device in the schema 'index. className: resourceId, text - bounds(x1,y1,x2,y2)':
    1. ClassName: "resourceId", "text" - (x1, y1, x2, y2)

    Args:
        state: Dictionary containing device state data from collector.get_device_state();
            the keys read here are "error", "message", "phone_state" and "a11y_tree"

    Returns:
        Tuple of (formatted_string, focused_text) where focused_text is the actual
        text content of the focused element, or empty string if none. On an error
        payload or any exception the first item is an error description and the
        second is "".
    """
    ui_header = (
        "Current Clickable UI elements from the device in the schema "
        "'index. className: resourceId, text - bounds(x1,y1,x2,y2)':"
    )
    try:
        if "error" in state:
            return (f"Error getting device state: {state.get('message', 'Unknown error')}", "")

        # Extract focused element text
        phone_state = state.get("phone_state", {})
        focused_element = phone_state.get('focusedElement')
        focused_text = ""
        if isinstance(focused_element, dict):
            # `or ""` keeps the documented contract when the 'text' key is
            # present but holds None.
            focused_text = focused_element.get('text', '') or ""

        # Format the state data
        phone_state_text = format_phone_state(phone_state)
        ui_data = state.get("a11y_tree", [])
        if ui_data:
            ui_elements_text = f"{ui_header}\n{format_ui_elements(ui_data)}"
        else:
            ui_elements_text = f"{ui_header}\nNo UI elements found"

        formatted_string = f"{phone_state_text}\n \n\n{ui_elements_text}"

        return (formatted_string, focused_text)
    except Exception as e:
        # Deliberate best-effort: any failure is reported as text rather than raised.
        return (f"Error getting device state: {e}", "")
|
148
|
+
|
149
|
+
|
150
|
+
def main():
    """Print the formatted output for a small hard-coded example state."""
    phone = {
        "currentApp": "Settings",
        "packageName": "com.android.settings",
        "isEditable": False,
        "focusedElement": {"text": "Search settings"},
    }
    wifi_row = {
        "index": 1,
        "className": "android.widget.TextView",
        "resourceId": "com.android.settings:id/title",
        "text": "Wi‑Fi",
        "bounds": "100,200,300,250",
    }
    sample = {"phone_state": phone, "a11y_tree": [wifi_row]}

    rendered, focus = get_device_state_exact_format(sample)
    print("Formatted String:")
    print(rendered)
    print(f"\nFocused Text: '{focus}'")


if __name__ == "__main__":
    main()
|
droidrun/agent/utils/executer.py
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
-
import
|
1
|
+
import asyncio
|
2
2
|
import contextlib
|
3
|
-
import
|
4
|
-
import traceback
|
3
|
+
import io
|
5
4
|
import logging
|
5
|
+
import threading
|
6
|
+
import traceback
|
7
|
+
from asyncio import AbstractEventLoop
|
6
8
|
from typing import Any, Dict
|
7
|
-
|
9
|
+
|
8
10
|
from llama_index.core.workflow import Context
|
9
|
-
|
10
|
-
from
|
11
|
-
import threading
|
11
|
+
|
12
|
+
from droidrun.agent.utils.async_utils import async_to_sync
|
12
13
|
from droidrun.tools.adb import AdbTools
|
13
14
|
|
14
15
|
logger = logging.getLogger("droidrun")
|
@@ -27,9 +28,9 @@ class SimpleCodeExecutor:
|
|
27
28
|
def __init__(
|
28
29
|
self,
|
29
30
|
loop: AbstractEventLoop,
|
30
|
-
locals: Dict[str, Any] = {},
|
31
|
-
globals: Dict[str, Any] = {},
|
32
|
-
tools={},
|
31
|
+
locals: Dict[str, Any] = {}, # noqa: B006
|
32
|
+
globals: Dict[str, Any] = {}, # noqa: B006
|
33
|
+
tools={}, # noqa: B006
|
33
34
|
tools_instance=None,
|
34
35
|
use_same_scope: bool = True,
|
35
36
|
):
|
@@ -98,10 +99,10 @@ class SimpleCodeExecutor:
|
|
98
99
|
str: Output from the execution, including print statements.
|
99
100
|
"""
|
100
101
|
# Update UI elements before execution
|
101
|
-
self.globals['ui_state'] = await ctx.get("ui_state", None)
|
102
|
+
self.globals['ui_state'] = await ctx.store.get("ui_state", None)
|
102
103
|
self.globals['step_screenshots'] = []
|
103
104
|
self.globals['step_ui_states'] = []
|
104
|
-
|
105
|
+
|
105
106
|
if self.tools_instance and isinstance(self.tools_instance, AdbTools):
|
106
107
|
self.tools_instance._set_context(ctx)
|
107
108
|
|
@@ -0,0 +1,114 @@
|
|
1
|
+
|
2
|
+
import contextvars
|
3
|
+
import threading
|
4
|
+
import time
|
5
|
+
from concurrent.futures import TimeoutError as FuturesTimeoutError
|
6
|
+
import asyncio
|
7
|
+
from typing import Any, Optional
|
8
|
+
|
9
|
+
def call_with_retries(llm, messages, retries=3, timeout=500, delay=1.0):
    """
    Call ``llm.chat`` synchronously with a per-attempt timeout and retries.

    Each attempt runs ``llm.chat(messages=messages)`` on a daemon thread,
    inside a copy of the caller's ``contextvars`` context, and waits up to
    ``timeout`` seconds for it to finish. An attempt counts as successful
    only if the response has a ``message`` with truthy ``content``.

    Args:
        llm: The LLM client instance (must provide ``chat(messages=...)``)
        messages: Messages to send
        retries: Number of attempts
        timeout: Timeout in seconds for each attempt
        delay: Base delay between attempts (multiplied by attempt number)

    Returns:
        The first LLM response object with non-empty content.

    Raises:
        TimeoutError: If the last attempt timed out.
        ValueError: If the last attempt returned empty content, or no
            attempt was made.
        Exception: Whatever ``llm.chat`` raised on the last attempt.
    """
    last_exception = None

    def _run_attempt(ctx, result_holder, error_holder):
        # The holders are passed in explicitly so that a worker from an
        # earlier, timed-out attempt keeps writing to ITS OWN dicts. Reading
        # them from the enclosing scope would resolve the names when the call
        # finishes and leak a stale result/error into the current attempt.
        try:
            result_holder["response"] = ctx.run(llm.chat, messages=messages)
        except Exception as e:
            error_holder["error"] = e

    for attempt in range(1, retries + 1):
        ctx = contextvars.copy_context()
        result_holder = {}
        error_holder = {}

        worker = threading.Thread(
            target=_run_attempt,
            args=(ctx, result_holder, error_holder),
            daemon=True,
        )
        worker.start()
        worker.join(timeout)

        if worker.is_alive():
            print(f"Attempt {attempt} timed out after {timeout} seconds")
            # Do not join; thread is daemon and won't block process exit
            last_exception = TimeoutError("Timed out")
        else:
            if "error" in error_holder:
                err = error_holder["error"]
                # Normalize FuturesTimeoutError if raised inside llm.chat
                if isinstance(err, FuturesTimeoutError):
                    print(f"Attempt {attempt} timed out inside LLM after {timeout} seconds")
                    last_exception = TimeoutError("Timed out")
                else:
                    print(f"Attempt {attempt} failed with error: {err!r}")
                    last_exception = err
            else:
                response = result_holder.get("response")
                if (
                    response is not None
                    and getattr(response, "message", None) is not None
                    and getattr(response.message, "content", None)
                ):
                    return response
                else:
                    print(f"Attempt {attempt} returned empty content")
                    last_exception = ValueError("Empty response content")

        # Linear back-off; no sleep after the final attempt.
        if attempt < retries:
            time.sleep(delay * attempt)

    if last_exception:
        raise last_exception
    raise ValueError("All attempts returned empty response content")
|
59
|
+
|
60
|
+
|
61
|
+
async def acall_with_retries(
    llm,
    messages: list,
    retries: int = 3,
    timeout: float = 500,
    delay: float = 1.0
) -> Any:
    """
    Await ``llm.achat`` with a per-attempt timeout, retrying on failure.

    An attempt succeeds only when the response has a ``message`` whose
    ``content`` is truthy. Between attempts the coroutine sleeps for
    ``delay * attempt`` seconds.

    Args:
        llm: The LLM client instance
        messages: List of messages to send
        retries: Number of attempts
        timeout: Timeout in seconds for each attempt
        delay: Base delay between attempts (multiplied by attempt number)

    Returns:
        The LLM response object

    Raises:
        TimeoutError: If the last attempt timed out.
        ValueError: If the last attempt returned empty content, or no
            attempt was made.
        Exception: Whatever ``llm.achat`` raised on the last attempt.
    """
    failure: Optional[Exception] = None

    for attempt in range(1, retries + 1):
        try:
            pending = llm.achat(messages=messages)  # achat(), the async counterpart of chat()
            reply = await asyncio.wait_for(pending, timeout=timeout)

            # Accept the reply only if it actually carries content
            message = None if reply is None else getattr(reply, "message", None)
            if message is not None and getattr(message, "content", None):
                return reply

            print(f"Attempt {attempt} returned empty content")
            failure = ValueError("Empty response content")
        except asyncio.TimeoutError:
            print(f"Attempt {attempt} timed out after {timeout} seconds")
            failure = TimeoutError("Timed out")
        except Exception as exc:
            print(f"Attempt {attempt} failed with error: {exc!r}")
            failure = exc

        # Back off before the next attempt; nothing to wait for after the last one
        if attempt < retries:
            await asyncio.sleep(delay * attempt)

    if failure:
        raise failure
    raise ValueError("All attempts returned empty response content")
|