autoglm-gui 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/agents/glm/agent.py +6 -1
- AutoGLM_GUI/agents/mai/agent.py +3 -0
- AutoGLM_GUI/agents/stream_runner.py +7 -2
- AutoGLM_GUI/api/agents.py +26 -1
- AutoGLM_GUI/api/history.py +27 -1
- AutoGLM_GUI/models/history.py +45 -1
- AutoGLM_GUI/scheduler_manager.py +52 -6
- AutoGLM_GUI/schemas.py +12 -0
- AutoGLM_GUI/static/assets/{about-BQm96DAl.js → about-CfwX1Cmc.js} +1 -1
- AutoGLM_GUI/static/assets/{alert-dialog-B42XxGPR.js → alert-dialog-CtGlN2IJ.js} +1 -1
- AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
- AutoGLM_GUI/static/assets/{circle-alert-D4rSJh37.js → circle-alert-t08bEMPO.js} +1 -1
- AutoGLM_GUI/static/assets/{dialog-DZ78cEcj.js → dialog-FNwZJFwk.js} +1 -1
- AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
- AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
- AutoGLM_GUI/static/assets/{index-CmZSnDqc.js → index-BaLMSqd3.js} +1 -1
- AutoGLM_GUI/static/assets/{index-CssG-3TH.js → index-CTHbFvKl.js} +5 -5
- AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
- AutoGLM_GUI/static/assets/{label-BCUzE_nm.js → label-DJFevVmr.js} +1 -1
- AutoGLM_GUI/static/assets/{logs-eoFxn5of.js → logs-RW09DyYY.js} +1 -1
- AutoGLM_GUI/static/assets/{popover-DLsuV5Sx.js → popover--JTJrE5v.js} +1 -1
- AutoGLM_GUI/static/assets/{scheduled-tasks-MyqGJvy_.js → scheduled-tasks-DTRKsQXF.js} +1 -1
- AutoGLM_GUI/static/assets/{square-pen-zGWYrdfj.js → square-pen-CPK_K680.js} +1 -1
- AutoGLM_GUI/static/assets/{textarea-BX6y7uM5.js → textarea-PRmVnWq5.js} +1 -1
- AutoGLM_GUI/static/assets/{workflows-CYFs6ssC.js → workflows-CdcsAoaT.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +49 -7
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/RECORD +31 -70
- AutoGLM_GUI/device_adapter.py +0 -263
- AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +0 -129
- AutoGLM_GUI/static/assets/history-DFBv7TGc.js +0 -1
- AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +0 -1
- mai_agent/base.py +0 -137
- mai_agent/mai_grounding_agent.py +0 -263
- mai_agent/mai_naivigation_agent.py +0 -526
- mai_agent/prompt.py +0 -148
- mai_agent/unified_memory.py +0 -67
- mai_agent/utils.py +0 -73
- phone_agent/__init__.py +0 -12
- phone_agent/actions/__init__.py +0 -5
- phone_agent/actions/handler.py +0 -400
- phone_agent/actions/handler_ios.py +0 -278
- phone_agent/adb/__init__.py +0 -51
- phone_agent/adb/connection.py +0 -358
- phone_agent/adb/device.py +0 -253
- phone_agent/adb/input.py +0 -108
- phone_agent/adb/screenshot.py +0 -108
- phone_agent/agent.py +0 -253
- phone_agent/agent_ios.py +0 -277
- phone_agent/config/__init__.py +0 -53
- phone_agent/config/apps.py +0 -227
- phone_agent/config/apps_harmonyos.py +0 -256
- phone_agent/config/apps_ios.py +0 -339
- phone_agent/config/i18n.py +0 -81
- phone_agent/config/prompts.py +0 -80
- phone_agent/config/prompts_en.py +0 -79
- phone_agent/config/prompts_zh.py +0 -82
- phone_agent/config/timing.py +0 -167
- phone_agent/device_factory.py +0 -166
- phone_agent/hdc/__init__.py +0 -53
- phone_agent/hdc/connection.py +0 -384
- phone_agent/hdc/device.py +0 -269
- phone_agent/hdc/input.py +0 -145
- phone_agent/hdc/screenshot.py +0 -127
- phone_agent/model/__init__.py +0 -5
- phone_agent/model/client.py +0 -290
- phone_agent/xctest/__init__.py +0 -47
- phone_agent/xctest/connection.py +0 -379
- phone_agent/xctest/device.py +0 -472
- phone_agent/xctest/input.py +0 -311
- phone_agent/xctest/screenshot.py +0 -226
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.5.0.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
phone_agent/agent.py
DELETED
|
@@ -1,253 +0,0 @@
|
|
|
1
|
-
"""Main PhoneAgent class for orchestrating phone automation."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import traceback
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from typing import Any, Callable
|
|
7
|
-
|
|
8
|
-
from phone_agent.actions import ActionHandler
|
|
9
|
-
from phone_agent.actions.handler import finish, parse_action
|
|
10
|
-
from phone_agent.config import get_messages, get_system_prompt
|
|
11
|
-
from phone_agent.device_factory import get_device_factory
|
|
12
|
-
from phone_agent.model import ModelClient, ModelConfig
|
|
13
|
-
from phone_agent.model.client import MessageBuilder
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
|
|
17
|
-
class AgentConfig:
|
|
18
|
-
"""Configuration for the PhoneAgent."""
|
|
19
|
-
|
|
20
|
-
max_steps: int = 100
|
|
21
|
-
device_id: str | None = None
|
|
22
|
-
lang: str = "cn"
|
|
23
|
-
system_prompt: str | None = None
|
|
24
|
-
verbose: bool = True
|
|
25
|
-
|
|
26
|
-
def __post_init__(self):
|
|
27
|
-
if self.system_prompt is None:
|
|
28
|
-
self.system_prompt = get_system_prompt(self.lang)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@dataclass
|
|
32
|
-
class StepResult:
|
|
33
|
-
"""Result of a single agent step."""
|
|
34
|
-
|
|
35
|
-
success: bool
|
|
36
|
-
finished: bool
|
|
37
|
-
action: dict[str, Any] | None
|
|
38
|
-
thinking: str
|
|
39
|
-
message: str | None = None
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class PhoneAgent:
    """
    Vision-language-model driven agent that automates an Android phone.

    Every step captures the current screen, asks the model for the next
    action and executes that action through an ``ActionHandler``.

    Args:
        model_config: Configuration for the AI model; ``ModelConfig()`` is
            used when omitted.
        agent_config: Configuration for the agent behavior; ``AgentConfig()``
            is used when omitted.
        confirmation_callback: Optional callback for sensitive action
            confirmation (forwarded to the action handler).
        takeover_callback: Optional callback for takeover requests
            (forwarded to the action handler).

    Example:
        >>> from phone_agent import PhoneAgent
        >>> from phone_agent.model import ModelConfig
        >>>
        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
        >>> agent = PhoneAgent(model_config)
        >>> agent.run("Open WeChat and send a message to John")
    """

    def __init__(
        self,
        model_config: ModelConfig | None = None,
        agent_config: AgentConfig | None = None,
        confirmation_callback: Callable[[str], bool] | None = None,
        takeover_callback: Callable[[str], None] | None = None,
    ):
        """Create the model client and action handler; start with empty state."""
        self.model_config = model_config if model_config else ModelConfig()
        self.agent_config = agent_config if agent_config else AgentConfig()

        self.model_client = ModelClient(self.model_config)
        self.action_handler = ActionHandler(
            device_id=self.agent_config.device_id,
            confirmation_callback=confirmation_callback,
            takeover_callback=takeover_callback,
        )

        # Conversation history sent to the model, and the number of steps run.
        self._context: list[dict[str, Any]] = []
        self._step_count = 0

    def run(self, task: str) -> str:
        """
        Drive the agent until the task finishes or the step limit is hit.

        Any previous conversation state is discarded first. The first step
        always runs; further steps run while the step counter is below
        ``agent_config.max_steps``.

        Args:
            task: Natural language description of the task.

        Returns:
            The final step's message (or "Task completed" when it has none),
            or "Max steps reached" when the limit stops the loop.
        """
        self._context = []
        self._step_count = 0

        # The first step carries the user's task text.
        outcome = self._execute_step(task, is_first=True)

        while not outcome.finished:
            if self._step_count >= self.agent_config.max_steps:
                return "Max steps reached"
            outcome = self._execute_step(is_first=False)

        return outcome.message or "Task completed"

    def step(self, task: str | None = None) -> StepResult:
        """
        Run exactly one agent step (manual control / debugging).

        Args:
            task: Task description (only needed for first step).

        Returns:
            StepResult with step details.

        Raises:
            ValueError: If the context is empty and no task was given.
        """
        starting = not self._context

        if starting and not task:
            raise ValueError("Task is required for the first step")

        return self._execute_step(task, starting)

    def reset(self) -> None:
        """Clear the conversation context and step counter for a new task."""
        self._step_count = 0
        self._context = []

    def _execute_step(
        self, user_prompt: str | None = None, is_first: bool = False
    ) -> StepResult:
        """Capture the screen, query the model, run the chosen action."""
        cfg = self.agent_config
        self._step_count += 1

        # --- Observe: screenshot and foreground app of the target device ---
        factory = get_device_factory()
        screenshot = factory.get_screenshot(cfg.device_id)
        current_app = factory.get_current_app(cfg.device_id)

        # --- Build the user message (system prompt goes first on step one) ---
        if is_first:
            self._context.append(
                MessageBuilder.create_system_message(cfg.system_prompt)
            )

        screen_info = MessageBuilder.build_screen_info(current_app)
        text_content = (
            f"{user_prompt}\n\n{screen_info}"
            if is_first
            else f"** Screen Info **\n\n{screen_info}"
        )
        user_message = MessageBuilder.create_user_message(
            text=text_content, image_base64=screenshot.base64_data
        )
        self._context.append(user_message)

        # --- Ask the model; any failure here ends the run with an error ---
        try:
            msgs = get_messages(cfg.lang)
            # This banner is printed regardless of ``verbose``.
            print("\n" + "=" * 50)
            print(f"💭 {msgs['thinking']}:")
            print("-" * 50)
            response = self.model_client.request(self._context)
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message=f"Model error: {e}",
            )

        # --- Parse the action; unparsable output becomes a finish action ---
        try:
            action = parse_action(response.action)
        except ValueError:
            if cfg.verbose:
                traceback.print_exc()
            action = finish(message=response.action)

        if cfg.verbose:
            print("-" * 50)
            print(f"🎯 {msgs['action']}:")
            print(json.dumps(action, ensure_ascii=False, indent=2))
            print("=" * 50 + "\n")

        # Strip the screenshot from the stored message to save space.
        latest = self._context[-1]
        self._context[-1] = MessageBuilder.remove_images_from_message(latest)

        # --- Act; if execution fails, execute a finish action carrying the error ---
        try:
            result = self.action_handler.execute(
                action, screenshot.width, screenshot.height
            )
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            result = self.action_handler.execute(
                finish(message=str(e)), screenshot.width, screenshot.height
            )

        # Record the assistant turn in the conversation history.
        assistant_text = (
            f"<think>{response.thinking}</think><answer>{response.action}</answer>"
        )
        self._context.append(MessageBuilder.create_assistant_message(assistant_text))

        # The step is final when the action is a finish or the handler says so.
        finished = action.get("_metadata") == "finish" or result.should_finish

        if finished and cfg.verbose:
            msgs = get_messages(cfg.lang)
            print("\n" + "🎉 " + "=" * 48)
            title = msgs['task_completed']
            summary = result.message or action.get('message', msgs['done'])
            print(f"✅ {title}: {summary}")
            print("=" * 50 + "\n")

        return StepResult(
            success=result.success,
            finished=finished,
            action=action,
            thinking=response.thinking,
            message=result.message or action.get("message"),
        )

    @property
    def context(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the conversation context."""
        return list(self._context)

    @property
    def step_count(self) -> int:
        """Return how many steps have been executed so far."""
        return self._step_count
|
phone_agent/agent_ios.py
DELETED
|
@@ -1,277 +0,0 @@
|
|
|
1
|
-
"""iOS PhoneAgent class for orchestrating iOS phone automation."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import traceback
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from typing import Any, Callable
|
|
7
|
-
|
|
8
|
-
from phone_agent.actions.handler import finish, parse_action
|
|
9
|
-
from phone_agent.actions.handler_ios import IOSActionHandler
|
|
10
|
-
from phone_agent.config import get_messages, get_system_prompt
|
|
11
|
-
from phone_agent.model import ModelClient, ModelConfig
|
|
12
|
-
from phone_agent.model.client import MessageBuilder
|
|
13
|
-
from phone_agent.xctest import XCTestConnection, get_current_app, get_screenshot
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@dataclass
|
|
17
|
-
class IOSAgentConfig:
|
|
18
|
-
"""Configuration for the iOS PhoneAgent."""
|
|
19
|
-
|
|
20
|
-
max_steps: int = 100
|
|
21
|
-
wda_url: str = "http://localhost:8100"
|
|
22
|
-
session_id: str | None = None
|
|
23
|
-
device_id: str | None = None # iOS device UDID
|
|
24
|
-
lang: str = "cn"
|
|
25
|
-
system_prompt: str | None = None
|
|
26
|
-
verbose: bool = True
|
|
27
|
-
|
|
28
|
-
def __post_init__(self):
|
|
29
|
-
if self.system_prompt is None:
|
|
30
|
-
self.system_prompt = get_system_prompt(self.lang)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@dataclass
|
|
34
|
-
class StepResult:
|
|
35
|
-
"""Result of a single agent step."""
|
|
36
|
-
|
|
37
|
-
success: bool
|
|
38
|
-
finished: bool
|
|
39
|
-
action: dict[str, Any] | None
|
|
40
|
-
thinking: str
|
|
41
|
-
message: str | None = None
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class IOSPhoneAgent:
    """
    Vision-language-model driven agent that automates an iOS phone.

    Every step captures the current screen through WebDriverAgent, asks the
    model for the next action and executes it through an ``IOSActionHandler``.

    Args:
        model_config: Configuration for the AI model; ``ModelConfig()`` is
            used when omitted.
        agent_config: Configuration for the iOS agent behavior;
            ``IOSAgentConfig()`` is used when omitted.
        confirmation_callback: Optional callback for sensitive action
            confirmation (forwarded to the action handler).
        takeover_callback: Optional callback for takeover requests
            (forwarded to the action handler).

    Example:
        >>> from phone_agent.agent_ios import IOSPhoneAgent, IOSAgentConfig
        >>> from phone_agent.model import ModelConfig
        >>>
        >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
        >>> agent_config = IOSAgentConfig(wda_url="http://localhost:8100")
        >>> agent = IOSPhoneAgent(model_config, agent_config)
        >>> agent.run("Open Safari and search for Apple")
    """

    def __init__(
        self,
        model_config: ModelConfig | None = None,
        agent_config: IOSAgentConfig | None = None,
        confirmation_callback: Callable[[str], bool] | None = None,
        takeover_callback: Callable[[str], None] | None = None,
    ):
        """Create the model client, WDA connection/session and action handler."""
        self.model_config = model_config if model_config else ModelConfig()
        self.agent_config = agent_config if agent_config else IOSAgentConfig()
        cfg = self.agent_config

        self.model_client = ModelClient(self.model_config)

        # Connection object for the configured WebDriverAgent endpoint.
        self.wda_connection = XCTestConnection(wda_url=cfg.wda_url)

        # No session supplied: try to start one and remember its id.
        if cfg.session_id is None:
            started, new_session = self.wda_connection.start_wda_session()
            # "session_started" is treated as "no explicit session id".
            has_explicit_id = started and new_session != "session_started"
            if has_explicit_id:
                cfg.session_id = new_session
            if cfg.verbose:
                if has_explicit_id:
                    print(f"✅ Created WDA session: {new_session}")
                else:
                    print("⚠️ Using default WDA session (no explicit session ID)")

        self.action_handler = IOSActionHandler(
            wda_url=cfg.wda_url,
            session_id=cfg.session_id,
            confirmation_callback=confirmation_callback,
            takeover_callback=takeover_callback,
        )

        # Conversation history sent to the model, and the number of steps run.
        self._context: list[dict[str, Any]] = []
        self._step_count = 0

    def run(self, task: str) -> str:
        """
        Drive the agent until the task finishes or the step limit is hit.

        Any previous conversation state is discarded first. The first step
        always runs; further steps run while the step counter is below
        ``agent_config.max_steps``.

        Args:
            task: Natural language description of the task.

        Returns:
            The final step's message (or "Task completed" when it has none),
            or "Max steps reached" when the limit stops the loop.
        """
        self._context = []
        self._step_count = 0

        # The first step carries the user's task text.
        outcome = self._execute_step(task, is_first=True)

        while not outcome.finished:
            if self._step_count >= self.agent_config.max_steps:
                return "Max steps reached"
            outcome = self._execute_step(is_first=False)

        return outcome.message or "Task completed"

    def step(self, task: str | None = None) -> StepResult:
        """
        Run exactly one agent step (manual control / debugging).

        Args:
            task: Task description (only needed for first step).

        Returns:
            StepResult with step details.

        Raises:
            ValueError: If the context is empty and no task was given.
        """
        starting = not self._context

        if starting and not task:
            raise ValueError("Task is required for the first step")

        return self._execute_step(task, starting)

    def reset(self) -> None:
        """Clear the conversation context and step counter for a new task."""
        self._step_count = 0
        self._context = []

    def _execute_step(
        self, user_prompt: str | None = None, is_first: bool = False
    ) -> StepResult:
        """Capture the screen, query the model, run the chosen action."""
        cfg = self.agent_config
        self._step_count += 1

        # --- Observe: screenshot and foreground app via WebDriverAgent ---
        screenshot = get_screenshot(
            wda_url=cfg.wda_url,
            session_id=cfg.session_id,
            device_id=cfg.device_id,
        )
        current_app = get_current_app(
            wda_url=cfg.wda_url, session_id=cfg.session_id
        )

        # --- Build the user message (system prompt goes first on step one) ---
        if is_first:
            self._context.append(
                MessageBuilder.create_system_message(cfg.system_prompt)
            )

        screen_info = MessageBuilder.build_screen_info(current_app)
        text_content = (
            f"{user_prompt}\n\n{screen_info}"
            if is_first
            else f"** Screen Info **\n\n{screen_info}"
        )
        user_message = MessageBuilder.create_user_message(
            text=text_content, image_base64=screenshot.base64_data
        )
        self._context.append(user_message)

        # --- Ask the model; any failure here ends the run with an error ---
        try:
            response = self.model_client.request(self._context)
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            return StepResult(
                success=False,
                finished=True,
                action=None,
                thinking="",
                message=f"Model error: {e}",
            )

        # --- Parse the action; unparsable output becomes a finish action ---
        try:
            action = parse_action(response.action)
        except ValueError:
            if cfg.verbose:
                traceback.print_exc()
            action = finish(message=response.action)

        if cfg.verbose:
            # Show the model's reasoning followed by the parsed action.
            msgs = get_messages(cfg.lang)
            print("\n" + "=" * 50)
            print(f"💭 {msgs['thinking']}:")
            print("-" * 50)
            print(response.thinking)
            print("-" * 50)
            print(f"🎯 {msgs['action']}:")
            print(json.dumps(action, ensure_ascii=False, indent=2))
            print("=" * 50 + "\n")

        # Strip the screenshot from the stored message to save space.
        latest = self._context[-1]
        self._context[-1] = MessageBuilder.remove_images_from_message(latest)

        # --- Act; if execution fails, execute a finish action carrying the error ---
        try:
            result = self.action_handler.execute(
                action, screenshot.width, screenshot.height
            )
        except Exception as e:
            if cfg.verbose:
                traceback.print_exc()
            result = self.action_handler.execute(
                finish(message=str(e)), screenshot.width, screenshot.height
            )

        # Record the assistant turn in the conversation history.
        assistant_text = (
            f"<think>{response.thinking}</think><answer>{response.action}</answer>"
        )
        self._context.append(MessageBuilder.create_assistant_message(assistant_text))

        # The step is final when the action is a finish or the handler says so.
        finished = action.get("_metadata") == "finish" or result.should_finish

        if finished and cfg.verbose:
            msgs = get_messages(cfg.lang)
            print("\n" + "🎉 " + "=" * 48)
            title = msgs['task_completed']
            summary = result.message or action.get('message', msgs['done'])
            print(f"✅ {title}: {summary}")
            print("=" * 50 + "\n")

        return StepResult(
            success=result.success,
            finished=finished,
            action=action,
            thinking=response.thinking,
            message=result.message or action.get("message"),
        )

    @property
    def context(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the conversation context."""
        return list(self._context)

    @property
    def step_count(self) -> int:
        """Return how many steps have been executed so far."""
        return self._step_count
|
phone_agent/config/__init__.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
"""Configuration module for Phone Agent."""
|
|
2
|
-
|
|
3
|
-
from phone_agent.config.apps import APP_PACKAGES
|
|
4
|
-
from phone_agent.config.apps_ios import APP_PACKAGES_IOS
|
|
5
|
-
from phone_agent.config.i18n import get_message, get_messages
|
|
6
|
-
from phone_agent.config.prompts_en import SYSTEM_PROMPT as SYSTEM_PROMPT_EN
|
|
7
|
-
from phone_agent.config.prompts_zh import SYSTEM_PROMPT as SYSTEM_PROMPT_ZH
|
|
8
|
-
from phone_agent.config.timing import (
|
|
9
|
-
TIMING_CONFIG,
|
|
10
|
-
ActionTimingConfig,
|
|
11
|
-
ConnectionTimingConfig,
|
|
12
|
-
DeviceTimingConfig,
|
|
13
|
-
TimingConfig,
|
|
14
|
-
get_timing_config,
|
|
15
|
-
update_timing_config,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def get_system_prompt(lang: str = "cn") -> str:
    """
    Select the system prompt that matches a language code.

    Args:
        lang: Language code. ``"en"`` selects the English prompt; every other
            value (including the default ``"cn"``) selects the Chinese prompt.

    Returns:
        System prompt string.
    """
    return SYSTEM_PROMPT_EN if lang == "en" else SYSTEM_PROMPT_ZH
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# Backward-compatible alias: the unqualified name keeps pointing at the
# Chinese prompt.
SYSTEM_PROMPT = SYSTEM_PROMPT_ZH

# Public names of the package.
__all__ = [
    # Re-exported from the apps / apps_ios modules
    "APP_PACKAGES",
    "APP_PACKAGES_IOS",
    # System prompts and their selector
    "SYSTEM_PROMPT",
    "SYSTEM_PROMPT_ZH",
    "SYSTEM_PROMPT_EN",
    "get_system_prompt",
    # Re-exported from the i18n module
    "get_messages",
    "get_message",
    # Re-exported from the timing module
    "TIMING_CONFIG",
    "TimingConfig",
    "ActionTimingConfig",
    "DeviceTimingConfig",
    "ConnectionTimingConfig",
    "get_timing_config",
    "update_timing_config",
]
|