autoglm-gui 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -4
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. phone_agent/actions/handler_ios.py → AutoGLM_GUI/actions/handler.py +30 -112
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. {phone_agent → AutoGLM_GUI}/adb/__init__.py +25 -23
  7. {phone_agent → AutoGLM_GUI}/adb/connection.py +5 -40
  8. {phone_agent → AutoGLM_GUI}/adb/device.py +12 -94
  9. {phone_agent → AutoGLM_GUI}/adb/input.py +6 -47
  10. AutoGLM_GUI/adb/screenshot.py +11 -0
  11. {phone_agent/config → AutoGLM_GUI/adb}/timing.py +1 -1
  12. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  13. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  14. AutoGLM_GUI/adb_plus/serial.py +38 -20
  15. AutoGLM_GUI/adb_plus/touch.py +4 -9
  16. AutoGLM_GUI/agents/__init__.py +43 -12
  17. AutoGLM_GUI/agents/events.py +19 -0
  18. AutoGLM_GUI/agents/factory.py +31 -38
  19. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  20. AutoGLM_GUI/agents/glm/agent.py +297 -0
  21. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  22. AutoGLM_GUI/agents/glm/parser.py +110 -0
  23. {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_en.py +7 -9
  24. {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_zh.py +18 -25
  25. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  26. AutoGLM_GUI/agents/mai/agent.py +408 -0
  27. AutoGLM_GUI/agents/mai/parser.py +254 -0
  28. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  29. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  30. AutoGLM_GUI/agents/protocols.py +12 -8
  31. AutoGLM_GUI/agents/stream_runner.py +193 -0
  32. AutoGLM_GUI/api/__init__.py +40 -21
  33. AutoGLM_GUI/api/agents.py +181 -239
  34. AutoGLM_GUI/api/control.py +9 -6
  35. AutoGLM_GUI/api/devices.py +102 -12
  36. AutoGLM_GUI/api/history.py +104 -0
  37. AutoGLM_GUI/api/layered_agent.py +67 -15
  38. AutoGLM_GUI/api/media.py +64 -1
  39. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  40. AutoGLM_GUI/config.py +81 -0
  41. AutoGLM_GUI/config_manager.py +68 -51
  42. AutoGLM_GUI/device_manager.py +248 -29
  43. AutoGLM_GUI/device_protocol.py +1 -1
  44. AutoGLM_GUI/devices/adb_device.py +5 -10
  45. AutoGLM_GUI/devices/mock_device.py +4 -2
  46. AutoGLM_GUI/devices/remote_device.py +8 -3
  47. AutoGLM_GUI/history_manager.py +164 -0
  48. AutoGLM_GUI/model/__init__.py +5 -0
  49. AutoGLM_GUI/model/message_builder.py +69 -0
  50. AutoGLM_GUI/model/types.py +24 -0
  51. AutoGLM_GUI/models/__init__.py +10 -0
  52. AutoGLM_GUI/models/history.py +140 -0
  53. AutoGLM_GUI/models/scheduled_task.py +71 -0
  54. AutoGLM_GUI/parsers/__init__.py +22 -0
  55. AutoGLM_GUI/parsers/base.py +50 -0
  56. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  57. AutoGLM_GUI/phone_agent_manager.py +62 -396
  58. AutoGLM_GUI/platform_utils.py +26 -0
  59. AutoGLM_GUI/prompt_config.py +15 -0
  60. AutoGLM_GUI/prompts/__init__.py +32 -0
  61. AutoGLM_GUI/scheduler_manager.py +350 -0
  62. AutoGLM_GUI/schemas.py +246 -72
  63. AutoGLM_GUI/scrcpy_stream.py +142 -24
  64. AutoGLM_GUI/socketio_server.py +100 -27
  65. AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-CfwX1Cmc.js} +1 -1
  66. AutoGLM_GUI/static/assets/alert-dialog-CtGlN2IJ.js +1 -0
  67. AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
  68. AutoGLM_GUI/static/assets/circle-alert-t08bEMPO.js +1 -0
  69. AutoGLM_GUI/static/assets/dialog-FNwZJFwk.js +45 -0
  70. AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
  71. AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
  72. AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-BaLMSqd3.js} +1 -1
  73. AutoGLM_GUI/static/assets/index-CTHbFvKl.js +11 -0
  74. AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
  75. AutoGLM_GUI/static/assets/label-DJFevVmr.js +1 -0
  76. AutoGLM_GUI/static/assets/logs-RW09DyYY.js +1 -0
  77. AutoGLM_GUI/static/assets/popover--JTJrE5v.js +1 -0
  78. AutoGLM_GUI/static/assets/scheduled-tasks-DTRKsQXF.js +1 -0
  79. AutoGLM_GUI/static/assets/square-pen-CPK_K680.js +1 -0
  80. AutoGLM_GUI/static/assets/textarea-PRmVnWq5.js +1 -0
  81. AutoGLM_GUI/static/assets/workflows-CdcsAoaT.js +1 -0
  82. AutoGLM_GUI/static/index.html +2 -2
  83. AutoGLM_GUI/types.py +17 -0
  84. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +179 -130
  85. autoglm_gui-1.5.1.dist-info/RECORD +118 -0
  86. AutoGLM_GUI/agents/mai_adapter.py +0 -627
  87. AutoGLM_GUI/api/dual_model.py +0 -317
  88. AutoGLM_GUI/device_adapter.py +0 -263
  89. AutoGLM_GUI/dual_model/__init__.py +0 -53
  90. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  91. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  92. AutoGLM_GUI/dual_model/protocols.py +0 -354
  93. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  94. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  95. AutoGLM_GUI/phone_agent_patches.py +0 -147
  96. AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
  97. AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
  98. AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
  99. AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
  100. AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
  101. autoglm_gui-1.4.1.dist-info/RECORD +0 -117
  102. mai_agent/base.py +0 -137
  103. mai_agent/mai_grounding_agent.py +0 -263
  104. mai_agent/mai_naivigation_agent.py +0 -526
  105. mai_agent/prompt.py +0 -148
  106. mai_agent/unified_memory.py +0 -67
  107. mai_agent/utils.py +0 -73
  108. phone_agent/__init__.py +0 -12
  109. phone_agent/actions/__init__.py +0 -5
  110. phone_agent/actions/handler.py +0 -400
  111. phone_agent/adb/screenshot.py +0 -108
  112. phone_agent/agent.py +0 -253
  113. phone_agent/agent_ios.py +0 -277
  114. phone_agent/config/__init__.py +0 -53
  115. phone_agent/config/apps_harmonyos.py +0 -256
  116. phone_agent/config/apps_ios.py +0 -339
  117. phone_agent/config/prompts.py +0 -80
  118. phone_agent/device_factory.py +0 -166
  119. phone_agent/hdc/__init__.py +0 -53
  120. phone_agent/hdc/connection.py +0 -384
  121. phone_agent/hdc/device.py +0 -269
  122. phone_agent/hdc/input.py +0 -145
  123. phone_agent/hdc/screenshot.py +0 -127
  124. phone_agent/model/__init__.py +0 -5
  125. phone_agent/model/client.py +0 -290
  126. phone_agent/xctest/__init__.py +0 -47
  127. phone_agent/xctest/connection.py +0 -379
  128. phone_agent/xctest/device.py +0 -472
  129. phone_agent/xctest/input.py +0 -311
  130. phone_agent/xctest/screenshot.py +0 -226
  131. {phone_agent/config → AutoGLM_GUI/adb}/apps.py +0 -0
  132. {phone_agent/config → AutoGLM_GUI}/i18n.py +0 -0
  133. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
  134. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
  135. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
phone_agent/agent.py DELETED
@@ -1,253 +0,0 @@
1
- """Main PhoneAgent class for orchestrating phone automation."""
2
-
3
- import json
4
- import traceback
5
- from dataclasses import dataclass
6
- from typing import Any, Callable
7
-
8
- from phone_agent.actions import ActionHandler
9
- from phone_agent.actions.handler import finish, parse_action
10
- from phone_agent.config import get_messages, get_system_prompt
11
- from phone_agent.device_factory import get_device_factory
12
- from phone_agent.model import ModelClient, ModelConfig
13
- from phone_agent.model.client import MessageBuilder
14
-
15
-
16
- @dataclass
17
- class AgentConfig:
18
- """Configuration for the PhoneAgent."""
19
-
20
- max_steps: int = 100
21
- device_id: str | None = None
22
- lang: str = "cn"
23
- system_prompt: str | None = None
24
- verbose: bool = True
25
-
26
- def __post_init__(self):
27
- if self.system_prompt is None:
28
- self.system_prompt = get_system_prompt(self.lang)
29
-
30
-
31
- @dataclass
32
- class StepResult:
33
- """Result of a single agent step."""
34
-
35
- success: bool
36
- finished: bool
37
- action: dict[str, Any] | None
38
- thinking: str
39
- message: str | None = None
40
-
41
-
42
- class PhoneAgent:
43
- """
44
- AI-powered agent for automating Android phone interactions.
45
-
46
- The agent uses a vision-language model to understand screen content
47
- and decide on actions to complete user tasks.
48
-
49
- Args:
50
- model_config: Configuration for the AI model.
51
- agent_config: Configuration for the agent behavior.
52
- confirmation_callback: Optional callback for sensitive action confirmation.
53
- takeover_callback: Optional callback for takeover requests.
54
-
55
- Example:
56
- >>> from phone_agent import PhoneAgent
57
- >>> from phone_agent.model import ModelConfig
58
- >>>
59
- >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
60
- >>> agent = PhoneAgent(model_config)
61
- >>> agent.run("Open WeChat and send a message to John")
62
- """
63
-
64
- def __init__(
65
- self,
66
- model_config: ModelConfig | None = None,
67
- agent_config: AgentConfig | None = None,
68
- confirmation_callback: Callable[[str], bool] | None = None,
69
- takeover_callback: Callable[[str], None] | None = None,
70
- ):
71
- self.model_config = model_config or ModelConfig()
72
- self.agent_config = agent_config or AgentConfig()
73
-
74
- self.model_client = ModelClient(self.model_config)
75
- self.action_handler = ActionHandler(
76
- device_id=self.agent_config.device_id,
77
- confirmation_callback=confirmation_callback,
78
- takeover_callback=takeover_callback,
79
- )
80
-
81
- self._context: list[dict[str, Any]] = []
82
- self._step_count = 0
83
-
84
- def run(self, task: str) -> str:
85
- """
86
- Run the agent to complete a task.
87
-
88
- Args:
89
- task: Natural language description of the task.
90
-
91
- Returns:
92
- Final message from the agent.
93
- """
94
- self._context = []
95
- self._step_count = 0
96
-
97
- # First step with user prompt
98
- result = self._execute_step(task, is_first=True)
99
-
100
- if result.finished:
101
- return result.message or "Task completed"
102
-
103
- # Continue until finished or max steps reached
104
- while self._step_count < self.agent_config.max_steps:
105
- result = self._execute_step(is_first=False)
106
-
107
- if result.finished:
108
- return result.message or "Task completed"
109
-
110
- return "Max steps reached"
111
-
112
- def step(self, task: str | None = None) -> StepResult:
113
- """
114
- Execute a single step of the agent.
115
-
116
- Useful for manual control or debugging.
117
-
118
- Args:
119
- task: Task description (only needed for first step).
120
-
121
- Returns:
122
- StepResult with step details.
123
- """
124
- is_first = len(self._context) == 0
125
-
126
- if is_first and not task:
127
- raise ValueError("Task is required for the first step")
128
-
129
- return self._execute_step(task, is_first)
130
-
131
- def reset(self) -> None:
132
- """Reset the agent state for a new task."""
133
- self._context = []
134
- self._step_count = 0
135
-
136
- def _execute_step(
137
- self, user_prompt: str | None = None, is_first: bool = False
138
- ) -> StepResult:
139
- """Execute a single step of the agent loop."""
140
- self._step_count += 1
141
-
142
- # Capture current screen state
143
- device_factory = get_device_factory()
144
- screenshot = device_factory.get_screenshot(self.agent_config.device_id)
145
- current_app = device_factory.get_current_app(self.agent_config.device_id)
146
-
147
- # Build messages
148
- if is_first:
149
- self._context.append(
150
- MessageBuilder.create_system_message(self.agent_config.system_prompt)
151
- )
152
-
153
- screen_info = MessageBuilder.build_screen_info(current_app)
154
- text_content = f"{user_prompt}\n\n{screen_info}"
155
-
156
- self._context.append(
157
- MessageBuilder.create_user_message(
158
- text=text_content, image_base64=screenshot.base64_data
159
- )
160
- )
161
- else:
162
- screen_info = MessageBuilder.build_screen_info(current_app)
163
- text_content = f"** Screen Info **\n\n{screen_info}"
164
-
165
- self._context.append(
166
- MessageBuilder.create_user_message(
167
- text=text_content, image_base64=screenshot.base64_data
168
- )
169
- )
170
-
171
- # Get model response
172
- try:
173
- msgs = get_messages(self.agent_config.lang)
174
- print("\n" + "=" * 50)
175
- print(f"💭 {msgs['thinking']}:")
176
- print("-" * 50)
177
- response = self.model_client.request(self._context)
178
- except Exception as e:
179
- if self.agent_config.verbose:
180
- traceback.print_exc()
181
- return StepResult(
182
- success=False,
183
- finished=True,
184
- action=None,
185
- thinking="",
186
- message=f"Model error: {e}",
187
- )
188
-
189
- # Parse action from response
190
- try:
191
- action = parse_action(response.action)
192
- except ValueError:
193
- if self.agent_config.verbose:
194
- traceback.print_exc()
195
- action = finish(message=response.action)
196
-
197
- if self.agent_config.verbose:
198
- # Print thinking process
199
- print("-" * 50)
200
- print(f"🎯 {msgs['action']}:")
201
- print(json.dumps(action, ensure_ascii=False, indent=2))
202
- print("=" * 50 + "\n")
203
-
204
- # Remove image from context to save space
205
- self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])
206
-
207
- # Execute action
208
- try:
209
- result = self.action_handler.execute(
210
- action, screenshot.width, screenshot.height
211
- )
212
- except Exception as e:
213
- if self.agent_config.verbose:
214
- traceback.print_exc()
215
- result = self.action_handler.execute(
216
- finish(message=str(e)), screenshot.width, screenshot.height
217
- )
218
-
219
- # Add assistant response to context
220
- self._context.append(
221
- MessageBuilder.create_assistant_message(
222
- f"<think>{response.thinking}</think><answer>{response.action}</answer>"
223
- )
224
- )
225
-
226
- # Check if finished
227
- finished = action.get("_metadata") == "finish" or result.should_finish
228
-
229
- if finished and self.agent_config.verbose:
230
- msgs = get_messages(self.agent_config.lang)
231
- print("\n" + "🎉 " + "=" * 48)
232
- print(
233
- f"✅ {msgs['task_completed']}: {result.message or action.get('message', msgs['done'])}"
234
- )
235
- print("=" * 50 + "\n")
236
-
237
- return StepResult(
238
- success=result.success,
239
- finished=finished,
240
- action=action,
241
- thinking=response.thinking,
242
- message=result.message or action.get("message"),
243
- )
244
-
245
- @property
246
- def context(self) -> list[dict[str, Any]]:
247
- """Get the current conversation context."""
248
- return self._context.copy()
249
-
250
- @property
251
- def step_count(self) -> int:
252
- """Get the current step count."""
253
- return self._step_count
phone_agent/agent_ios.py DELETED
@@ -1,277 +0,0 @@
1
- """iOS PhoneAgent class for orchestrating iOS phone automation."""
2
-
3
- import json
4
- import traceback
5
- from dataclasses import dataclass
6
- from typing import Any, Callable
7
-
8
- from phone_agent.actions.handler import finish, parse_action
9
- from phone_agent.actions.handler_ios import IOSActionHandler
10
- from phone_agent.config import get_messages, get_system_prompt
11
- from phone_agent.model import ModelClient, ModelConfig
12
- from phone_agent.model.client import MessageBuilder
13
- from phone_agent.xctest import XCTestConnection, get_current_app, get_screenshot
14
-
15
-
16
- @dataclass
17
- class IOSAgentConfig:
18
- """Configuration for the iOS PhoneAgent."""
19
-
20
- max_steps: int = 100
21
- wda_url: str = "http://localhost:8100"
22
- session_id: str | None = None
23
- device_id: str | None = None # iOS device UDID
24
- lang: str = "cn"
25
- system_prompt: str | None = None
26
- verbose: bool = True
27
-
28
- def __post_init__(self):
29
- if self.system_prompt is None:
30
- self.system_prompt = get_system_prompt(self.lang)
31
-
32
-
33
- @dataclass
34
- class StepResult:
35
- """Result of a single agent step."""
36
-
37
- success: bool
38
- finished: bool
39
- action: dict[str, Any] | None
40
- thinking: str
41
- message: str | None = None
42
-
43
-
44
- class IOSPhoneAgent:
45
- """
46
- AI-powered agent for automating iOS phone interactions.
47
-
48
- The agent uses a vision-language model to understand screen content
49
- and decide on actions to complete user tasks via WebDriverAgent.
50
-
51
- Args:
52
- model_config: Configuration for the AI model.
53
- agent_config: Configuration for the iOS agent behavior.
54
- confirmation_callback: Optional callback for sensitive action confirmation.
55
- takeover_callback: Optional callback for takeover requests.
56
-
57
- Example:
58
- >>> from phone_agent.agent_ios import IOSPhoneAgent, IOSAgentConfig
59
- >>> from phone_agent.model import ModelConfig
60
- >>>
61
- >>> model_config = ModelConfig(base_url="http://localhost:8000/v1")
62
- >>> agent_config = IOSAgentConfig(wda_url="http://localhost:8100")
63
- >>> agent = IOSPhoneAgent(model_config, agent_config)
64
- >>> agent.run("Open Safari and search for Apple")
65
- """
66
-
67
- def __init__(
68
- self,
69
- model_config: ModelConfig | None = None,
70
- agent_config: IOSAgentConfig | None = None,
71
- confirmation_callback: Callable[[str], bool] | None = None,
72
- takeover_callback: Callable[[str], None] | None = None,
73
- ):
74
- self.model_config = model_config or ModelConfig()
75
- self.agent_config = agent_config or IOSAgentConfig()
76
-
77
- self.model_client = ModelClient(self.model_config)
78
-
79
- # Initialize WDA connection and create session if needed
80
- self.wda_connection = XCTestConnection(wda_url=self.agent_config.wda_url)
81
-
82
- # Auto-create session if not provided
83
- if self.agent_config.session_id is None:
84
- success, session_id = self.wda_connection.start_wda_session()
85
- if success and session_id != "session_started":
86
- self.agent_config.session_id = session_id
87
- if self.agent_config.verbose:
88
- print(f"✅ Created WDA session: {session_id}")
89
- elif self.agent_config.verbose:
90
- print("⚠️ Using default WDA session (no explicit session ID)")
91
-
92
- self.action_handler = IOSActionHandler(
93
- wda_url=self.agent_config.wda_url,
94
- session_id=self.agent_config.session_id,
95
- confirmation_callback=confirmation_callback,
96
- takeover_callback=takeover_callback,
97
- )
98
-
99
- self._context: list[dict[str, Any]] = []
100
- self._step_count = 0
101
-
102
- def run(self, task: str) -> str:
103
- """
104
- Run the agent to complete a task.
105
-
106
- Args:
107
- task: Natural language description of the task.
108
-
109
- Returns:
110
- Final message from the agent.
111
- """
112
- self._context = []
113
- self._step_count = 0
114
-
115
- # First step with user prompt
116
- result = self._execute_step(task, is_first=True)
117
-
118
- if result.finished:
119
- return result.message or "Task completed"
120
-
121
- # Continue until finished or max steps reached
122
- while self._step_count < self.agent_config.max_steps:
123
- result = self._execute_step(is_first=False)
124
-
125
- if result.finished:
126
- return result.message or "Task completed"
127
-
128
- return "Max steps reached"
129
-
130
- def step(self, task: str | None = None) -> StepResult:
131
- """
132
- Execute a single step of the agent.
133
-
134
- Useful for manual control or debugging.
135
-
136
- Args:
137
- task: Task description (only needed for first step).
138
-
139
- Returns:
140
- StepResult with step details.
141
- """
142
- is_first = len(self._context) == 0
143
-
144
- if is_first and not task:
145
- raise ValueError("Task is required for the first step")
146
-
147
- return self._execute_step(task, is_first)
148
-
149
- def reset(self) -> None:
150
- """Reset the agent state for a new task."""
151
- self._context = []
152
- self._step_count = 0
153
-
154
- def _execute_step(
155
- self, user_prompt: str | None = None, is_first: bool = False
156
- ) -> StepResult:
157
- """Execute a single step of the agent loop."""
158
- self._step_count += 1
159
-
160
- # Capture current screen state
161
- screenshot = get_screenshot(
162
- wda_url=self.agent_config.wda_url,
163
- session_id=self.agent_config.session_id,
164
- device_id=self.agent_config.device_id,
165
- )
166
- current_app = get_current_app(
167
- wda_url=self.agent_config.wda_url, session_id=self.agent_config.session_id
168
- )
169
-
170
- # Build messages
171
- if is_first:
172
- self._context.append(
173
- MessageBuilder.create_system_message(self.agent_config.system_prompt)
174
- )
175
-
176
- screen_info = MessageBuilder.build_screen_info(current_app)
177
- text_content = f"{user_prompt}\n\n{screen_info}"
178
-
179
- self._context.append(
180
- MessageBuilder.create_user_message(
181
- text=text_content, image_base64=screenshot.base64_data
182
- )
183
- )
184
- else:
185
- screen_info = MessageBuilder.build_screen_info(current_app)
186
- text_content = f"** Screen Info **\n\n{screen_info}"
187
-
188
- self._context.append(
189
- MessageBuilder.create_user_message(
190
- text=text_content, image_base64=screenshot.base64_data
191
- )
192
- )
193
-
194
- # Get model response
195
- try:
196
- response = self.model_client.request(self._context)
197
- except Exception as e:
198
- if self.agent_config.verbose:
199
- traceback.print_exc()
200
- return StepResult(
201
- success=False,
202
- finished=True,
203
- action=None,
204
- thinking="",
205
- message=f"Model error: {e}",
206
- )
207
-
208
- # Parse action from response
209
- try:
210
- action = parse_action(response.action)
211
- except ValueError:
212
- if self.agent_config.verbose:
213
- traceback.print_exc()
214
- action = finish(message=response.action)
215
-
216
- if self.agent_config.verbose:
217
- # Print thinking process
218
- msgs = get_messages(self.agent_config.lang)
219
- print("\n" + "=" * 50)
220
- print(f"💭 {msgs['thinking']}:")
221
- print("-" * 50)
222
- print(response.thinking)
223
- print("-" * 50)
224
- print(f"🎯 {msgs['action']}:")
225
- print(json.dumps(action, ensure_ascii=False, indent=2))
226
- print("=" * 50 + "\n")
227
-
228
- # Remove image from context to save space
229
- self._context[-1] = MessageBuilder.remove_images_from_message(self._context[-1])
230
-
231
- # Execute action
232
- try:
233
- result = self.action_handler.execute(
234
- action, screenshot.width, screenshot.height
235
- )
236
- except Exception as e:
237
- if self.agent_config.verbose:
238
- traceback.print_exc()
239
- result = self.action_handler.execute(
240
- finish(message=str(e)), screenshot.width, screenshot.height
241
- )
242
-
243
- # Add assistant response to context
244
- self._context.append(
245
- MessageBuilder.create_assistant_message(
246
- f"<think>{response.thinking}</think><answer>{response.action}</answer>"
247
- )
248
- )
249
-
250
- # Check if finished
251
- finished = action.get("_metadata") == "finish" or result.should_finish
252
-
253
- if finished and self.agent_config.verbose:
254
- msgs = get_messages(self.agent_config.lang)
255
- print("\n" + "🎉 " + "=" * 48)
256
- print(
257
- f"✅ {msgs['task_completed']}: {result.message or action.get('message', msgs['done'])}"
258
- )
259
- print("=" * 50 + "\n")
260
-
261
- return StepResult(
262
- success=result.success,
263
- finished=finished,
264
- action=action,
265
- thinking=response.thinking,
266
- message=result.message or action.get("message"),
267
- )
268
-
269
- @property
270
- def context(self) -> list[dict[str, Any]]:
271
- """Get the current conversation context."""
272
- return self._context.copy()
273
-
274
- @property
275
- def step_count(self) -> int:
276
- """Get the current step count."""
277
- return self._step_count
phone_agent/config/__init__.py DELETED
@@ -1,53 +0,0 @@
1
- """Configuration module for Phone Agent."""
2
-
3
- from phone_agent.config.apps import APP_PACKAGES
4
- from phone_agent.config.apps_ios import APP_PACKAGES_IOS
5
- from phone_agent.config.i18n import get_message, get_messages
6
- from phone_agent.config.prompts_en import SYSTEM_PROMPT as SYSTEM_PROMPT_EN
7
- from phone_agent.config.prompts_zh import SYSTEM_PROMPT as SYSTEM_PROMPT_ZH
8
- from phone_agent.config.timing import (
9
- TIMING_CONFIG,
10
- ActionTimingConfig,
11
- ConnectionTimingConfig,
12
- DeviceTimingConfig,
13
- TimingConfig,
14
- get_timing_config,
15
- update_timing_config,
16
- )
17
-
18
-
19
- def get_system_prompt(lang: str = "cn") -> str:
20
- """
21
- Get system prompt by language.
22
-
23
- Args:
24
- lang: Language code, 'cn' for Chinese, 'en' for English.
25
-
26
- Returns:
27
- System prompt string.
28
- """
29
- if lang == "en":
30
- return SYSTEM_PROMPT_EN
31
- return SYSTEM_PROMPT_ZH
32
-
33
-
34
- # Default to Chinese for backward compatibility
35
- SYSTEM_PROMPT = SYSTEM_PROMPT_ZH
36
-
37
- __all__ = [
38
- "APP_PACKAGES",
39
- "APP_PACKAGES_IOS",
40
- "SYSTEM_PROMPT",
41
- "SYSTEM_PROMPT_ZH",
42
- "SYSTEM_PROMPT_EN",
43
- "get_system_prompt",
44
- "get_messages",
45
- "get_message",
46
- "TIMING_CONFIG",
47
- "TimingConfig",
48
- "ActionTimingConfig",
49
- "DeviceTimingConfig",
50
- "ConnectionTimingConfig",
51
- "get_timing_config",
52
- "update_timing_config",
53
- ]