autoglm-gui 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -54,13 +54,10 @@ class ConfigModel(BaseModel):
54
54
 
55
55
  # 双模型配置
56
56
  dual_model_enabled: bool = False
57
- decision_base_url: str = "https://api-inference.modelscope.cn/v1"
58
- decision_model_name: str = "ZhipuAI/GLM-4.7"
57
+ decision_base_url: str = ""
58
+ decision_model_name: str = ""
59
59
  decision_api_key: str = ""
60
60
 
61
- # 思考模式配置
62
- thinking_mode: str = "deep" # "fast" 或 "deep"
63
-
64
61
  @field_validator("base_url")
65
62
  @classmethod
66
63
  def validate_base_url(cls, v: str) -> str:
@@ -85,14 +82,6 @@ class ConfigModel(BaseModel):
85
82
  raise ValueError("decision_base_url must start with http:// or https://")
86
83
  return v.rstrip("/") # 去除尾部斜杠
87
84
 
88
- @field_validator("thinking_mode")
89
- @classmethod
90
- def validate_thinking_mode(cls, v: str) -> str:
91
- """验证思考模式."""
92
- if v not in ("fast", "deep"):
93
- raise ValueError("thinking_mode must be 'fast' or 'deep'")
94
- return v
95
-
96
85
 
97
86
  # ==================== 配置层数据类 ====================
98
87
 
@@ -109,8 +98,6 @@ class ConfigLayer:
109
98
  decision_base_url: Optional[str] = None
110
99
  decision_model_name: Optional[str] = None
111
100
  decision_api_key: Optional[str] = None
112
- # 思考模式配置
113
- thinking_mode: Optional[str] = None
114
101
 
115
102
  source: ConfigSource = ConfigSource.DEFAULT
116
103
 
@@ -142,7 +129,6 @@ class ConfigLayer:
142
129
  "decision_base_url": self.decision_base_url,
143
130
  "decision_model_name": self.decision_model_name,
144
131
  "decision_api_key": self.decision_api_key,
145
- "thinking_mode": self.thinking_mode,
146
132
  }.items()
147
133
  if v is not None
148
134
  }
@@ -314,7 +300,6 @@ class UnifiedConfigManager:
314
300
  decision_base_url=config_data.get("decision_base_url"),
315
301
  decision_model_name=config_data.get("decision_model_name"),
316
302
  decision_api_key=config_data.get("decision_api_key"),
317
- thinking_mode=config_data.get("thinking_mode"),
318
303
  source=ConfigSource.FILE,
319
304
  )
320
305
  self._effective_config = None # 清除缓存
@@ -346,7 +331,6 @@ class UnifiedConfigManager:
346
331
  decision_base_url: Optional[str] = None,
347
332
  decision_model_name: Optional[str] = None,
348
333
  decision_api_key: Optional[str] = None,
349
- thinking_mode: Optional[str] = None,
350
334
  merge_mode: bool = True,
351
335
  ) -> bool:
352
336
  """
@@ -360,7 +344,6 @@ class UnifiedConfigManager:
360
344
  decision_base_url: 决策模型 Base URL
361
345
  decision_model_name: 决策模型名称
362
346
  decision_api_key: 决策模型 API key
363
- thinking_mode: 思考模式 (fast/deep)
364
347
  merge_mode: 是否合并现有配置(True: 保留未提供的字段)
365
348
 
366
349
  Returns:
@@ -386,8 +369,6 @@ class UnifiedConfigManager:
386
369
  new_config["decision_model_name"] = decision_model_name
387
370
  if decision_api_key:
388
371
  new_config["decision_api_key"] = decision_api_key
389
- if thinking_mode:
390
- new_config["thinking_mode"] = thinking_mode
391
372
 
392
373
  # 合并模式:保留现有文件中未提供的字段
393
374
  if merge_mode and self._config_path.exists():
@@ -402,7 +383,6 @@ class UnifiedConfigManager:
402
383
  "decision_base_url",
403
384
  "decision_model_name",
404
385
  "decision_api_key",
405
- "thinking_mode",
406
386
  ]
407
387
  for key in preserve_keys:
408
388
  if key not in new_config and key in existing:
@@ -491,7 +471,6 @@ class UnifiedConfigManager:
491
471
  "decision_base_url",
492
472
  "decision_model_name",
493
473
  "decision_api_key",
494
- "thinking_mode",
495
474
  ]
496
475
 
497
476
  for key in config_keys:
@@ -658,7 +637,6 @@ class UnifiedConfigManager:
658
637
  "decision_base_url": config.decision_base_url,
659
638
  "decision_model_name": config.decision_model_name,
660
639
  "decision_api_key": config.decision_api_key,
661
- "thinking_mode": config.thinking_mode,
662
640
  }
663
641
 
664
642
 
@@ -21,9 +21,9 @@ class ThinkingMode(str, Enum):
21
21
  class DecisionModelConfig(BaseModel):
22
22
  """决策大模型配置"""
23
23
 
24
- base_url: str = "https://api-inference.modelscope.cn/v1"
24
+ base_url: str
25
25
  api_key: str = ""
26
- model_name: str = "ZhipuAI/GLM-4.7"
26
+ model_name: str
27
27
  max_tokens: int = 4096
28
28
  temperature: float = 0.7
29
29
  thinking_mode: ThinkingMode = ThinkingMode.DEEP
@@ -33,7 +33,7 @@ class DualModelConfig(BaseModel):
33
33
  """双模型协作配置"""
34
34
 
35
35
  enabled: bool = False
36
- decision_model: DecisionModelConfig = DecisionModelConfig()
36
+ decision_model: Optional[DecisionModelConfig] = None
37
37
 
38
38
 
39
39
  class ModelRole(str, Enum):
@@ -0,0 +1,291 @@
1
+ """MAI-UI PhoneAgent wrapper for compatibility with AutoGLM-GUI interface."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Callable, Optional
5
+
6
+ from phone_agent.agent import AgentConfig, StepResult
7
+ from phone_agent.actions.handler import ActionHandler
8
+ from phone_agent.model import ModelConfig
9
+
10
+ from AutoGLM_GUI.logger import logger
11
+ from AutoGLM_GUI.mai_ui.mai_navigation_agent import MAIUINaivigationAgent
12
+ from AutoGLM_GUI.mai_ui_adapter.action_adapter import MAIUIActionAdapter
13
+
14
+
15
+ @dataclass
16
+ class MAIUIConfig:
17
+ """MAI-UI specific configuration."""
18
+
19
+ history_n: int = 3
20
+ temperature: float = 0.0
21
+ top_k: int = -1
22
+ top_p: float = 1.0
23
+ max_tokens: int = 2048
24
+
25
+
26
+ class MAIUIPhoneAgent:
27
+ """
28
+ MAI-UI Agent wrapper that implements the PhoneAgent interface.
29
+
30
+ This wrapper allows MAI-UI agents to be used transparently in place of
31
+ the standard PhoneAgent, providing compatibility with the existing
32
+ PhoneAgentManager and API infrastructure.
33
+
34
+ Usage:
35
+ agent = MAIUIPhoneAgent(
36
+ model_config=model_config,
37
+ agent_config=agent_config,
38
+ )
39
+ result = agent.run("Open WeChat")
40
+ """
41
+
42
+ def __init__(
43
+ self,
44
+ model_config: ModelConfig,
45
+ agent_config: AgentConfig,
46
+ mai_config: Optional[MAIUIConfig] = None,
47
+ takeover_callback: Optional[Callable[[str], None]] = None,
48
+ ):
49
+ """
50
+ Initialize MAI-UI PhoneAgent wrapper.
51
+
52
+ Args:
53
+ model_config: Model configuration (base_url, api_key, model_name).
54
+ agent_config: Agent configuration (device_id, max_steps, etc.).
55
+ mai_config: MAI-UI specific configuration.
56
+ takeover_callback: Callback for takeover requests.
57
+ """
58
+ self.model_config = model_config
59
+ self.agent_config = agent_config
60
+ self.mai_config = mai_config or MAIUIConfig()
61
+
62
+ # Create MAI-UI navigation agent
63
+ self._mai_agent = MAIUINaivigationAgent(
64
+ llm_base_url=model_config.base_url,
65
+ model_name=model_config.model_name,
66
+ api_key=model_config.api_key,
67
+ runtime_conf={
68
+ "history_n": self.mai_config.history_n,
69
+ "temperature": self.mai_config.temperature,
70
+ "top_k": self.mai_config.top_k,
71
+ "top_p": self.mai_config.top_p,
72
+ "max_tokens": self.mai_config.max_tokens,
73
+ },
74
+ )
75
+
76
+ # Action adapter and handler
77
+ self._action_adapter = MAIUIActionAdapter()
78
+ self.action_handler = ActionHandler(
79
+ device_id=agent_config.device_id,
80
+ takeover_callback=takeover_callback,
81
+ )
82
+
83
+ # PhoneAgent-compatible state
84
+ self._context: list[dict[str, Any]] = []
85
+ self._step_count = 0
86
+ self._current_task: str = ""
87
+
88
+ # For model_client compatibility (used by streaming patches)
89
+ self.model_client = _DummyModelClient()
90
+
91
+ # Debug: Print model configuration for troubleshooting
92
+ logger.info("=" * 60)
93
+ logger.info("[MAI-UI Agent] Initialization")
94
+ logger.info(f" Device ID: {agent_config.device_id}")
95
+ logger.info(f" Base URL: {model_config.base_url}")
96
+ logger.info(f" Model: {model_config.model_name}")
97
+ logger.info("=" * 60)
98
+
99
+ def run(self, task: str) -> str:
100
+ """
101
+ Execute a complete task.
102
+
103
+ Args:
104
+ task: Natural language task description.
105
+
106
+ Returns:
107
+ Final message from the agent.
108
+ """
109
+ self.reset()
110
+ self._current_task = task
111
+
112
+ # First step
113
+ result = self._execute_step(task, is_first=True)
114
+
115
+ if result.finished:
116
+ return result.message or "Task completed"
117
+
118
+ # Continue until finished or max steps reached
119
+ while self._step_count < self.agent_config.max_steps:
120
+ result = self._execute_step(is_first=False)
121
+
122
+ if result.finished:
123
+ return result.message or "Task completed"
124
+
125
+ return "Max steps reached"
126
+
127
+ def step(self, task: Optional[str] = None) -> StepResult:
128
+ """
129
+ Execute a single step.
130
+
131
+ Args:
132
+ task: Task description (required for first step).
133
+
134
+ Returns:
135
+ StepResult with step details.
136
+ """
137
+ is_first = len(self._context) == 0
138
+
139
+ if is_first:
140
+ if not task:
141
+ raise ValueError("Task is required for the first step")
142
+ self._current_task = task
143
+
144
+ return self._execute_step(task, is_first)
145
+
146
+ def _execute_step(
147
+ self, user_prompt: Optional[str] = None, is_first: bool = False
148
+ ) -> StepResult:
149
+ """Execute a single step of the agent loop."""
150
+ from phone_agent.device_factory import get_device_factory
151
+ from PIL import Image
152
+ from io import BytesIO
153
+
154
+ self._step_count += 1
155
+ logger.info(f"[MAI-UI] Executing step {self._step_count}")
156
+
157
+ # Get screenshot
158
+ device_factory = get_device_factory()
159
+ screenshot = device_factory.get_screenshot(self.agent_config.device_id)
160
+
161
+ # Convert base64 to PIL Image
162
+ import base64
163
+
164
+ image_bytes = base64.b64decode(screenshot.base64_data)
165
+ pil_image = Image.open(BytesIO(image_bytes))
166
+
167
+ # Build observation
168
+ obs = {
169
+ "screenshot": pil_image,
170
+ "accessibility_tree": None,
171
+ }
172
+
173
+ # Get instruction
174
+ instruction = user_prompt or self._current_task
175
+
176
+ # Call MAI-UI predict
177
+ try:
178
+ raw_response, action_json = self._mai_agent.predict(
179
+ instruction=instruction,
180
+ obs=obs,
181
+ )
182
+ except Exception as e:
183
+ logger.error(f"[MAI-UI] Predict failed: {e}")
184
+ return StepResult(
185
+ success=False,
186
+ finished=True,
187
+ action=None,
188
+ thinking="",
189
+ message=f"Prediction failed: {e}",
190
+ )
191
+
192
+ # Check for error
193
+ if action_json.get("action") is None:
194
+ logger.error("[MAI-UI] Invalid action returned")
195
+ return StepResult(
196
+ success=False,
197
+ finished=True,
198
+ action=None,
199
+ thinking="",
200
+ message="Invalid action from model",
201
+ )
202
+
203
+ # Get thinking from trajectory
204
+ thinking = ""
205
+ if self._mai_agent.traj_memory.steps:
206
+ last_step = self._mai_agent.traj_memory.steps[-1]
207
+ thinking = last_step.thought or ""
208
+
209
+ # Convert action to AutoGLM-GUI format
210
+ converted_action = self._action_adapter.convert(action_json)
211
+ logger.debug(f"[MAI-UI] Converted action: {converted_action}")
212
+
213
+ # Check if finished (terminate action)
214
+ if converted_action.get("_metadata") == "finish":
215
+ return StepResult(
216
+ success=True,
217
+ finished=True,
218
+ action=converted_action,
219
+ thinking=thinking,
220
+ message=converted_action.get("message", "Task completed"),
221
+ )
222
+
223
+ # Execute action
224
+ try:
225
+ result = self.action_handler.execute(
226
+ converted_action,
227
+ screenshot.width,
228
+ screenshot.height,
229
+ )
230
+ except Exception as e:
231
+ logger.error(f"[MAI-UI] Action execution failed: {e}")
232
+ return StepResult(
233
+ success=False,
234
+ finished=False,
235
+ action=converted_action,
236
+ thinking=thinking,
237
+ message=f"Action failed: {e}",
238
+ )
239
+
240
+ # Update context for compatibility
241
+ self._context.append(
242
+ {
243
+ "step": self._step_count,
244
+ "action": action_json,
245
+ "converted_action": converted_action,
246
+ "result": result.success,
247
+ "thinking": thinking,
248
+ }
249
+ )
250
+
251
+ return StepResult(
252
+ success=result.success,
253
+ finished=result.should_finish,
254
+ action=converted_action,
255
+ thinking=thinking,
256
+ message=result.message,
257
+ )
258
+
259
+ def reset(self) -> None:
260
+ """Reset agent state for a new task."""
261
+ self._context = []
262
+ self._step_count = 0
263
+ self._current_task = ""
264
+ self._mai_agent.reset()
265
+ logger.debug("[MAI-UI] Agent reset")
266
+
267
+ @property
268
+ def step_count(self) -> int:
269
+ """Get current step count."""
270
+ return self._step_count
271
+
272
+ @property
273
+ def context(self) -> list[dict[str, Any]]:
274
+ """Get conversation context (for compatibility)."""
275
+ return self._context.copy()
276
+
277
+
278
+ class _DummyModelClient:
279
+ """
280
+ Dummy model client for compatibility with streaming patches.
281
+
282
+ The actual model calls are handled by MAI-UI agent internally.
283
+ This exists to satisfy code that expects model_client attribute.
284
+ """
285
+
286
+ def request(self, messages: list, **kwargs) -> Any:
287
+ """Dummy request method - should not be called directly."""
288
+ raise NotImplementedError(
289
+ "MAIUIPhoneAgent handles model calls internally. "
290
+ "Do not call model_client.request() directly."
291
+ )
AutoGLM_GUI/prompts.py CHANGED
@@ -11,7 +11,12 @@ from datetime import datetime
11
11
  today = datetime.today()
12
12
  weekday_names = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
13
13
  weekday = weekday_names[today.weekday()]
14
- formatted_date = today.strftime("%Y年%m月%d日") + " " + weekday
14
+ # NOTE: Do NOT use strftime with Chinese characters in format string!
15
+ # On some Windows systems with non-UTF-8 locale (e.g., GBK/CP936),
16
+ # strftime("%Y年%m月%d日") raises UnicodeEncodeError because the C library's
17
+ # strftime uses locale encoding, not Python's UTF-8 mode.
18
+ # Use f-string instead to avoid this issue completely.
19
+ formatted_date = f"{today.year}年{today.month:02d}月{today.day:02d}日 {weekday}"
15
20
 
16
21
  MCP_SYSTEM_PROMPT_ZH = f"""
17
22
  # Context
AutoGLM_GUI/schemas.py CHANGED
@@ -326,9 +326,6 @@ class ConfigResponse(BaseModel):
326
326
  decision_model_name: str = ""
327
327
  decision_api_key: str = ""
328
328
 
329
- # 思考模式
330
- thinking_mode: str = "deep" # "fast" | "deep"
331
-
332
329
  conflicts: list[dict] | None = None # 配置冲突信息(可选)
333
330
 
334
331
 
@@ -345,9 +342,6 @@ class ConfigSaveRequest(BaseModel):
345
342
  decision_model_name: str | None = None
346
343
  decision_api_key: str | None = None
347
344
 
348
- # 思考模式
349
- thinking_mode: str | None = None # "fast" | "deep"
350
-
351
345
  @field_validator("base_url")
352
346
  @classmethod
353
347
  def validate_base_url(cls, v: str) -> str:
@@ -367,16 +361,6 @@ class ConfigSaveRequest(BaseModel):
367
361
  raise ValueError("model_name cannot be empty")
368
362
  return v.strip()
369
363
 
370
- @field_validator("thinking_mode")
371
- @classmethod
372
- def validate_thinking_mode(cls, v: str | None) -> str | None:
373
- """验证 thinking_mode."""
374
- if v is None:
375
- return v
376
- if v not in ("fast", "deep"):
377
- raise ValueError("thinking_mode must be 'fast' or 'deep'")
378
- return v
379
-
380
364
  @field_validator("decision_base_url")
381
365
  @classmethod
382
366
  def validate_decision_base_url(cls, v: str | None) -> str | None:
@@ -1 +1 @@
1
- import{j as o}from"./index-Dt7cVkfR.js";function t(){return o.jsx("div",{className:"p-2",children:o.jsx("h3",{children:"About"})})}export{t as component};
1
+ import{j as o}from"./index-DHF1NZh0.js";function t(){return o.jsx("div",{className:"p-2",children:o.jsx("h3",{children:"About"})})}export{t as component};