autoglm-gui 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. AutoGLM_GUI/adb_plus/__init__.py +6 -6
  2. AutoGLM_GUI/api/__init__.py +49 -15
  3. AutoGLM_GUI/api/agents.py +163 -209
  4. AutoGLM_GUI/api/dual_model.py +310 -0
  5. AutoGLM_GUI/api/mcp.py +134 -0
  6. AutoGLM_GUI/api/metrics.py +36 -0
  7. AutoGLM_GUI/config_manager.py +110 -6
  8. AutoGLM_GUI/dual_model/__init__.py +53 -0
  9. AutoGLM_GUI/dual_model/decision_model.py +664 -0
  10. AutoGLM_GUI/dual_model/dual_agent.py +917 -0
  11. AutoGLM_GUI/dual_model/protocols.py +354 -0
  12. AutoGLM_GUI/dual_model/vision_model.py +442 -0
  13. AutoGLM_GUI/exceptions.py +75 -3
  14. AutoGLM_GUI/metrics.py +283 -0
  15. AutoGLM_GUI/phone_agent_manager.py +264 -14
  16. AutoGLM_GUI/prompts.py +97 -0
  17. AutoGLM_GUI/schemas.py +40 -9
  18. AutoGLM_GUI/static/assets/{about-PcGX7dIG.js → about-CrBXGOgB.js} +1 -1
  19. AutoGLM_GUI/static/assets/chat-Di2fwu8V.js +124 -0
  20. AutoGLM_GUI/static/assets/dialog-CHJSPLHJ.js +45 -0
  21. AutoGLM_GUI/static/assets/{index-DOt5XNhh.js → index-9IaIXvyy.js} +1 -1
  22. AutoGLM_GUI/static/assets/index-Dt7cVkfR.js +12 -0
  23. AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +1 -0
  24. AutoGLM_GUI/static/assets/{workflows-B1hgBC_O.js → workflows-DHadKApI.js} +1 -1
  25. AutoGLM_GUI/static/index.html +2 -2
  26. {autoglm_gui-1.2.0.dist-info → autoglm_gui-1.3.0.dist-info}/METADATA +11 -4
  27. {autoglm_gui-1.2.0.dist-info → autoglm_gui-1.3.0.dist-info}/RECORD +30 -20
  28. AutoGLM_GUI/static/assets/chat-B0FKL2ne.js +0 -124
  29. AutoGLM_GUI/static/assets/dialog-BSNX0L1i.js +0 -45
  30. AutoGLM_GUI/static/assets/index-BjYIY--m.css +0 -1
  31. AutoGLM_GUI/static/assets/index-CnEYDOXp.js +0 -11
  32. {autoglm_gui-1.2.0.dist-info → autoglm_gui-1.3.0.dist-info}/WHEEL +0 -0
  33. {autoglm_gui-1.2.0.dist-info → autoglm_gui-1.3.0.dist-info}/entry_points.txt +0 -0
  34. {autoglm_gui-1.2.0.dist-info → autoglm_gui-1.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,917 @@
1
+ """
2
+ 双模型协调器
3
+
4
+ 协调大模型(决策)和小模型(执行)的协作
5
+ """
6
+
7
+ import hashlib
8
+ import time
9
+ import threading
10
+ from dataclasses import dataclass
11
+ from typing import Callable, Optional
12
+ from queue import Queue
13
+
14
+ from phone_agent.model.client import ModelConfig
15
+
16
+ from AutoGLM_GUI.logger import logger
17
+ from .decision_model import (
18
+ DecisionModel,
19
+ Decision,
20
+ TaskPlan,
21
+ ActionSequence,
22
+ ActionStep,
23
+ )
24
+ from .vision_model import VisionModel, ScreenDescription, ExecutionResult
25
+ from .protocols import (
26
+ DecisionModelConfig,
27
+ DualModelState,
28
+ DualModelEvent,
29
+ DualModelEventType,
30
+ ModelRole,
31
+ ModelStage,
32
+ ThinkingMode,
33
+ DECISION_ERROR_CONTEXT_TEMPLATE,
34
+ )
35
+
36
+
37
@dataclass
class DualModelCallbacks:
    """Optional hooks the dual-model coordinator invokes while running a task.

    Every hook defaults to ``None``. ``DualModelAgent`` tests a hook for
    truthiness before calling it, so only the hooks of interest need to be set.
    """

    # --- Decision (large) model hooks ---
    # Called with no arguments when a decision-model phase begins.
    on_decision_start: Optional[Callable[[], None]] = None
    # Called with each streamed reasoning chunk.
    on_decision_thinking: Optional[Callable[[str], None]] = None
    # Called with the Decision produced for the current step.
    on_decision_result: Optional[Callable[[Decision], None]] = None
    # Called with the TaskPlan once the task has been analysed.
    on_task_plan: Optional[Callable[[TaskPlan], None]] = None
    # Called as (content, purpose) after text content has been generated.
    on_content_generation: Optional[Callable[[str, str], None]] = None

    # --- Vision (small) model hooks ---
    # Called with no arguments before the screen is captured and described.
    on_vision_start: Optional[Callable[[], None]] = None
    # Called with the ScreenDescription of the current screen.
    on_vision_recognition: Optional[Callable[[ScreenDescription], None]] = None
    # Called with the action dict right before it is executed.
    on_action_start: Optional[Callable[[dict], None]] = None
    # Called with the ExecutionResult of the executed action.
    on_action_result: Optional[Callable[[ExecutionResult], None]] = None

    # --- Whole-task hooks ---
    # Called as (step, success) after a step has been processed.
    on_step_complete: Optional[Callable[[int, bool], None]] = None
    # Called as (success, message) when the run loop ends normally.
    on_task_complete: Optional[Callable[[bool, str], None]] = None
    # Called with an error message when a step fails.
    on_error: Optional[Callable[[str], None]] = None
60
+
61
+
62
@dataclass
class StepResult:
    """Outcome of a single execution step."""

    # Step number the result belongs to (the agent's step counter).
    step: int
    # Whether the step's action succeeded.
    success: bool
    # Whether the step reported the whole task as finished.
    finished: bool
    # Decision made for this step, when a decision model was consulted.
    decision: Optional[Decision] = None
    # Screen description obtained for this step, when one was produced.
    screen_desc: Optional[ScreenDescription] = None
    # Result of executing the action, when an action was executed.
    execution: Optional[ExecutionResult] = None
    # Error text when the step raised; ``None`` otherwise.
    error: Optional[str] = None
73
+
74
+
75
@dataclass
class AnomalyState:
    """Track signs that the agent is stuck.

    Three independent counters are kept: consecutive failed operations,
    consecutive identical screenshots, and consecutive repeats of the same
    ``action:target`` pair. Each has a ``max_*`` threshold used by
    ``has_anomaly``.
    """

    consecutive_failures: int = 0
    consecutive_same_screen: int = 0
    last_screenshot_hash: str = ""
    last_action: str = ""
    repeated_actions: int = 0
    max_same_screen: int = 3
    max_failures: int = 5
    max_repeated_actions: int = 3

    def reset(self):
        """Clear all counters and remembered values; thresholds are kept."""
        self.consecutive_failures = 0
        self.consecutive_same_screen = 0
        self.last_screenshot_hash = ""
        self.last_action = ""
        self.repeated_actions = 0

    def check_screenshot(self, screenshot_base64: str) -> bool:
        """Record a screenshot and report whether the screen looks frozen.

        Args:
            screenshot_base64: Base64 text of the current screenshot.

        Returns:
            True when this screenshot equals the previous one and the screen
            has now been unchanged at least twice in a row.
        """
        # Hash the complete payload. Digesting only a leading slice would make
        # two different screens that share their first bytes (for example the
        # same top-of-screen content) compare as identical.
        current_hash = hashlib.md5(screenshot_base64.encode()).hexdigest()
        is_same = current_hash == self.last_screenshot_hash
        if is_same:
            self.consecutive_same_screen += 1
        else:
            self.consecutive_same_screen = 0
            self.last_screenshot_hash = current_hash
        return is_same and self.consecutive_same_screen >= 2

    def check_action(self, action: str, target: str) -> bool:
        """Record an action and report whether it has been repeated too often.

        Args:
            action: Action name.
            target: Action target.

        Returns:
            True once the same ``action:target`` pair has been repeated
            ``max_repeated_actions`` times in a row.
        """
        action_key = f"{action}:{target}"
        if action_key == self.last_action:
            self.repeated_actions += 1
        else:
            self.repeated_actions = 0
            self.last_action = action_key
        return self.repeated_actions >= self.max_repeated_actions

    def record_failure(self):
        """Count one more consecutive failure."""
        self.consecutive_failures += 1

    def record_success(self):
        """Reset the consecutive-failure counter."""
        self.consecutive_failures = 0

    def has_anomaly(self) -> bool:
        """Return True when any counter has reached its ``max_*`` threshold."""
        return (
            self.consecutive_failures >= self.max_failures
            or self.consecutive_same_screen >= self.max_same_screen
            or self.repeated_actions >= self.max_repeated_actions
        )

    def get_error_context(self) -> str:
        """Build a newline-joined description of the active warning signs.

        A line is added for each counter that has reached 2 or more. Returns
        an empty string when none has.
        """
        contexts = []
        if self.consecutive_same_screen >= 2:
            contexts.append(
                f"⚠️ 屏幕连续 {self.consecutive_same_screen} 次无变化,可能原因:网络延迟、点击未生效、页面加载中"
            )
        if self.consecutive_failures >= 2:
            contexts.append(f"⚠️ 连续 {self.consecutive_failures} 次操作失败")
        if self.repeated_actions >= 2:
            contexts.append(f"⚠️ 相同操作已重复 {self.repeated_actions} 次无效果")
        return "\n".join(contexts)
145
+
146
+
147
class DualModelAgent:
    """
    Dual-model coordinator.

    Coordinates the large model (GLM-4.7, decisions) and the small model
    (autoglm-phone, screen recognition and execution):
    1. The large model analyses the task and drafts a plan.
    2. The small model captures the screen and describes it.
    3. The large model decides what to do based on that description.
    4. The small model carries out the decision.
    5. The cycle repeats until the task is finished.

    Usage (``run`` is a synchronous call):
        agent = DualModelAgent(decision_config, vision_config, device_id)
        result = agent.run("打开微信发送消息")
    """

    def __init__(
        self,
        decision_config: DecisionModelConfig,
        vision_config: ModelConfig,
        device_id: str,
        max_steps: int = 50,
        callbacks: Optional[DualModelCallbacks] = None,
        thinking_mode: ThinkingMode = ThinkingMode.DEEP,
    ):
        """Create both models and initialise all run-time state.

        Args:
            decision_config: Configuration passed to ``DecisionModel``.
            vision_config: Configuration passed to ``VisionModel``.
            device_id: Target device; also passed to ``VisionModel``.
            max_steps: Upper bound on steps per run.
            callbacks: Optional hooks; an empty ``DualModelCallbacks`` is
                used when omitted.
            thinking_mode: Mode handed to ``DecisionModel``; also selects
                the execution strategy in ``run``.
        """
        # The two cooperating models.
        self.decision_model = DecisionModel(decision_config, thinking_mode)
        self.vision_model = VisionModel(vision_config, device_id)

        # Plain configuration.
        self.device_id = device_id
        self.max_steps = max_steps
        self.callbacks = callbacks or DualModelCallbacks()
        self.thinking_mode = thinking_mode

        # Per-run state.
        self.state = DualModelState()
        self.current_task: str = ""
        self.task_plan: Optional[TaskPlan] = None
        self.step_count: int = 0
        self.stop_event = threading.Event()

        # TURBO-mode state.
        self.action_sequence: Optional[ActionSequence] = None
        self.current_action_index: int = 0
        self.executed_actions: list[str] = []

        # Stuck-detection bookkeeping.
        self.anomaly_state = AnomalyState()

        # Event queue (consumed for SSE).
        self.event_queue: Queue[DualModelEvent] = Queue()

        logger.info(
            f"双模型协调器初始化完成, 设备: {device_id}, 模式: {thinking_mode.value}"
        )
200
+
201
def _emit_event(
    self,
    event_type: DualModelEventType,
    data: dict,
    model: Optional[ModelRole] = None,
):
    """Wrap the arguments in a ``DualModelEvent`` and put it on the event queue.

    The event is stamped with the current step counter and ``time.time()``.

    Args:
        event_type: Kind of event being reported.
        data: Event payload.
        model: Model role the event belongs to, if any.
    """
    self.event_queue.put(
        DualModelEvent(
            type=event_type,
            data=data,
            model=model,
            step=self.step_count,
            timestamp=time.time(),
        )
    )
216
+
217
def run(self, task: str) -> dict:
    """
    Execute a task (synchronous).

    Resets the per-run counters, then dispatches to the TURBO runner or the
    standard (FAST/DEEP) runner according to ``thinking_mode``.

    Args:
        task: User task description.

    Returns:
        The result dict produced by the selected runner.
    """
    # Fresh per-run state.
    self.current_task = task
    self.step_count = 0
    self.stop_event.clear()
    self.anomaly_state.reset()
    self.executed_actions = []
    self.current_action_index = 0

    logger.info(f"开始执行任务: {task[:50]}... (模式: {self.thinking_mode.value})")

    # TURBO executes a pre-planned batch; every other mode decides step by step.
    runner = (
        self._run_turbo
        if self.thinking_mode == ThinkingMode.TURBO
        else self._run_standard
    )
    return runner(task)
242
+
243
def _run_standard(self, task: str) -> dict:
    """Run a task in standard mode (FAST/DEEP).

    The decision model first turns the task into a plan. ``_execute_step``
    is then called repeatedly until a step reports completion, the step
    budget ``max_steps`` is used up, or ``stop_event`` is set.

    Args:
        task: User task description.

    Returns:
        A dict with ``success``, ``message`` and ``steps``. Any exception
        raised along the way is logged, emitted as an ERROR event and
        converted into a ``success=False`` result.
    """
    try:
        # 1. Decision model analyses the task.
        self._update_state(
            decision_stage=ModelStage.ANALYZING, decision_active=True
        )
        self._emit_event(
            DualModelEventType.DECISION_START,
            {"stage": "analyzing", "task": task},
            ModelRole.DECISION,
        )
        start_hook = self.callbacks.on_decision_start
        if start_hook:
            start_hook()

        plan = self.decision_model.analyze_task(
            task,
            on_thinking=self._on_decision_thinking,
            on_answer=self._on_decision_answer,
        )
        self.task_plan = plan

        self._emit_event(
            DualModelEventType.TASK_PLAN,
            {"plan": plan.to_dict()},
            ModelRole.DECISION,
        )
        plan_hook = self.callbacks.on_task_plan
        if plan_hook:
            plan_hook(plan)

        self.state.task_plan = plan.steps
        self.state.total_steps = plan.estimated_actions

        # 2. Step loop.
        finished = False
        last_message = ""

        while not (finished or self.step_count >= self.max_steps):
            if self.stop_event.is_set():
                logger.info("任务被中断")
                return {
                    "success": False,
                    "message": "任务被用户中断",
                    "steps": self.step_count,
                }

            self.step_count += 1
            logger.info(f"执行步骤 {self.step_count}/{self.max_steps}")

            step_result = self._execute_step()

            if step_result.error:
                logger.error(f"步骤执行失败: {step_result.error}")
                error_hook = self.callbacks.on_error
                if error_hook:
                    error_hook(step_result.error)
                # A failed step is not fatal; move on to the next attempt.
                continue

            if step_result.finished:
                finished = True
                if step_result.decision:
                    last_message = step_result.decision.reasoning
                else:
                    last_message = "任务完成"

            step_hook = self.callbacks.on_step_complete
            if step_hook:
                step_hook(self.step_count, step_result.success)

            # Pause between steps.
            time.sleep(0.5)

        # 3. Wrap up.
        success = finished
        if finished:
            message = last_message
        else:
            message = f"达到最大步数限制({self.max_steps})"

        self._emit_event(
            DualModelEventType.TASK_COMPLETE,
            {"success": success, "message": message, "steps": self.step_count},
        )

        done_hook = self.callbacks.on_task_complete
        if done_hook:
            done_hook(success, message)

        logger.info(f"任务完成: success={success}, steps={self.step_count}")

        return {
            "success": success,
            "message": message,
            "steps": self.step_count,
        }

    except Exception as e:
        logger.exception(f"任务执行异常: {e}")
        self._emit_event(
            DualModelEventType.ERROR,
            {"message": str(e)},
        )
        return {
            "success": False,
            "message": f"执行异常: {e}",
            "steps": self.step_count,
        }
353
+
354
def _run_turbo(self, task: str) -> dict:
    """
    Run a task in TURBO mode.

    The decision model produces the complete action sequence once. The
    actions are then executed one by one without consulting the decision
    model again, except to re-plan (at most 3 times) when the anomaly
    tracker reports the run is stuck.

    Args:
        task: User task description.

    Returns:
        A dict with ``success``, ``message`` and ``steps``. Any exception
        raised along the way is logged, emitted as an ERROR event and
        converted into a ``success=False`` result.
    """
    try:
        # 1. Decision model generates the whole action sequence in one call.
        self._update_state(
            decision_stage=ModelStage.ANALYZING, decision_active=True
        )
        self._emit_event(
            DualModelEventType.DECISION_START,
            {"stage": "analyzing", "task": task, "mode": "turbo"},
            ModelRole.DECISION,
        )

        if self.callbacks.on_decision_start:
            self.callbacks.on_decision_start()

        self.action_sequence = self.decision_model.analyze_task_turbo(
            task,
            on_thinking=self._on_decision_thinking,
            on_answer=self._on_decision_answer,
        )

        self.task_plan = self.action_sequence.to_plan()
        self._emit_event(
            DualModelEventType.TASK_PLAN,
            {
                "plan": self.task_plan.to_dict(),
                "actions": self.action_sequence.to_dict(),
            },
            ModelRole.DECISION,
        )

        if self.callbacks.on_task_plan:
            self.callbacks.on_task_plan(self.task_plan)

        self.state.task_plan = self.task_plan.steps
        self.state.total_steps = len(self.action_sequence.actions)

        logger.info(f"[TURBO] 生成 {len(self.action_sequence.actions)} 个操作步骤")

        # 2. Execute the sequence.
        self.current_action_index = 0
        finished = False
        last_message = ""
        replan_count = 0
        max_replans = 3

        while not finished and self.step_count < self.max_steps:
            if self.stop_event.is_set():
                logger.info("[TURBO] 任务被中断")
                return {
                    "success": False,
                    "message": "任务被用户中断",
                    "steps": self.step_count,
                }

            # Nothing left to execute: the sequence is complete.
            if self.current_action_index >= len(self.action_sequence.actions):
                finished = True
                last_message = "操作序列执行完成"
                break

            self.step_count += 1
            action = self.action_sequence.actions[self.current_action_index]
            logger.info(
                f"[TURBO] 执行步骤 {self.step_count}: {action.action} -> {action.target}"
            )

            # Execute a single action.
            step_result = self._execute_turbo_step(action)

            if step_result.error or not step_result.success:
                logger.warning(f"[TURBO] 步骤执行失败: {step_result.error}")
                self.anomaly_state.record_failure()

                # Re-plan only when the tracker says we are stuck and the
                # re-plan budget is not used up.
                if self.anomaly_state.has_anomaly() and replan_count < max_replans:
                    replan_count += 1
                    logger.info(
                        f"[TURBO] 触发重新规划 ({replan_count}/{max_replans})"
                    )

                    # Describe the current screen for the decision model.
                    screenshot_base64, _, _ = self.vision_model.capture_screenshot()
                    screen_desc = self.vision_model.describe_screen(
                        screenshot_base64
                    )

                    new_sequence = self.decision_model.replan(
                        current_state=screen_desc.description,
                        executed_actions=self.executed_actions,
                        error_info=step_result.error or "操作失败",
                        on_thinking=self._on_decision_thinking,
                        on_answer=self._on_decision_answer,
                    )

                    if new_sequence.actions:
                        # Adopt the new plan and start over from its first action.
                        self.action_sequence = new_sequence
                        self.current_action_index = 0
                        self.anomaly_state.reset()
                        logger.info(
                            f"[TURBO] 重新规划成功,新增 {len(new_sequence.actions)} 个步骤"
                        )
                    else:
                        logger.warning("[TURBO] 重新规划返回空序列")

                if self.callbacks.on_error:
                    self.callbacks.on_error(step_result.error or "执行失败")
                # Retry: the action index was not advanced.
                continue

            # The action succeeded.
            self.anomaly_state.record_success()
            self.executed_actions.append(f"{action.action}: {action.target}")
            self.current_action_index += 1

            if step_result.finished:
                finished = True
                last_message = "任务完成"

            if self.callbacks.on_step_complete:
                self.callbacks.on_step_complete(
                    self.step_count, step_result.success
                )

            # Short pause between steps.
            time.sleep(0.3)

        # The loop can stop on the step budget right after the final queued
        # action succeeded. That is a completed sequence, not a step-limit
        # failure, so recognise it here.
        if not finished and self.current_action_index >= len(
            self.action_sequence.actions
        ):
            finished = True
            last_message = "操作序列执行完成"

        # 3. Wrap up.
        success = finished
        message = (
            last_message if finished else f"达到最大步数限制({self.max_steps})"
        )

        self._emit_event(
            DualModelEventType.TASK_COMPLETE,
            {"success": success, "message": message, "steps": self.step_count},
        )

        if self.callbacks.on_task_complete:
            self.callbacks.on_task_complete(success, message)

        logger.info(f"[TURBO] 任务完成: success={success}, steps={self.step_count}")

        return {
            "success": success,
            "message": message,
            "steps": self.step_count,
        }

    except Exception as e:
        logger.exception(f"[TURBO] 任务执行异常: {e}")
        self._emit_event(
            DualModelEventType.ERROR,
            {"message": str(e)},
        )
        return {
            "success": False,
            "message": f"执行异常: {e}",
            "steps": self.step_count,
        }
519
+
520
def _execute_turbo_step(self, action: ActionStep) -> StepResult:
    """
    Execute one pre-planned action in TURBO mode.

    The action is executed directly. The decision model is consulted only
    when the action is a ``type`` action flagged ``need_generate``, in which
    case the text to type is generated from a description of the screen.

    Args:
        action: The planned action to execute.

    Returns:
        A ``StepResult`` carrying the execution outcome, or one with
        ``success=False`` and ``error`` set when anything raised.
    """
    try:
        # Capture the screen; only the image itself is used here.
        screenshot_base64, _, _ = self.vision_model.capture_screenshot()

        # Feed the frozen-screen detector.
        is_same_screen = self.anomaly_state.check_screenshot(screenshot_base64)
        if is_same_screen:
            logger.warning(
                f"[TURBO] 屏幕连续 {self.anomaly_state.consecutive_same_screen} 次无变化"
            )

        self._update_state(
            vision_stage=ModelStage.EXECUTING,
            vision_active=True,
            decision_active=False,
        )

        # Generate the text to type when the plan asked for it.
        content = action.content
        if action.need_generate and action.action == "type":
            logger.info("[TURBO] 需要生成人性化内容,调用决策模型")
            self._update_state(
                decision_stage=ModelStage.GENERATING, decision_active=True
            )
            self._emit_event(
                DualModelEventType.DECISION_START,
                {"stage": "generating", "content_type": action.target},
                ModelRole.DECISION,
            )

            # The screen description is the context for generation.
            screen_desc = self.vision_model.describe_screen(screenshot_base64)

            content = self.decision_model.generate_humanize_content(
                task_context=self.current_task,
                current_scene=screen_desc.description,
                content_type=action.target or "回复内容",
                on_thinking=self._on_decision_thinking,
                on_answer=self._on_decision_answer,
            )

            # Generation is over: return the decision model to idle so the
            # state does not keep reporting it as generating while the
            # vision model executes (matches the standard step's handling).
            self._update_state(
                decision_stage=ModelStage.IDLE, decision_active=False
            )

            self._emit_event(
                DualModelEventType.CONTENT_GENERATION,
                {"content": content, "purpose": action.target},
                ModelRole.DECISION,
            )

            if self.callbacks.on_content_generation:
                self.callbacks.on_content_generation(content, action.target)

        # Build the decision payload for the vision model.
        decision_dict = {
            "action": action.action,
            "target": action.target,
            "content": content,
            "direction": action.direction,
        }

        self._emit_event(
            DualModelEventType.ACTION_START,
            {"action": decision_dict},
            ModelRole.VISION,
        )

        if self.callbacks.on_action_start:
            self.callbacks.on_action_start(decision_dict)

        # Execute the action.
        execution = self.vision_model.execute_decision(
            decision=decision_dict,
            screenshot_base64=screenshot_base64,
        )

        self._update_state(
            vision_action=f"{execution.action_type}: {execution.target}",
            vision_stage=ModelStage.IDLE,
            vision_active=False,
        )

        self._emit_event(
            DualModelEventType.ACTION_RESULT,
            {
                "success": execution.success,
                "action_type": execution.action_type,
                "target": execution.target,
                "position": execution.position,
                "message": execution.message,
            },
            ModelRole.VISION,
        )

        if self.callbacks.on_action_result:
            self.callbacks.on_action_result(execution)

        self._emit_event(
            DualModelEventType.STEP_COMPLETE,
            {
                "step": self.step_count,
                "success": execution.success,
                "finished": execution.finished,
            },
        )

        return StepResult(
            step=self.step_count,
            success=execution.success,
            finished=execution.finished,
            execution=execution,
        )

    except Exception as e:
        logger.exception(f"[TURBO] 步骤执行异常: {e}")
        return StepResult(
            step=self.step_count,
            success=False,
            finished=False,
            error=str(e),
        )
644
+
645
def _execute_step(self) -> StepResult:
    """Run one observe → decide → act cycle of the standard mode.

    The vision model captures and describes the screen, the decision model
    chooses an action from that description, and the vision model executes
    it. A ``finished`` decision or a ``wait`` action returns early without
    executing anything.

    Returns:
        A ``StepResult`` for the step. When anything raises, the failure is
        recorded and a result with ``success=False`` and ``error`` set is
        returned instead of propagating the exception.
    """
    try:
        # 2.1 Vision model looks at the screen.
        self._update_state(
            vision_stage=ModelStage.RECOGNIZING,
            vision_active=True,
            decision_active=False,
        )
        self._emit_event(
            DualModelEventType.VISION_START,
            {"stage": "recognizing"},
            ModelRole.VISION,
        )
        vision_hook = self.callbacks.on_vision_start
        if vision_hook:
            vision_hook()

        # Capture; width and height are not needed in this step.
        shot, _width, _height = self.vision_model.capture_screenshot()

        # Feed the frozen-screen detector.
        if self.anomaly_state.check_screenshot(shot):
            logger.warning(
                f"屏幕连续 {self.anomaly_state.consecutive_same_screen} 次无变化"
            )

        screen_desc = self.vision_model.describe_screen(shot)

        self._update_state(
            vision_description=screen_desc.description[:200],
            vision_stage=ModelStage.IDLE,
        )
        self._emit_event(
            DualModelEventType.VISION_RECOGNITION,
            {
                "description": screen_desc.description,
                "current_app": screen_desc.current_app,
                "elements": screen_desc.elements,
            },
            ModelRole.VISION,
        )
        recognition_hook = self.callbacks.on_vision_recognition
        if recognition_hook:
            recognition_hook(screen_desc)

        # 2.2 Decision model chooses what to do.
        self._update_state(
            decision_stage=ModelStage.DECIDING,
            decision_active=True,
            vision_active=False,
        )
        self._emit_event(
            DualModelEventType.DECISION_START,
            {"stage": "deciding"},
            ModelRole.DECISION,
        )
        deciding_hook = self.callbacks.on_decision_start
        if deciding_hook:
            deciding_hook()

        # Task context; anomaly information is appended when there is any.
        task_context = f"当前应用: {screen_desc.current_app}"
        error_context = self.anomaly_state.get_error_context()
        if error_context:
            task_context += f"\n\n{DECISION_ERROR_CONTEXT_TEMPLATE.format(error_context=error_context)}"
            logger.info("添加异常上下文到决策请求")

        decision = self.decision_model.make_decision(
            screen_description=screen_desc.description,
            task_context=task_context,
            on_thinking=self._on_decision_thinking,
            on_answer=self._on_decision_answer,
        )

        # Feed the repeated-action detector (only when both parts are set).
        if (
            decision.action
            and decision.target
            and self.anomaly_state.check_action(decision.action, decision.target)
        ):
            logger.warning(
                f"操作重复 {self.anomaly_state.repeated_actions} 次: {decision.action} -> {decision.target}"
            )

        self._update_state(
            decision_result=f"{decision.action}: {decision.target}",
            decision_thinking=decision.reasoning,
            decision_stage=ModelStage.IDLE,
        )
        self._emit_event(
            DualModelEventType.DECISION_RESULT,
            {
                "decision": decision.to_dict(),
                "reasoning": decision.reasoning,
            },
            ModelRole.DECISION,
        )
        result_hook = self.callbacks.on_decision_result
        if result_hook:
            result_hook(decision)

        # The decision model declared the task done.
        if decision.finished:
            self.anomaly_state.record_success()
            return StepResult(
                step=self.step_count,
                success=True,
                finished=True,
                decision=decision,
                screen_desc=screen_desc,
            )

        # A wait is handled locally, without the vision model.
        if decision.action == "wait":
            logger.info("执行等待操作...")
            time.sleep(2)  # wait 2 seconds
            return StepResult(
                step=self.step_count,
                success=True,
                finished=False,
                decision=decision,
                screen_desc=screen_desc,
            )

        # 2.3 Vision model executes the decision.
        self._update_state(
            vision_stage=ModelStage.EXECUTING,
            vision_active=True,
            decision_active=False,
        )

        command = {
            "action": decision.action,
            "target": decision.target,
            "content": decision.content,
        }

        self._emit_event(
            DualModelEventType.ACTION_START,
            {"action": command},
            ModelRole.VISION,
        )
        action_hook = self.callbacks.on_action_start
        if action_hook:
            action_hook(command)

        execution = self.vision_model.execute_decision(
            decision=command,
            screenshot_base64=shot,
        )

        # Book-keep the outcome for failure tracking.
        record = (
            self.anomaly_state.record_success
            if execution.success
            else self.anomaly_state.record_failure
        )
        record()

        self._update_state(
            vision_action=f"{execution.action_type}: {execution.target}",
            vision_stage=ModelStage.IDLE,
            vision_active=False,
        )
        self._emit_event(
            DualModelEventType.ACTION_RESULT,
            {
                "success": execution.success,
                "action_type": execution.action_type,
                "target": execution.target,
                "position": execution.position,
                "message": execution.message,
            },
            ModelRole.VISION,
        )
        outcome_hook = self.callbacks.on_action_result
        if outcome_hook:
            outcome_hook(execution)

        # Step-complete event.
        self._emit_event(
            DualModelEventType.STEP_COMPLETE,
            {
                "step": self.step_count,
                "success": execution.success,
                "finished": execution.finished,
            },
        )

        return StepResult(
            step=self.step_count,
            success=execution.success,
            finished=execution.finished,
            decision=decision,
            screen_desc=screen_desc,
            execution=execution,
        )

    except Exception as e:
        logger.exception(f"步骤执行异常: {e}")
        self.anomaly_state.record_failure()
        return StepResult(
            step=self.step_count,
            success=False,
            finished=False,
            error=str(e),
        )
853
+
854
+ def _update_state(self, **kwargs):
855
+ """更新状态"""
856
+ for key, value in kwargs.items():
857
+ if hasattr(self.state, key):
858
+ setattr(self.state, key, value)
859
+ self.state.current_step = self.step_count
860
+
861
def _on_decision_thinking(self, chunk: str):
    """Forward one reasoning chunk from the decision model.

    The chunk is emitted as a DECISION_THINKING event and then handed to the
    ``on_decision_thinking`` hook when one is set.
    """
    payload = {"chunk": chunk}
    self._emit_event(
        DualModelEventType.DECISION_THINKING,
        payload,
        ModelRole.DECISION,
    )
    listener = self.callbacks.on_decision_thinking
    if listener:
        listener(chunk)
870
+
871
+ def _on_decision_answer(self, chunk: str):
872
+ """决策答案回调"""
873
+ pass # 答案通过 DECISION_RESULT 事件发送
874
+
875
def abort(self):
    """Request that the running task stop.

    Sets ``stop_event``; the run loops check it at the top of each
    iteration.
    """
    stop_signal = self.stop_event
    logger.info("中止任务")
    stop_signal.set()
879
+
880
def reset(self):
    """Return the coordinator to its freshly constructed state.

    Clears the task, plan, counters, stop flag, anomaly tracking and TURBO
    state, installs a new ``DualModelState``, resets the decision model and
    discards every queued event.
    """
    from queue import Empty

    self.current_task = ""
    self.task_plan = None
    self.step_count = 0
    self.stop_event.clear()
    self.state = DualModelState()
    self.anomaly_state.reset()
    self.decision_model.reset()

    # TURBO-mode state.
    self.action_sequence = None
    self.current_action_index = 0
    self.executed_actions = []

    # Drain the event queue. Taking items until the queue reports Empty
    # avoids the check-then-get gap of testing empty() first, and catching
    # only Empty keeps unrelated errors visible.
    try:
        while True:
            self.event_queue.get_nowait()
    except Empty:
        pass

    logger.info("双模型协调器已重置")
903
+
904
def get_state(self) -> dict:
    """Return the current state as produced by the state object's ``to_dict()``."""
    snapshot = self.state.to_dict()
    return snapshot
907
+
908
def get_events(self, timeout: float = 0.1) -> "list[DualModelEvent]":
    """Return the events currently waiting in the queue.

    Waits up to ``timeout`` seconds for the first event, then collects
    whatever else is already queued without blocking again. Blocking on
    every item would keep this call from returning for as long as new
    events keep arriving faster than ``timeout``.

    Args:
        timeout: Maximum seconds to wait for the first event.

    Returns:
        The drained events in queue order; an empty list when none arrived
        in time.
    """
    from queue import Empty

    events = []
    try:
        events.append(self.event_queue.get(timeout=timeout))
        while True:
            events.append(self.event_queue.get_nowait())
    except Empty:
        # Queue exhausted (or nothing arrived within the timeout).
        pass
    return events