autoglm-gui 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff shows the changes between two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (135)
  1. AutoGLM_GUI/__init__.py +11 -0
  2. AutoGLM_GUI/__main__.py +26 -4
  3. AutoGLM_GUI/actions/__init__.py +6 -0
  4. phone_agent/actions/handler_ios.py → AutoGLM_GUI/actions/handler.py +30 -112
  5. AutoGLM_GUI/actions/types.py +15 -0
  6. {phone_agent → AutoGLM_GUI}/adb/__init__.py +25 -23
  7. {phone_agent → AutoGLM_GUI}/adb/connection.py +5 -40
  8. {phone_agent → AutoGLM_GUI}/adb/device.py +12 -94
  9. {phone_agent → AutoGLM_GUI}/adb/input.py +6 -47
  10. AutoGLM_GUI/adb/screenshot.py +11 -0
  11. {phone_agent/config → AutoGLM_GUI/adb}/timing.py +1 -1
  12. AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
  13. AutoGLM_GUI/adb_plus/screenshot.py +22 -1
  14. AutoGLM_GUI/adb_plus/serial.py +38 -20
  15. AutoGLM_GUI/adb_plus/touch.py +4 -9
  16. AutoGLM_GUI/agents/__init__.py +43 -12
  17. AutoGLM_GUI/agents/events.py +19 -0
  18. AutoGLM_GUI/agents/factory.py +31 -38
  19. AutoGLM_GUI/agents/glm/__init__.py +7 -0
  20. AutoGLM_GUI/agents/glm/agent.py +297 -0
  21. AutoGLM_GUI/agents/glm/message_builder.py +81 -0
  22. AutoGLM_GUI/agents/glm/parser.py +110 -0
  23. {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_en.py +7 -9
  24. {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_zh.py +18 -25
  25. AutoGLM_GUI/agents/mai/__init__.py +28 -0
  26. AutoGLM_GUI/agents/mai/agent.py +408 -0
  27. AutoGLM_GUI/agents/mai/parser.py +254 -0
  28. AutoGLM_GUI/agents/mai/prompts.py +103 -0
  29. AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
  30. AutoGLM_GUI/agents/protocols.py +12 -8
  31. AutoGLM_GUI/agents/stream_runner.py +193 -0
  32. AutoGLM_GUI/api/__init__.py +40 -21
  33. AutoGLM_GUI/api/agents.py +181 -239
  34. AutoGLM_GUI/api/control.py +9 -6
  35. AutoGLM_GUI/api/devices.py +102 -12
  36. AutoGLM_GUI/api/history.py +104 -0
  37. AutoGLM_GUI/api/layered_agent.py +67 -15
  38. AutoGLM_GUI/api/media.py +64 -1
  39. AutoGLM_GUI/api/scheduled_tasks.py +98 -0
  40. AutoGLM_GUI/config.py +81 -0
  41. AutoGLM_GUI/config_manager.py +68 -51
  42. AutoGLM_GUI/device_manager.py +248 -29
  43. AutoGLM_GUI/device_protocol.py +1 -1
  44. AutoGLM_GUI/devices/adb_device.py +5 -10
  45. AutoGLM_GUI/devices/mock_device.py +4 -2
  46. AutoGLM_GUI/devices/remote_device.py +8 -3
  47. AutoGLM_GUI/history_manager.py +164 -0
  48. AutoGLM_GUI/model/__init__.py +5 -0
  49. AutoGLM_GUI/model/message_builder.py +69 -0
  50. AutoGLM_GUI/model/types.py +24 -0
  51. AutoGLM_GUI/models/__init__.py +10 -0
  52. AutoGLM_GUI/models/history.py +140 -0
  53. AutoGLM_GUI/models/scheduled_task.py +71 -0
  54. AutoGLM_GUI/parsers/__init__.py +22 -0
  55. AutoGLM_GUI/parsers/base.py +50 -0
  56. AutoGLM_GUI/parsers/phone_parser.py +58 -0
  57. AutoGLM_GUI/phone_agent_manager.py +62 -396
  58. AutoGLM_GUI/platform_utils.py +26 -0
  59. AutoGLM_GUI/prompt_config.py +15 -0
  60. AutoGLM_GUI/prompts/__init__.py +32 -0
  61. AutoGLM_GUI/scheduler_manager.py +350 -0
  62. AutoGLM_GUI/schemas.py +246 -72
  63. AutoGLM_GUI/scrcpy_stream.py +142 -24
  64. AutoGLM_GUI/socketio_server.py +100 -27
  65. AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-CfwX1Cmc.js} +1 -1
  66. AutoGLM_GUI/static/assets/alert-dialog-CtGlN2IJ.js +1 -0
  67. AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
  68. AutoGLM_GUI/static/assets/circle-alert-t08bEMPO.js +1 -0
  69. AutoGLM_GUI/static/assets/dialog-FNwZJFwk.js +45 -0
  70. AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
  71. AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
  72. AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-BaLMSqd3.js} +1 -1
  73. AutoGLM_GUI/static/assets/index-CTHbFvKl.js +11 -0
  74. AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
  75. AutoGLM_GUI/static/assets/label-DJFevVmr.js +1 -0
  76. AutoGLM_GUI/static/assets/logs-RW09DyYY.js +1 -0
  77. AutoGLM_GUI/static/assets/popover--JTJrE5v.js +1 -0
  78. AutoGLM_GUI/static/assets/scheduled-tasks-DTRKsQXF.js +1 -0
  79. AutoGLM_GUI/static/assets/square-pen-CPK_K680.js +1 -0
  80. AutoGLM_GUI/static/assets/textarea-PRmVnWq5.js +1 -0
  81. AutoGLM_GUI/static/assets/workflows-CdcsAoaT.js +1 -0
  82. AutoGLM_GUI/static/index.html +2 -2
  83. AutoGLM_GUI/types.py +17 -0
  84. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +179 -130
  85. autoglm_gui-1.5.1.dist-info/RECORD +118 -0
  86. AutoGLM_GUI/agents/mai_adapter.py +0 -627
  87. AutoGLM_GUI/api/dual_model.py +0 -317
  88. AutoGLM_GUI/device_adapter.py +0 -263
  89. AutoGLM_GUI/dual_model/__init__.py +0 -53
  90. AutoGLM_GUI/dual_model/decision_model.py +0 -664
  91. AutoGLM_GUI/dual_model/dual_agent.py +0 -917
  92. AutoGLM_GUI/dual_model/protocols.py +0 -354
  93. AutoGLM_GUI/dual_model/vision_model.py +0 -442
  94. AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
  95. AutoGLM_GUI/phone_agent_patches.py +0 -147
  96. AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
  97. AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
  98. AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
  99. AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
  100. AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
  101. autoglm_gui-1.4.1.dist-info/RECORD +0 -117
  102. mai_agent/base.py +0 -137
  103. mai_agent/mai_grounding_agent.py +0 -263
  104. mai_agent/mai_naivigation_agent.py +0 -526
  105. mai_agent/prompt.py +0 -148
  106. mai_agent/unified_memory.py +0 -67
  107. mai_agent/utils.py +0 -73
  108. phone_agent/__init__.py +0 -12
  109. phone_agent/actions/__init__.py +0 -5
  110. phone_agent/actions/handler.py +0 -400
  111. phone_agent/adb/screenshot.py +0 -108
  112. phone_agent/agent.py +0 -253
  113. phone_agent/agent_ios.py +0 -277
  114. phone_agent/config/__init__.py +0 -53
  115. phone_agent/config/apps_harmonyos.py +0 -256
  116. phone_agent/config/apps_ios.py +0 -339
  117. phone_agent/config/prompts.py +0 -80
  118. phone_agent/device_factory.py +0 -166
  119. phone_agent/hdc/__init__.py +0 -53
  120. phone_agent/hdc/connection.py +0 -384
  121. phone_agent/hdc/device.py +0 -269
  122. phone_agent/hdc/input.py +0 -145
  123. phone_agent/hdc/screenshot.py +0 -127
  124. phone_agent/model/__init__.py +0 -5
  125. phone_agent/model/client.py +0 -290
  126. phone_agent/xctest/__init__.py +0 -47
  127. phone_agent/xctest/connection.py +0 -379
  128. phone_agent/xctest/device.py +0 -472
  129. phone_agent/xctest/input.py +0 -311
  130. phone_agent/xctest/screenshot.py +0 -226
  131. {phone_agent/config → AutoGLM_GUI/adb}/apps.py +0 -0
  132. {phone_agent/config → AutoGLM_GUI}/i18n.py +0 -0
  133. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
  134. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
  135. {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
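The renames above fold the old `phone_agent` package into `AutoGLM_GUI` (for example, `phone_agent/adb/*` moves to `AutoGLM_GUI/adb/*`, and the GLM prompt modules move under `AutoGLM_GUI/agents/glm/`). A minimal sketch of the import-path migration this implies for downstream code; the module paths are taken from the file list above, while the names each module exports are not shown in this diff and are assumptions:

```python
# Hypothetical import-path migration from 1.4.1 to 1.5.1, based only on the
# renames listed above. Exported symbols are not verified against the wheels.

# 1.4.1 layout (old paths, shown for comparison):
#   from phone_agent.adb import connection, device, input
#   from phone_agent.config import prompts_en, prompts_zh

# 1.5.1 layout (new paths, per the rename entries):
from AutoGLM_GUI.adb import connection, device, input
from AutoGLM_GUI.agents.glm import prompts_en, prompts_zh
```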
AutoGLM_GUI/dual_model/decision_model.py (deleted)
@@ -1,664 +0,0 @@
-"""
-Decision model client
-
-Calls GLM-4.7 for task analysis and decision making
-"""
-
-import json
-from dataclasses import dataclass, field
-from typing import Callable, Optional
-
-from openai import OpenAI
-
-from AutoGLM_GUI.logger import logger
-from .protocols import (
-    DecisionModelConfig,
-    DECISION_SYSTEM_PROMPT,
-    DECISION_SYSTEM_PROMPT_FAST,
-    DECISION_SYSTEM_PROMPT_TURBO,
-    DECISION_REPLAN_PROMPT,
-    DECISION_HUMANIZE_PROMPT,
-    ThinkingMode,
-)
-
-
-@dataclass
-class TaskPlan:
-    """Task plan"""
-
-    summary: str
-    steps: list[str]
-    estimated_actions: int
-    raw_response: str = ""
-
-    def to_dict(self) -> dict:
-        return {
-            "summary": self.summary,
-            "steps": self.steps,
-            "estimated_actions": self.estimated_actions,
-        }
-
-
-@dataclass
-class ActionStep:
-    """A single action step"""
-
-    action: str
-    target: str = ""
-    content: Optional[str] = None
-    need_generate: bool = False
-    direction: Optional[str] = None
-
-    def to_dict(self) -> dict[str, str | bool]:
-        result: dict[str, str | bool] = {"action": self.action, "target": self.target}
-        if self.content:
-            result["content"] = self.content
-        if self.need_generate:
-            result["need_generate"] = True
-        if self.direction:
-            result["direction"] = self.direction
-        return result
-
-
-@dataclass
-class ActionSequence:
-    """Action sequence (TURBO mode)"""
-
-    summary: str
-    actions: list[ActionStep]
-    checkpoints: list[str] = field(default_factory=list)
-    humanize_steps: list[int] = field(default_factory=list)
-    raw_response: str = ""
-
-    def to_dict(self) -> dict:
-        return {
-            "summary": self.summary,
-            "actions": [a.to_dict() for a in self.actions],
-            "checkpoints": self.checkpoints,
-            "humanize_steps": self.humanize_steps,
-        }
-
-    def to_plan(self) -> TaskPlan:
-        """Convert to a TaskPlan for compatibility"""
-        return TaskPlan(
-            summary=self.summary,
-            steps=[f"{a.action}: {a.target}" for a in self.actions],
-            estimated_actions=len(self.actions),
-            raw_response=self.raw_response,
-        )
-
-
-@dataclass
-class Decision:
-    """Decision result"""
-
-    action: str  # tap, swipe, type, scroll, back, home, launch
-    target: str  # Target description
-    reasoning: str  # Reasoning behind the decision
-    content: Optional[str] = None  # Input text (used for the type action)
-    finished: bool = False
-    raw_response: str = ""
-
-    def to_dict(self) -> dict:
-        return {
-            "action": self.action,
-            "target": self.target,
-            "reasoning": self.reasoning,
-            "content": self.content,
-            "finished": self.finished,
-        }
-
-
-class DecisionModel:
-    """
-    Decision model, responsible for task analysis and decision making.
-
-    Uses GLM-4.7 or another highly capable model to understand the screen state from text,
-    make action decisions, and guide the small model's execution.
-    """
-
-    def __init__(
-        self,
-        config: DecisionModelConfig,
-        thinking_mode: ThinkingMode = ThinkingMode.DEEP,
-    ):
-        self.config = config
-        self.thinking_mode = thinking_mode
-        self.client = OpenAI(
-            base_url=config.base_url,
-            api_key=config.api_key,
-        )  # type: ignore[call-arg]
-        self.model_name = config.model_name
-        self.conversation_history: list[dict] = []
-        self.current_task: str = ""
-
-        if thinking_mode == ThinkingMode.TURBO:
-            self.system_prompt = DECISION_SYSTEM_PROMPT_TURBO
-        elif thinking_mode == ThinkingMode.FAST:
-            self.system_prompt = DECISION_SYSTEM_PROMPT_FAST
-        else:
-            self.system_prompt = DECISION_SYSTEM_PROMPT
-
-        logger.info(
-            f"Decision model initialized: {config.model_name}, mode: {thinking_mode.value}"
-        )
-
-    def _stream_completion(
-        self,
-        messages: list[dict],
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> str:
-        """
-        Call the model with streaming.
-
-        GLM-4.7 supports the reasoning_content field, which separates the thinking process from the final answer.
-        """
-        logger.debug(f"Calling decision model, message count: {len(messages)}")
-
-        try:
-            response = self.client.chat.completions.create(
-                model=self.model_name,
-                messages=messages,  # type: ignore[arg-type]
-                max_tokens=self.config.max_tokens,
-                temperature=self.config.temperature,
-                stream=True,
-            )
-
-            full_reasoning = ""
-            full_answer = ""
-            done_reasoning = False
-
-            for chunk in response:
-                if chunk.choices:
-                    delta = chunk.choices[0].delta
-
-                    # Handle the thinking process (reasoning_content)
-                    reasoning_chunk = getattr(delta, "reasoning_content", None) or ""
-                    if reasoning_chunk:
-                        full_reasoning += reasoning_chunk
-                        if on_thinking:
-                            on_thinking(reasoning_chunk)
-
-                    # Handle the final answer (content)
-                    answer_chunk = delta.content or ""
-                    if answer_chunk:
-                        if not done_reasoning and full_reasoning:
-                            done_reasoning = True
-                            logger.debug("Thinking phase finished, starting answer output")
-
-                        full_answer += answer_chunk
-                        if on_answer:
-                            on_answer(answer_chunk)
-
-            # If the model does not support reasoning_content, the whole response is in content
-            if not full_answer and full_reasoning:
-                full_answer = full_reasoning
-                full_reasoning = ""
-
-            logger.debug(f"Model response complete, answer length: {len(full_answer)}")
-            return full_answer
-
-        except Exception as e:
-            logger.error(f"Decision model call failed: {e}")
-            raise
-
-    def analyze_task(
-        self,
-        task: str,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> TaskPlan:
-        """
-        Analyze the user's task and create an execution plan.
-
-        Args:
-            task: User task description
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            TaskPlan: The task execution plan
-        """
-        logger.info(f"Analyzing task: {task[:50]}... (mode: {self.thinking_mode.value})")
-
-        # Build messages (using the dynamic system prompt)
-        messages = [
-            {"role": "system", "content": self.system_prompt},
-            {
-                "role": "user",
-                "content": f"""Please analyze the following task and create an execution plan:
-
-Task: {task}
-
-Return the task plan in JSON format.""",
-            },
-        ]
-
-        # Call the model
-        response = self._stream_completion(messages, on_thinking, on_answer)
-
-        # Parse the response
-        try:
-            # Try to extract JSON
-            plan_data = self._extract_json(response)
-
-            if plan_data.get("type") == "plan":
-                plan = TaskPlan(
-                    summary=plan_data.get("summary", task),
-                    steps=plan_data.get("steps", []),
-                    estimated_actions=plan_data.get("estimated_actions", 5),
-                    raw_response=response,
-                )
-            else:
-                # Fallback handling
-                plan = TaskPlan(
-                    summary=task,
-                    steps=[task],
-                    estimated_actions=5,
-                    raw_response=response,
-                )
-        except Exception as e:
-            logger.warning(f"Failed to parse task plan: {e}")
-            plan = TaskPlan(
-                summary=task,
-                steps=[task],
-                estimated_actions=5,
-                raw_response=response,
-            )
-
-        # Initialize the conversation history (using the dynamic system prompt)
-        self.conversation_history = [
-            {"role": "system", "content": self.system_prompt},
-            {"role": "user", "content": f"Task: {task}"},
-            {"role": "assistant", "content": response},
-        ]
-
-        logger.info(f"Task plan: {plan.summary}, estimated {plan.estimated_actions} steps")
-        return plan
-
-    def analyze_task_turbo(
-        self,
-        task: str,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> ActionSequence:
-        """
-        TURBO mode: analyze the task and generate a complete action sequence.
-
-        All action steps are generated in one pass and executed directly by the vision model; the decision model is only called again when something goes wrong.
-
-        Args:
-            task: User task description
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            ActionSequence: The action sequence
-        """
-        logger.info(f"[TURBO] Analyzing task: {task[:50]}...")
-        self.current_task = task
-
-        messages = [
-            {"role": "system", "content": self.system_prompt},
-            {"role": "user", "content": f"Task: {task}\n\nPlease generate the complete action sequence."},
-        ]
-
-        response = self._stream_completion(messages, on_thinking, on_answer)
-
-        try:
-            data = self._extract_json(response)
-
-            if data.get("type") == "action_sequence":
-                actions = []
-                for a in data.get("actions", []):
-                    actions.append(
-                        ActionStep(
-                            action=a.get("action", "tap"),
-                            target=a.get("target", ""),
-                            content=a.get("content"),
-                            need_generate=a.get("need_generate", False),
-                            direction=a.get("direction"),
-                        )
-                    )
-
-                sequence = ActionSequence(
-                    summary=data.get("summary", task),
-                    actions=actions,
-                    checkpoints=data.get("checkpoints", []),
-                    humanize_steps=data.get("humanize_steps", []),
-                    raw_response=response,
-                )
-            else:
-                sequence = ActionSequence(
-                    summary=task,
-                    actions=[ActionStep(action="tap", target="unknown")],
-                    raw_response=response,
-                )
-        except Exception as e:
-            logger.warning(f"[TURBO] Failed to parse action sequence: {e}")
-            sequence = ActionSequence(
-                summary=task,
-                actions=[ActionStep(action="tap", target="unknown")],
-                raw_response=response,
-            )
-
-        self.conversation_history = [
-            {"role": "system", "content": self.system_prompt},
-            {"role": "user", "content": f"Task: {task}"},
-            {"role": "assistant", "content": response},
-        ]
-
-        logger.info(f"[TURBO] Generated {len(sequence.actions)} action steps")
-        return sequence
-
-    def replan(
-        self,
-        current_state: str,
-        executed_actions: list[str],
-        error_info: str,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> ActionSequence:
-        """
-        TURBO mode: replan when a problem is encountered.
-
-        Args:
-            current_state: Description of the current screen state
-            executed_actions: List of actions already executed
-            error_info: Error information
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            ActionSequence: The new action sequence
-        """
-        logger.info(f"[TURBO] Replanning, error: {error_info[:50]}...")
-
-        prompt = DECISION_REPLAN_PROMPT.format(
-            current_state=current_state,
-            executed_actions="\n".join([f"- {a}" for a in executed_actions]),
-            error_info=error_info,
-        )
-
-        self.conversation_history.append({"role": "user", "content": prompt})
-        response = self._stream_completion(
-            self.conversation_history, on_thinking, on_answer
-        )
-        self.conversation_history.append({"role": "assistant", "content": response})
-
-        try:
-            data = self._extract_json(response)
-            actions = []
-            for a in data.get("actions", []):
-                actions.append(
-                    ActionStep(
-                        action=a.get("action", "tap"),
-                        target=a.get("target", ""),
-                        content=a.get("content"),
-                        need_generate=a.get("need_generate", False),
-                        direction=a.get("direction"),
-                    )
-                )
-
-            return ActionSequence(
-                summary=data.get("summary", "Replan"),
-                actions=actions,
-                checkpoints=data.get("checkpoints", []),
-                humanize_steps=data.get("humanize_steps", []),
-                raw_response=response,
-            )
-        except Exception as e:
-            logger.warning(f"[TURBO] Failed to parse replan result: {e}")
-            return ActionSequence(
-                summary="Replanning failed",
-                actions=[],
-                raw_response=response,
-            )
-
-    def generate_humanize_content(
-        self,
-        task_context: str,
-        current_scene: str,
-        content_type: str,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> str:
-        """
-        Generate humanized content (replies, comments, posts, etc.).
-
-        Args:
-            task_context: Task background
-            current_scene: Description of the current scene
-            content_type: Type of content to generate
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            str: The generated content
-        """
-        logger.info(f"[TURBO] Generating humanized content: {content_type}")
-
-        prompt = DECISION_HUMANIZE_PROMPT.format(
-            task_context=task_context,
-            current_scene=current_scene,
-            content_type=content_type,
-        )
-
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a social media content creation expert, skilled at producing natural, authentic, personable content.",
-            },
-            {"role": "user", "content": prompt},
-        ]
-
-        content = self._stream_completion(messages, on_thinking, on_answer)
-        content = content.strip()
-        if content.startswith('"') and content.endswith('"'):
-            content = content[1:-1]
-
-        logger.info(f"[TURBO] Generated content length: {len(content)}")
-        return content
-
-    def make_decision(
-        self,
-        screen_description: str,
-        task_context: Optional[str] = None,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> Decision:
-        """
-        Make a decision based on the screen description.
-
-        Args:
-            screen_description: Screen description provided by the small model
-            task_context: Additional task context
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            Decision: The decision result
-        """
-        logger.info("Making a decision...")
-
-        # Build the message
-        user_message = f"""Current screen state:
-{screen_description}
-
-{f"Additional info: {task_context}" if task_context else ""}
-
-Based on the screen state, decide the next action. Return the decision in JSON format."""
-
-        self.conversation_history.append({"role": "user", "content": user_message})
-
-        # Call the model
-        response = self._stream_completion(
-            self.conversation_history,
-            on_thinking,
-            on_answer,
-        )
-
-        # Save the assistant response
-        self.conversation_history.append({"role": "assistant", "content": response})
-
-        # Parse the decision
-        try:
-            decision_data = self._extract_json(response)
-
-            if decision_data.get("type") == "finish":
-                decision = Decision(
-                    action="finish",
-                    target="",
-                    reasoning=decision_data.get("message", "Task finished"),
-                    finished=True,
-                    raw_response=response,
-                )
-            elif decision_data.get("type") == "decision":
-                decision = Decision(
-                    action=decision_data.get("action", "tap"),
-                    target=decision_data.get("target", ""),
-                    reasoning=decision_data.get("reasoning", ""),
-                    content=decision_data.get("content"),
-                    finished=decision_data.get("finished", False),
-                    raw_response=response,
-                )
-            else:
-                # Try to parse directly as a decision
-                decision = Decision(
-                    action=decision_data.get("action", "tap"),
-                    target=decision_data.get("target", "unknown target"),
-                    reasoning=decision_data.get("reasoning", response),
-                    content=decision_data.get("content"),
-                    finished=decision_data.get("finished", False),
-                    raw_response=response,
-                )
-        except Exception as e:
-            logger.warning(f"Failed to parse decision: {e}")
-            # Fallback: use the whole response as the reasoning
-            decision = Decision(
-                action="unknown",
-                target="",
-                reasoning=response,
-                raw_response=response,
-            )
-
-        logger.info(f"Decision: {decision.action} -> {decision.target}")
-        return decision
-
-    def generate_content(
-        self,
-        content_type: str,
-        context: str,
-        requirements: Optional[str] = None,
-        on_thinking: Optional[Callable[[str], None]] = None,
-        on_answer: Optional[Callable[[str], None]] = None,
-    ) -> str:
-        """
-        Generate content to be typed (posts, replies, messages, etc.).
-
-        Args:
-            content_type: Type of content (post, reply, message, etc.)
-            context: Context information
-            requirements: Specific requirements
-            on_thinking: Callback for the thinking stream
-            on_answer: Callback for the answer stream
-
-        Returns:
-            str: The generated content
-        """
-        logger.info(f"Generating content: {content_type}")
-
-        prompt = f"""Please generate content for the following scenario:
-
-Content type: {content_type}
-Context: {context}
-{f"Specific requirements: {requirements}" if requirements else ""}
-
-Return only the generated text, without JSON formatting and without extra explanation."""
-
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a content creation assistant, skilled at producing all kinds of social media content. Return the content directly, without any explanation or formatting markers.",
-            },
-            {"role": "user", "content": prompt},
-        ]
-
-        content = self._stream_completion(messages, on_thinking, on_answer)
-
-        # Clean the content (remove possible quotes and formatting markers)
-        content = content.strip()
-        if content.startswith('"') and content.endswith('"'):
-            content = content[1:-1]
-        if content.startswith("```") and content.endswith("```"):
-            lines = content.split("\n")
-            content = "\n".join(lines[1:-1])
-
-        logger.info(f"Content generation complete, length: {len(content)}")
-        return content
-
-    def _extract_json(self, text: str) -> dict:
-        """Extract JSON from text"""
-        import re
-
-        # Clean the text
-        text = text.strip()
-
-        # Try to parse it directly
-        try:
-            return json.loads(text)
-        except json.JSONDecodeError:
-            pass
-
-        # Try to extract a ```json ... ``` code block
-        json_match = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
-        if json_match:
-            try:
-                return json.loads(json_match.group(1))
-            except json.JSONDecodeError:
-                pass
-
-        # Try to extract a ``` ... ``` code block (without the json tag)
-        code_match = re.search(r"```\s*(.*?)\s*```", text, re.DOTALL)
-        if code_match:
-            try:
-                return json.loads(code_match.group(1))
-            except json.JSONDecodeError:
-                pass
-
-        # Try to find the first { and match it to its closing }
-        start_idx = text.find("{")
-        if start_idx != -1:
-            brace_count = 0
-            end_idx = start_idx
-            for i in range(start_idx, len(text)):
-                if text[i] == "{":
-                    brace_count += 1
-                elif text[i] == "}":
-                    brace_count -= 1
-                    if brace_count == 0:
-                        end_idx = i + 1
-                        break
-
-            if end_idx > start_idx:
-                json_str = text[start_idx:end_idx]
-                try:
-                    return json.loads(json_str)
-                except json.JSONDecodeError:
-                    pass
-
-        # As a last resort, extract with a non-greedy regex
-        brace_match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", text)
-        if brace_match:
-            try:
-                return json.loads(brace_match.group(0))
-            except json.JSONDecodeError:
-                pass
-
-        raise ValueError(f"Could not extract JSON from the text: {text[:100]}...")
-
-    def reset(self):
-        """Reset the conversation history"""
-        self.conversation_history = []
-        logger.info("Decision model conversation history has been reset")