autoglm-gui 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__init__.py +11 -0
- AutoGLM_GUI/__main__.py +26 -8
- AutoGLM_GUI/actions/__init__.py +6 -0
- AutoGLM_GUI/actions/handler.py +196 -0
- AutoGLM_GUI/actions/types.py +15 -0
- AutoGLM_GUI/adb/__init__.py +53 -0
- AutoGLM_GUI/adb/apps.py +227 -0
- AutoGLM_GUI/adb/connection.py +323 -0
- AutoGLM_GUI/adb/device.py +171 -0
- AutoGLM_GUI/adb/input.py +67 -0
- AutoGLM_GUI/adb/screenshot.py +11 -0
- AutoGLM_GUI/adb/timing.py +167 -0
- AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
- AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
- AutoGLM_GUI/adb_plus/screenshot.py +22 -1
- AutoGLM_GUI/adb_plus/serial.py +38 -20
- AutoGLM_GUI/adb_plus/touch.py +4 -9
- AutoGLM_GUI/agents/__init__.py +51 -0
- AutoGLM_GUI/agents/events.py +19 -0
- AutoGLM_GUI/agents/factory.py +153 -0
- AutoGLM_GUI/agents/glm/__init__.py +7 -0
- AutoGLM_GUI/agents/glm/agent.py +292 -0
- AutoGLM_GUI/agents/glm/message_builder.py +81 -0
- AutoGLM_GUI/agents/glm/parser.py +110 -0
- AutoGLM_GUI/agents/glm/prompts_en.py +77 -0
- AutoGLM_GUI/agents/glm/prompts_zh.py +75 -0
- AutoGLM_GUI/agents/mai/__init__.py +28 -0
- AutoGLM_GUI/agents/mai/agent.py +405 -0
- AutoGLM_GUI/agents/mai/parser.py +254 -0
- AutoGLM_GUI/agents/mai/prompts.py +103 -0
- AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
- AutoGLM_GUI/agents/protocols.py +27 -0
- AutoGLM_GUI/agents/stream_runner.py +188 -0
- AutoGLM_GUI/api/__init__.py +71 -11
- AutoGLM_GUI/api/agents.py +190 -229
- AutoGLM_GUI/api/control.py +9 -6
- AutoGLM_GUI/api/devices.py +112 -28
- AutoGLM_GUI/api/health.py +13 -0
- AutoGLM_GUI/api/history.py +78 -0
- AutoGLM_GUI/api/layered_agent.py +306 -181
- AutoGLM_GUI/api/mcp.py +11 -10
- AutoGLM_GUI/api/media.py +64 -1
- AutoGLM_GUI/api/scheduled_tasks.py +98 -0
- AutoGLM_GUI/api/version.py +23 -10
- AutoGLM_GUI/api/workflows.py +2 -1
- AutoGLM_GUI/config.py +72 -14
- AutoGLM_GUI/config_manager.py +98 -27
- AutoGLM_GUI/device_adapter.py +263 -0
- AutoGLM_GUI/device_manager.py +248 -29
- AutoGLM_GUI/device_protocol.py +266 -0
- AutoGLM_GUI/devices/__init__.py +49 -0
- AutoGLM_GUI/devices/adb_device.py +200 -0
- AutoGLM_GUI/devices/mock_device.py +185 -0
- AutoGLM_GUI/devices/remote_device.py +177 -0
- AutoGLM_GUI/exceptions.py +3 -3
- AutoGLM_GUI/history_manager.py +164 -0
- AutoGLM_GUI/i18n.py +81 -0
- AutoGLM_GUI/metrics.py +13 -20
- AutoGLM_GUI/model/__init__.py +5 -0
- AutoGLM_GUI/model/message_builder.py +69 -0
- AutoGLM_GUI/model/types.py +24 -0
- AutoGLM_GUI/models/__init__.py +10 -0
- AutoGLM_GUI/models/history.py +96 -0
- AutoGLM_GUI/models/scheduled_task.py +71 -0
- AutoGLM_GUI/parsers/__init__.py +22 -0
- AutoGLM_GUI/parsers/base.py +50 -0
- AutoGLM_GUI/parsers/phone_parser.py +58 -0
- AutoGLM_GUI/phone_agent_manager.py +118 -367
- AutoGLM_GUI/platform_utils.py +31 -2
- AutoGLM_GUI/prompt_config.py +15 -0
- AutoGLM_GUI/prompts/__init__.py +32 -0
- AutoGLM_GUI/scheduler_manager.py +304 -0
- AutoGLM_GUI/schemas.py +272 -63
- AutoGLM_GUI/scrcpy_stream.py +159 -37
- AutoGLM_GUI/server.py +3 -1
- AutoGLM_GUI/socketio_server.py +114 -29
- AutoGLM_GUI/state.py +10 -30
- AutoGLM_GUI/static/assets/{about-DeclntHg.js → about-BQm96DAl.js} +1 -1
- AutoGLM_GUI/static/assets/alert-dialog-B42XxGPR.js +1 -0
- AutoGLM_GUI/static/assets/chat-C0L2gQYG.js +129 -0
- AutoGLM_GUI/static/assets/circle-alert-D4rSJh37.js +1 -0
- AutoGLM_GUI/static/assets/dialog-DZ78cEcj.js +45 -0
- AutoGLM_GUI/static/assets/history-DFBv7TGc.js +1 -0
- AutoGLM_GUI/static/assets/index-Bzyv2yQ2.css +1 -0
- AutoGLM_GUI/static/assets/{index-zQ4KKDHt.js → index-CmZSnDqc.js} +1 -1
- AutoGLM_GUI/static/assets/index-CssG-3TH.js +11 -0
- AutoGLM_GUI/static/assets/label-BCUzE_nm.js +1 -0
- AutoGLM_GUI/static/assets/logs-eoFxn5of.js +1 -0
- AutoGLM_GUI/static/assets/popover-DLsuV5Sx.js +1 -0
- AutoGLM_GUI/static/assets/scheduled-tasks-MyqGJvy_.js +1 -0
- AutoGLM_GUI/static/assets/square-pen-zGWYrdfj.js +1 -0
- AutoGLM_GUI/static/assets/textarea-BX6y7uM5.js +1 -0
- AutoGLM_GUI/static/assets/workflows-CYFs6ssC.js +1 -0
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +142 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/METADATA +178 -92
- autoglm_gui-1.5.0.dist-info/RECORD +157 -0
- mai_agent/base.py +137 -0
- mai_agent/mai_grounding_agent.py +263 -0
- mai_agent/mai_naivigation_agent.py +526 -0
- mai_agent/prompt.py +148 -0
- mai_agent/unified_memory.py +67 -0
- mai_agent/utils.py +73 -0
- AutoGLM_GUI/api/dual_model.py +0 -311
- AutoGLM_GUI/dual_model/__init__.py +0 -53
- AutoGLM_GUI/dual_model/decision_model.py +0 -664
- AutoGLM_GUI/dual_model/dual_agent.py +0 -917
- AutoGLM_GUI/dual_model/protocols.py +0 -354
- AutoGLM_GUI/dual_model/vision_model.py +0 -442
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
- AutoGLM_GUI/phone_agent_patches.py +0 -146
- AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +0 -125
- AutoGLM_GUI/static/assets/dialog-BfdcBs1x.js +0 -45
- AutoGLM_GUI/static/assets/index-5hCCwHA7.css +0 -1
- AutoGLM_GUI/static/assets/index-DHF1NZh0.js +0 -12
- AutoGLM_GUI/static/assets/workflows-xiplap-r.js +0 -1
- autoglm_gui-1.4.0.dist-info/RECORD +0 -100
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.4.0.dist-info → autoglm_gui-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,664 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
决策大模型客户端
|
|
3
|
-
|
|
4
|
-
调用 GLM-4.7 进行任务分析和决策
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import json
|
|
8
|
-
from dataclasses import dataclass, field
|
|
9
|
-
from typing import Callable, Optional
|
|
10
|
-
|
|
11
|
-
from openai import OpenAI
|
|
12
|
-
|
|
13
|
-
from AutoGLM_GUI.logger import logger
|
|
14
|
-
from .protocols import (
|
|
15
|
-
DecisionModelConfig,
|
|
16
|
-
DECISION_SYSTEM_PROMPT,
|
|
17
|
-
DECISION_SYSTEM_PROMPT_FAST,
|
|
18
|
-
DECISION_SYSTEM_PROMPT_TURBO,
|
|
19
|
-
DECISION_REPLAN_PROMPT,
|
|
20
|
-
DECISION_HUMANIZE_PROMPT,
|
|
21
|
-
ThinkingMode,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@dataclass
class TaskPlan:
    """Execution plan produced for a user task.

    Attributes are documented inline; ``raw_response`` keeps the unparsed
    model output and is intentionally left out of :meth:`to_dict`.
    """

    # Short description of the plan.
    summary: str
    # Ordered, human-readable plan steps.
    steps: list[str]
    # Number of device operations the plan is expected to need.
    estimated_actions: int
    # Unparsed model output the plan was built from.
    raw_response: str = ""

    def to_dict(self) -> dict:
        """Return the serializable part of the plan (without ``raw_response``)."""
        exported_fields = ("summary", "steps", "estimated_actions")
        return {name: getattr(self, name) for name in exported_fields}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@dataclass
class ActionStep:
    """A single operation inside an action sequence."""

    # Operation name.
    action: str
    # Description of the element the operation applies to.
    target: str = ""
    # Optional content carried by the step.
    content: Optional[str] = None
    # Flag marking that the content still has to be generated.
    need_generate: bool = False
    # Optional direction for the step.
    direction: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize the step, leaving out every optional field that is falsy."""
        payload = {"action": self.action, "target": self.target}
        optional_fields = {
            "content": self.content,
            # The flag is always exported as a literal True when it is set.
            "need_generate": True if self.need_generate else None,
            "direction": self.direction,
        }
        payload.update(
            {key: value for key, value in optional_fields.items() if value}
        )
        return payload
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
@dataclass
class ActionSequence:
    """Sequence of operations (TURBO mode)."""

    # Short description of what the sequence does.
    summary: str
    # Steps of the sequence, in order.
    actions: list[ActionStep]
    # Checkpoint descriptions attached to the sequence.
    checkpoints: list[str] = field(default_factory=list)
    # Integer step references flagged for humanized content.
    humanize_steps: list[int] = field(default_factory=list)
    # Unparsed model output the sequence was built from.
    raw_response: str = ""

    def to_dict(self) -> dict:
        """Serialize the sequence; ``raw_response`` is not included."""
        serialized_steps = [step.to_dict() for step in self.actions]
        return dict(
            summary=self.summary,
            actions=serialized_steps,
            checkpoints=self.checkpoints,
            humanize_steps=self.humanize_steps,
        )

    def to_plan(self) -> TaskPlan:
        """Convert to a TaskPlan to stay compatible with plan-based callers."""
        step_labels = []
        for step in self.actions:
            step_labels.append(f"{step.action}: {step.target}")
        return TaskPlan(
            summary=self.summary,
            steps=step_labels,
            estimated_actions=len(self.actions),
            raw_response=self.raw_response,
        )
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
@dataclass
class Decision:
    """Result of a single decision round."""

    action: str  # tap, swipe, type, scroll, back, home, launch
    target: str  # description of the target
    reasoning: str  # why this decision was made
    content: Optional[str] = None  # text to enter (used by type operations)
    finished: bool = False
    raw_response: str = ""

    def to_dict(self) -> dict:
        """Serialize the decision; ``raw_response`` is not included."""
        exported_fields = ("action", "target", "reasoning", "content", "finished")
        return {name: getattr(self, name) for name in exported_fields}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
class DecisionModel:
    """
    Decision model client responsible for task analysis and decision making.

    Talks to a chat-completions endpoint through the ``OpenAI`` client, turns
    the streamed text into ``TaskPlan`` / ``ActionSequence`` / ``Decision``
    objects and keeps the running conversation in ``conversation_history``.
    """

    def __init__(
        self,
        config: DecisionModelConfig,
        thinking_mode: ThinkingMode = ThinkingMode.DEEP,
    ):
        """
        Create the client and select the system prompt for the thinking mode.

        Args:
            config: Supplies ``base_url``, ``api_key``, ``model_name``,
                ``max_tokens`` and ``temperature``.
            thinking_mode: TURBO and FAST select their dedicated system
                prompts; every other mode uses the default prompt.
        """
        self.config = config
        self.thinking_mode = thinking_mode
        self.client = OpenAI(
            base_url=config.base_url,
            api_key=config.api_key,
        )
        self.model_name = config.model_name
        self.conversation_history: list[dict] = []
        self.current_task: str = ""

        if thinking_mode == ThinkingMode.TURBO:
            self.system_prompt = DECISION_SYSTEM_PROMPT_TURBO
        elif thinking_mode == ThinkingMode.FAST:
            self.system_prompt = DECISION_SYSTEM_PROMPT_FAST
        else:
            self.system_prompt = DECISION_SYSTEM_PROMPT

        logger.info(
            f"决策大模型初始化: {config.model_name}, 模式: {thinking_mode.value}"
        )

    def _stream_completion(
        self,
        messages: list[dict],
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Call the model in streaming mode and return the final answer text.

        Each streamed delta may carry a ``reasoning_content`` attribute (the
        thinking process) next to ``content`` (the answer); both are forwarded
        to their callback chunk by chunk.

        Args:
            messages: Chat messages sent to the model.
            on_thinking: Called with every reasoning chunk.
            on_answer: Called with every answer chunk.

        Returns:
            The concatenated answer. When the stream carried reasoning but no
            answer at all, the reasoning text is returned instead.

        Raises:
            Exception: Whatever the client raises; it is logged and re-raised.
        """
        logger.debug(f"调用决策大模型,消息数: {len(messages)}")

        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=self.config.max_tokens,
                temperature=self.config.temperature,
                stream=True,
            )

            # Collect chunks in lists and join once instead of repeated "+=".
            reasoning_parts: list[str] = []
            answer_parts: list[str] = []
            done_reasoning = False

            for chunk in response:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta

                # Thinking process (reasoning_content); the attribute may be absent.
                reasoning_chunk = getattr(delta, "reasoning_content", None) or ""
                if reasoning_chunk:
                    reasoning_parts.append(reasoning_chunk)
                    if on_thinking:
                        on_thinking(reasoning_chunk)

                # Final answer (content).
                answer_chunk = delta.content or ""
                if answer_chunk:
                    if not done_reasoning and reasoning_parts:
                        done_reasoning = True
                        logger.debug("思考阶段结束,开始输出答案")

                    answer_parts.append(answer_chunk)
                    if on_answer:
                        on_answer(answer_chunk)

            full_answer = "".join(answer_parts)
            if not full_answer:
                # Nothing arrived in "content": fall back to the reasoning text
                # so callers still get the model output.
                full_answer = "".join(reasoning_parts)

            logger.debug(f"大模型响应完成,答案长度: {len(full_answer)}")
            return full_answer

        except Exception as e:
            logger.error(f"决策大模型调用失败: {e}")
            raise

    def _ask_with_history(
        self,
        user_message: str,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Send one more user turn on top of the stored history.

        The user turn and the assistant reply are both appended to
        ``conversation_history``. If the model call fails, the unanswered
        user turn is removed again so the history is not left with a
        dangling message, and the exception is re-raised.
        """
        user_turn = {"role": "user", "content": user_message}
        self.conversation_history.append(user_turn)
        try:
            response = self._stream_completion(
                self.conversation_history, on_thinking, on_answer
            )
        except Exception:
            # Identity check: a callback may have reset the history meanwhile.
            if self.conversation_history and self.conversation_history[-1] is user_turn:
                self.conversation_history.pop()
            raise
        self.conversation_history.append({"role": "assistant", "content": response})
        return response

    def _start_history(self, task: str, response: str) -> None:
        """Replace the history with system prompt, task and first model reply."""
        self.conversation_history = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": f"任务: {task}"},
            {"role": "assistant", "content": response},
        ]

    @staticmethod
    def _parse_action_steps(raw_actions) -> list[ActionStep]:
        """Build ActionStep objects from the decoded ``actions`` list."""
        return [
            ActionStep(
                action=item.get("action", "tap"),
                target=item.get("target", ""),
                content=item.get("content"),
                need_generate=item.get("need_generate", False),
                direction=item.get("direction"),
            )
            for item in raw_actions
        ]

    @staticmethod
    def _clean_generated_text(content: str, strip_fences: bool = False) -> str:
        """
        Strip whitespace, one pair of wrapping double quotes and, optionally,
        a wrapping Markdown code fence from generated text.
        """
        content = content.strip()
        if content.startswith('"') and content.endswith('"'):
            content = content[1:-1]

        if strip_fences and content.startswith("```") and content.endswith("```"):
            inner = content[3:-3]
            if "\n" in inner:
                # The rest of the opening fence line only holds a language tag.
                inner = inner.split("\n", 1)[1]
                # Drop indentation and the line break in front of the closing fence.
                inner = inner.rstrip(" \t")
                if inner.endswith("\n"):
                    inner = inner[:-1]
            # Single-line fenced text ("```text```") keeps its content instead
            # of being reduced to an empty string.
            content = inner

        return content

    def analyze_task(
        self,
        task: str,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> TaskPlan:
        """
        Analyze the user task and produce an execution plan.

        Args:
            task: User task description.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            TaskPlan: The parsed plan, or a single-step fallback plan built
            from ``task`` when the reply is not a JSON object of type "plan".
        """
        logger.info(f"分析任务: {task[:50]}... (模式: {self.thinking_mode.value})")
        # Keep the task on the instance, as analyze_task_turbo does.
        self.current_task = task

        # Build the messages (using the mode-specific system prompt).
        messages = [
            {"role": "system", "content": self.system_prompt},
            {
                "role": "user",
                "content": f"""请分析以下任务,并制定执行计划:

任务: {task}

请以JSON格式返回任务计划。""",
            },
        ]

        response = self._stream_completion(messages, on_thinking, on_answer)

        plan: Optional[TaskPlan] = None
        try:
            plan_data = self._extract_json(response)
            if plan_data.get("type") == "plan":
                plan = TaskPlan(
                    summary=plan_data.get("summary", task),
                    steps=plan_data.get("steps", []),
                    estimated_actions=plan_data.get("estimated_actions", 5),
                    raw_response=response,
                )
        except Exception as e:
            logger.warning(f"解析任务计划失败: {e}")

        if plan is None:
            # Fallback: treat the whole task as one step.
            plan = TaskPlan(
                summary=task,
                steps=[task],
                estimated_actions=5,
                raw_response=response,
            )

        # Initialize the conversation history.
        self._start_history(task, response)

        logger.info(f"任务计划: {plan.summary}, 预计 {plan.estimated_actions} 步")
        return plan

    def analyze_task_turbo(
        self,
        task: str,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> ActionSequence:
        """
        TURBO mode: analyze the task and generate the complete action sequence.

        All steps are requested from the model in a single call.

        Args:
            task: User task description.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            ActionSequence: The parsed sequence, or a one-step placeholder
            sequence when the reply is not a JSON object of type
            "action_sequence".
        """
        logger.info(f"[TURBO] 分析任务: {task[:50]}...")
        self.current_task = task

        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": f"任务: {task}\n\n请生成完整的操作序列。"},
        ]

        response = self._stream_completion(messages, on_thinking, on_answer)

        sequence: Optional[ActionSequence] = None
        try:
            data = self._extract_json(response)
            if data.get("type") == "action_sequence":
                sequence = ActionSequence(
                    summary=data.get("summary", task),
                    actions=self._parse_action_steps(data.get("actions", [])),
                    checkpoints=data.get("checkpoints", []),
                    humanize_steps=data.get("humanize_steps", []),
                    raw_response=response,
                )
        except Exception as e:
            logger.warning(f"[TURBO] 解析操作序列失败: {e}")

        if sequence is None:
            sequence = ActionSequence(
                summary=task,
                actions=[ActionStep(action="tap", target="未知")],
                raw_response=response,
            )

        self._start_history(task, response)

        logger.info(f"[TURBO] 生成 {len(sequence.actions)} 个操作步骤")
        return sequence

    def replan(
        self,
        current_state: str,
        executed_actions: list[str],
        error_info: str,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> ActionSequence:
        """
        TURBO mode: plan again after a problem occurred.

        Args:
            current_state: Description of the current screen state.
            executed_actions: Operations executed so far.
            error_info: Error information.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            ActionSequence: The new sequence; a sequence without actions when
            the reply cannot be parsed.
        """
        logger.info(f"[TURBO] 重新规划,错误: {error_info[:50]}...")

        prompt = DECISION_REPLAN_PROMPT.format(
            current_state=current_state,
            executed_actions="\n".join([f"- {a}" for a in executed_actions]),
            error_info=error_info,
        )

        response = self._ask_with_history(prompt, on_thinking, on_answer)

        try:
            data = self._extract_json(response)
            return ActionSequence(
                summary=data.get("summary", "重新规划"),
                actions=self._parse_action_steps(data.get("actions", [])),
                checkpoints=data.get("checkpoints", []),
                humanize_steps=data.get("humanize_steps", []),
                raw_response=response,
            )
        except Exception as e:
            logger.warning(f"[TURBO] 解析重规划失败: {e}")
            return ActionSequence(
                summary="重新规划失败",
                actions=[],
                raw_response=response,
            )

    def generate_humanize_content(
        self,
        task_context: str,
        current_scene: str,
        content_type: str,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Generate humanized content (replies, comments, posts, ...).

        The request is sent on its own; the stored conversation history is
        neither used nor modified.

        Args:
            task_context: Background of the task.
            current_scene: Description of the current scene.
            content_type: Kind of content to produce.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            str: The generated text, stripped and without wrapping quotes.
        """
        logger.info(f"[TURBO] 生成人性化内容: {content_type}")

        prompt = DECISION_HUMANIZE_PROMPT.format(
            task_context=task_context,
            current_scene=current_scene,
            content_type=content_type,
        )

        messages = [
            {
                "role": "system",
                "content": "你是一个社交媒体内容创作专家,擅长生成自然、真实、有个性的内容。",
            },
            {"role": "user", "content": prompt},
        ]

        content = self._stream_completion(messages, on_thinking, on_answer)
        content = self._clean_generated_text(content)

        logger.info(f"[TURBO] 生成内容长度: {len(content)}")
        return content

    def make_decision(
        self,
        screen_description: str,
        task_context: Optional[str] = None,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> Decision:
        """
        Decide the next operation from a screen description.

        Args:
            screen_description: Textual description of the screen.
            task_context: Additional task context.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            Decision: The parsed decision. When the reply cannot be parsed the
            action is "unknown" and the whole reply is used as reasoning.
        """
        logger.info("正在做决策...")

        user_message = f"""当前屏幕状态:
{screen_description}

{f"补充信息: {task_context}" if task_context else ""}

请根据屏幕状态,决定下一步操作。以JSON格式返回决策。"""

        # Sends the turn and records both sides in the history.
        response = self._ask_with_history(user_message, on_thinking, on_answer)

        try:
            decision_data = self._extract_json(response)
            decision_type = decision_data.get("type")

            if decision_type == "finish":
                decision = Decision(
                    action="finish",
                    target="",
                    reasoning=decision_data.get("message", "任务完成"),
                    finished=True,
                    raw_response=response,
                )
            else:
                # Replies without an explicit "decision" type are still read
                # as a decision, only with more defensive defaults.
                explicit = decision_type == "decision"
                decision = Decision(
                    action=decision_data.get("action", "tap"),
                    target=decision_data.get("target", "" if explicit else "未知目标"),
                    reasoning=decision_data.get(
                        "reasoning", "" if explicit else response
                    ),
                    content=decision_data.get("content"),
                    finished=decision_data.get("finished", False),
                    raw_response=response,
                )
        except Exception as e:
            logger.warning(f"解析决策失败: {e}")
            # Fallback: use the whole response as the reasoning.
            decision = Decision(
                action="unknown",
                target="",
                reasoning=response,
                raw_response=response,
            )

        logger.info(f"决策: {decision.action} -> {decision.target}")
        return decision

    def generate_content(
        self,
        content_type: str,
        context: str,
        requirements: Optional[str] = None,
        on_thinking: Optional[Callable[[str], None]] = None,
        on_answer: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Generate text that has to be entered (posts, replies, messages, ...).

        The request is sent on its own; the stored conversation history is
        neither used nor modified.

        Args:
            content_type: Kind of content (post, reply, message, ...).
            context: Context information.
            requirements: Specific requirements.
            on_thinking: Callback for the thinking process.
            on_answer: Callback for the answer output.

        Returns:
            str: The generated text without wrapping quotes or code fences.
        """
        logger.info(f"生成内容: {content_type}")

        prompt = f"""请为以下场景生成内容:

内容类型: {content_type}
上下文: {context}
{f"具体要求: {requirements}" if requirements else ""}

请直接返回生成的内容文本,不需要JSON格式,不需要额外解释。"""

        messages = [
            {
                "role": "system",
                "content": "你是一个内容创作助手,擅长生成各类社交媒体内容。请直接返回内容,不要添加任何解释或格式标记。",
            },
            {"role": "user", "content": prompt},
        ]

        content = self._stream_completion(messages, on_thinking, on_answer)

        # Clean up the text (remove possible quotes and format markers).
        content = self._clean_generated_text(content, strip_fences=True)

        logger.info(f"生成内容完成,长度: {len(content)}")
        return content

    def _extract_json(self, text: str) -> dict:
        """
        Extract the first JSON object contained in ``text``.

        Tried in order: the whole text, a ```json fenced block, any fenced
        block, then every "{" position in the text.

        Args:
            text: Raw model output.

        Returns:
            dict: The decoded JSON object.

        Raises:
            ValueError: If no JSON object can be decoded from the text.
        """
        import re

        text = text.strip()

        candidates = [text]
        fenced_json = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
        if fenced_json:
            candidates.append(fenced_json.group(1))
        fenced_any = re.search(r"```\s*(.*?)\s*```", text, re.DOTALL)
        if fenced_any:
            candidates.append(fenced_any.group(1))

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Callers use .get(), so only a JSON object is acceptable.
            if isinstance(parsed, dict):
                return parsed

        # Let the JSON decoder find the end of the object: unlike counting
        # braces it is not confused by "{" / "}" inside string values, and a
        # later "{" is tried when an earlier one does not start valid JSON.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(parsed, dict):
                    return parsed
            start = text.find("{", start + 1)

        raise ValueError(f"无法从文本中提取JSON: {text[:100]}...")

    def reset(self):
        """Clear the conversation history."""
        self.conversation_history = []
        logger.info("决策大模型对话历史已重置")
|