autoglm-gui 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__init__.py +11 -0
- AutoGLM_GUI/__main__.py +26 -4
- AutoGLM_GUI/actions/__init__.py +6 -0
- phone_agent/actions/handler_ios.py → AutoGLM_GUI/actions/handler.py +30 -112
- AutoGLM_GUI/actions/types.py +15 -0
- {phone_agent → AutoGLM_GUI}/adb/__init__.py +25 -23
- {phone_agent → AutoGLM_GUI}/adb/connection.py +5 -40
- {phone_agent → AutoGLM_GUI}/adb/device.py +12 -94
- {phone_agent → AutoGLM_GUI}/adb/input.py +6 -47
- AutoGLM_GUI/adb/screenshot.py +11 -0
- {phone_agent/config → AutoGLM_GUI/adb}/timing.py +1 -1
- AutoGLM_GUI/adb_plus/keyboard_installer.py +4 -2
- AutoGLM_GUI/adb_plus/screenshot.py +22 -1
- AutoGLM_GUI/adb_plus/serial.py +38 -20
- AutoGLM_GUI/adb_plus/touch.py +4 -9
- AutoGLM_GUI/agents/__init__.py +43 -12
- AutoGLM_GUI/agents/events.py +19 -0
- AutoGLM_GUI/agents/factory.py +31 -38
- AutoGLM_GUI/agents/glm/__init__.py +7 -0
- AutoGLM_GUI/agents/glm/agent.py +297 -0
- AutoGLM_GUI/agents/glm/message_builder.py +81 -0
- AutoGLM_GUI/agents/glm/parser.py +110 -0
- {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_en.py +7 -9
- {phone_agent/config → AutoGLM_GUI/agents/glm}/prompts_zh.py +18 -25
- AutoGLM_GUI/agents/mai/__init__.py +28 -0
- AutoGLM_GUI/agents/mai/agent.py +408 -0
- AutoGLM_GUI/agents/mai/parser.py +254 -0
- AutoGLM_GUI/agents/mai/prompts.py +103 -0
- AutoGLM_GUI/agents/mai/traj_memory.py +91 -0
- AutoGLM_GUI/agents/protocols.py +12 -8
- AutoGLM_GUI/agents/stream_runner.py +193 -0
- AutoGLM_GUI/api/__init__.py +40 -21
- AutoGLM_GUI/api/agents.py +181 -239
- AutoGLM_GUI/api/control.py +9 -6
- AutoGLM_GUI/api/devices.py +102 -12
- AutoGLM_GUI/api/history.py +104 -0
- AutoGLM_GUI/api/layered_agent.py +67 -15
- AutoGLM_GUI/api/media.py +64 -1
- AutoGLM_GUI/api/scheduled_tasks.py +98 -0
- AutoGLM_GUI/config.py +81 -0
- AutoGLM_GUI/config_manager.py +68 -51
- AutoGLM_GUI/device_manager.py +248 -29
- AutoGLM_GUI/device_protocol.py +1 -1
- AutoGLM_GUI/devices/adb_device.py +5 -10
- AutoGLM_GUI/devices/mock_device.py +4 -2
- AutoGLM_GUI/devices/remote_device.py +8 -3
- AutoGLM_GUI/history_manager.py +164 -0
- AutoGLM_GUI/model/__init__.py +5 -0
- AutoGLM_GUI/model/message_builder.py +69 -0
- AutoGLM_GUI/model/types.py +24 -0
- AutoGLM_GUI/models/__init__.py +10 -0
- AutoGLM_GUI/models/history.py +140 -0
- AutoGLM_GUI/models/scheduled_task.py +71 -0
- AutoGLM_GUI/parsers/__init__.py +22 -0
- AutoGLM_GUI/parsers/base.py +50 -0
- AutoGLM_GUI/parsers/phone_parser.py +58 -0
- AutoGLM_GUI/phone_agent_manager.py +62 -396
- AutoGLM_GUI/platform_utils.py +26 -0
- AutoGLM_GUI/prompt_config.py +15 -0
- AutoGLM_GUI/prompts/__init__.py +32 -0
- AutoGLM_GUI/scheduler_manager.py +350 -0
- AutoGLM_GUI/schemas.py +246 -72
- AutoGLM_GUI/scrcpy_stream.py +142 -24
- AutoGLM_GUI/socketio_server.py +100 -27
- AutoGLM_GUI/static/assets/{about-_XNhzQZX.js → about-CfwX1Cmc.js} +1 -1
- AutoGLM_GUI/static/assets/alert-dialog-CtGlN2IJ.js +1 -0
- AutoGLM_GUI/static/assets/chat-BYa-foUI.js +129 -0
- AutoGLM_GUI/static/assets/circle-alert-t08bEMPO.js +1 -0
- AutoGLM_GUI/static/assets/dialog-FNwZJFwk.js +45 -0
- AutoGLM_GUI/static/assets/eye-D0UPWCWC.js +1 -0
- AutoGLM_GUI/static/assets/history-CRo95B7i.js +1 -0
- AutoGLM_GUI/static/assets/{index-Cy8TmmHV.js → index-BaLMSqd3.js} +1 -1
- AutoGLM_GUI/static/assets/index-CTHbFvKl.js +11 -0
- AutoGLM_GUI/static/assets/index-CV7jGxGm.css +1 -0
- AutoGLM_GUI/static/assets/label-DJFevVmr.js +1 -0
- AutoGLM_GUI/static/assets/logs-RW09DyYY.js +1 -0
- AutoGLM_GUI/static/assets/popover--JTJrE5v.js +1 -0
- AutoGLM_GUI/static/assets/scheduled-tasks-DTRKsQXF.js +1 -0
- AutoGLM_GUI/static/assets/square-pen-CPK_K680.js +1 -0
- AutoGLM_GUI/static/assets/textarea-PRmVnWq5.js +1 -0
- AutoGLM_GUI/static/assets/workflows-CdcsAoaT.js +1 -0
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +17 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/METADATA +179 -130
- autoglm_gui-1.5.1.dist-info/RECORD +118 -0
- AutoGLM_GUI/agents/mai_adapter.py +0 -627
- AutoGLM_GUI/api/dual_model.py +0 -317
- AutoGLM_GUI/device_adapter.py +0 -263
- AutoGLM_GUI/dual_model/__init__.py +0 -53
- AutoGLM_GUI/dual_model/decision_model.py +0 -664
- AutoGLM_GUI/dual_model/dual_agent.py +0 -917
- AutoGLM_GUI/dual_model/protocols.py +0 -354
- AutoGLM_GUI/dual_model/vision_model.py +0 -442
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +0 -291
- AutoGLM_GUI/phone_agent_patches.py +0 -147
- AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +0 -126
- AutoGLM_GUI/static/assets/dialog-B3uW4T8V.js +0 -45
- AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +0 -1
- AutoGLM_GUI/static/assets/index-UYYauTly.js +0 -12
- AutoGLM_GUI/static/assets/workflows-Du_de-dt.js +0 -1
- autoglm_gui-1.4.1.dist-info/RECORD +0 -117
- mai_agent/base.py +0 -137
- mai_agent/mai_grounding_agent.py +0 -263
- mai_agent/mai_naivigation_agent.py +0 -526
- mai_agent/prompt.py +0 -148
- mai_agent/unified_memory.py +0 -67
- mai_agent/utils.py +0 -73
- phone_agent/__init__.py +0 -12
- phone_agent/actions/__init__.py +0 -5
- phone_agent/actions/handler.py +0 -400
- phone_agent/adb/screenshot.py +0 -108
- phone_agent/agent.py +0 -253
- phone_agent/agent_ios.py +0 -277
- phone_agent/config/__init__.py +0 -53
- phone_agent/config/apps_harmonyos.py +0 -256
- phone_agent/config/apps_ios.py +0 -339
- phone_agent/config/prompts.py +0 -80
- phone_agent/device_factory.py +0 -166
- phone_agent/hdc/__init__.py +0 -53
- phone_agent/hdc/connection.py +0 -384
- phone_agent/hdc/device.py +0 -269
- phone_agent/hdc/input.py +0 -145
- phone_agent/hdc/screenshot.py +0 -127
- phone_agent/model/__init__.py +0 -5
- phone_agent/model/client.py +0 -290
- phone_agent/xctest/__init__.py +0 -47
- phone_agent/xctest/connection.py +0 -379
- phone_agent/xctest/device.py +0 -472
- phone_agent/xctest/input.py +0 -311
- phone_agent/xctest/screenshot.py +0 -226
- {phone_agent/config → AutoGLM_GUI/adb}/apps.py +0 -0
- {phone_agent/config → AutoGLM_GUI}/i18n.py +0 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.4.1.dist-info → autoglm_gui-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,354 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
通信协议定义
|
|
3
|
-
|
|
4
|
-
定义大小模型之间的通信协议和数据结构
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from dataclasses import dataclass, field
|
|
8
|
-
from enum import Enum
|
|
9
|
-
from typing import Optional
|
|
10
|
-
from pydantic import BaseModel
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class ThinkingMode(str, Enum):
    """Thinking mode."""

    # Fast mode: concise and efficient.
    FAST = "fast"
    # Deep mode: comprehensive analysis.
    DEEP = "deep"
    # Turbo mode: batched operations; the decision model is only called on anomalies.
    TURBO = "turbo"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class DecisionModelConfig(BaseModel):
    """Configuration for the large decision model."""

    # NOTE(review): some pydantic v2 releases warn about field names that
    # start with "model_" (protected namespace) -- confirm against the
    # pydantic version this package pins before renaming or silencing.

    # Required: no default is declared.
    base_url: str
    # Optional; defaults to an empty string.
    api_key: str = ""
    # Required: no default is declared.
    model_name: str
    max_tokens: int = 4096
    temperature: float = 0.7
    # Deep mode is the default thinking mode.
    thinking_mode: ThinkingMode = ThinkingMode.DEEP
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class DualModelConfig(BaseModel):
    """Configuration for dual-model collaboration."""

    # Off by default.
    enabled: bool = False
    # Decision-model settings; None when none are supplied.
    decision_model: Optional[DecisionModelConfig] = None
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class ModelRole(str, Enum):
    """Model role."""

    # The large decision model.
    DECISION = "decision"
    # The small vision model.
    VISION = "vision"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class ModelStage(str, Enum):
    """Stage a model is currently in."""

    IDLE = "idle"
    # Analyzing the task.
    ANALYZING = "analyzing"
    # Making a decision.
    DECIDING = "deciding"
    # Generating content.
    GENERATING = "generating"
    # Taking a screenshot.
    CAPTURING = "capturing"
    # Recognizing the screen.
    RECOGNIZING = "recognizing"
    # Executing an action.
    EXECUTING = "executing"
    # Waiting.
    WAITING = "waiting"
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
@dataclass
class DualModelState:
    """State of the two models plus overall progress."""

    # Large (decision) model state
    decision_active: bool = False
    decision_stage: ModelStage = ModelStage.IDLE
    decision_thinking: str = ""
    decision_result: str = ""

    # Small (vision) model state
    vision_active: bool = False
    vision_stage: ModelStage = ModelStage.IDLE
    vision_description: str = ""
    vision_action: str = ""

    # Overall state
    current_step: int = 0
    total_steps: int = 0
    task_plan: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the state as a nested dict.

        The result has three sections, "decision", "vision" and "progress".
        Stage enums are emitted as their ``.value``; ``task_plan`` is the
        list object held by this instance, not a copy.
        """
        decision_section = {
            "active": self.decision_active,
            "stage": self.decision_stage.value,
            "thinking": self.decision_thinking,
            "result": self.decision_result,
        }
        vision_section = {
            "active": self.vision_active,
            "stage": self.vision_stage.value,
            "description": self.vision_description,
            "action": self.vision_action,
        }
        progress_section = {
            "current_step": self.current_step,
            "total_steps": self.total_steps,
            "task_plan": self.task_plan,
        }
        return {
            "decision": decision_section,
            "vision": vision_section,
            "progress": progress_section,
        }
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class DualModelEventType(str, Enum):
    """Dual-model event type."""

    # Large (decision) model events
    DECISION_START = "decision_start"
    DECISION_THINKING = "decision_thinking"
    DECISION_RESULT = "decision_result"
    CONTENT_GENERATION = "content_generation"
    TASK_PLAN = "task_plan"

    # Small (vision) model events
    VISION_START = "vision_start"
    VISION_RECOGNITION = "vision_recognition"
    ACTION_START = "action_start"
    ACTION_RESULT = "action_result"

    # Overall events
    STEP_COMPLETE = "step_complete"
    TASK_COMPLETE = "task_complete"
    ERROR = "error"
    ABORTED = "aborted"
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
@dataclass
class DualModelEvent:
    """Dual-model event."""

    type: DualModelEventType
    data: dict
    model: Optional[ModelRole] = None
    step: int = 0
    timestamp: float = 0.0

    def to_sse(self) -> str:
        """Convert the event to SSE (Server-Sent Events) text.

        Returns:
            A string of the form ``event: <type>\\ndata: <json>\\n\\n``. The
            JSON object holds "type", "model", "step" and "timestamp"
            followed by every entry of ``self.data``.
        """
        import json
        import time

        envelope = {
            "type": self.type.value,
            "model": self.model.value if self.model else None,
            "step": self.step,
            # A falsy timestamp (the 0.0 default) is replaced by the current time.
            "timestamp": self.timestamp or time.time(),
        }
        # Payload entries are merged last, so a payload key that matches an
        # envelope key replaces the envelope value.
        event_data = {**envelope, **self.data}
        return f"event: {self.type.value}\ndata: {json.dumps(event_data, ensure_ascii=False)}\n\n"
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
# System prompts
# System prompt text for the decision model: states its role and abilities,
# the JSON response formats ("plan", "decision", "finish"), exception-handling
# guidance, general notes and the rules for counted/looping tasks.
DECISION_SYSTEM_PROMPT = """你是一个智能手机操作决策专家。你的任务是根据用户需求和当前屏幕状态,做出精确的操作决策。

## 你的能力
- 分析用户任务,制定执行计划
- 根据屏幕描述,决定下一步操作
- 生成需要输入的内容(如帖子、回复、消息等)
- 处理异常情况,提供多角度的解决方案

## 响应格式
你必须以JSON格式响应,包含以下字段:

### 任务分析响应
```json
{
"type": "plan",
"summary": "任务简述",
"steps": ["步骤1", "步骤2", ...],
"estimated_actions": 5
}
```

### 决策响应
```json
{
"type": "decision",
"reasoning": "决策理由",
"action": "tap|swipe|type|scroll|back|home|launch|wait|retry",
"target": "目标元素描述",
"content": "如果是type操作,这里是要输入的内容",
"finished": false
}
```

### 任务完成响应
```json
{
"type": "finish",
"message": "任务完成说明",
"success": true
}
```

## 异常处理指南

当遇到以下异常情况时,请采取相应策略:

### 1. 屏幕无变化(连续截图相同)
可能原因及对策:
- 网络延迟:等待后重试 → action: "wait"
- 点击无效:尝试滑动刷新 → action: "scroll"
- 元素加载中:等待加载完成 → action: "wait"
- 应用卡顿:返回重进 → action: "back" 然后重新进入

### 2. 多次操作无效果
可能原因及对策:
- 定位不准:描述更具体的目标元素
- 页面未完全加载:先滚动触发加载
- 需要等待动画:执行 wait 操作
- 权限问题:检查是否需要授权

### 3. 意外弹窗或对话框
对策:
- 优先处理弹窗(关闭或确认)
- 记住主任务,处理完弹窗后继续

### 4. 目标元素不存在
对策:
- 滚动查找:向上或向下滚动
- 返回上一页:可能进错页面
- 搜索功能:使用应用内搜索

## 注意事项
1. 你看不到屏幕,只能根据小模型提供的屏幕描述来决策
2. 每次只做一个决策,等待小模型执行后再继续
3. 如果屏幕描述不清楚,可以要求重新识别
4. 遇到需要登录、验证码等情况,请求用户介入
5. 保持决策的连续性,记住之前的操作和结果

## 重要:循环任务处理
当用户任务包含数量要求时(如"10次"、"10个"、"重复N次"等),你必须:
1. **跟踪进度**:记住当前完成了多少次,还剩多少次
2. **持续执行**:完成一个子任务后,立即开始下一个,不要返回finished
3. **只有全部完成才结束**:只有当所有要求的次数都完成后,才返回 `"type": "finish"`
4. **在reasoning中报告进度**:例如"已完成3/10,继续执行第4个"

示例:如果用户要求"浏览10个帖子并评论":
- 完成第1个帖子后:继续执行,不要finished
- 完成第5个帖子后:继续执行,不要finished
- 完成第10个帖子后:返回 `"type": "finish"`
"""
|
|
241
|
-
|
|
242
|
-
DECISION_SYSTEM_PROMPT_FAST = """你是手机操作决策专家。根据屏幕描述快速做出操作决策。
|
|
243
|
-
|
|
244
|
-
## 响应格式(JSON)
|
|
245
|
-
|
|
246
|
-
任务分析:
|
|
247
|
-
{"type":"plan","summary":"简述","steps":["步骤"],"estimated_actions":N}
|
|
248
|
-
|
|
249
|
-
决策:
|
|
250
|
-
{"type":"decision","reasoning":"理由","action":"tap|swipe|type|scroll|back|home|launch","target":"目标","content":"输入内容"}
|
|
251
|
-
|
|
252
|
-
完成:
|
|
253
|
-
{"type":"finish","message":"完成说明","success":true}
|
|
254
|
-
|
|
255
|
-
## 规则
|
|
256
|
-
- 每次一个决策
|
|
257
|
-
- 简洁明确
|
|
258
|
-
- 快速响应
|
|
259
|
-
|
|
260
|
-
## 重要:循环任务
|
|
261
|
-
当任务包含数量要求(如"10次"、"10个")时:
|
|
262
|
-
1. 跟踪进度:记住完成了多少次
|
|
263
|
-
2. 持续执行:完成一个后继续下一个,不要返回finished
|
|
264
|
-
3. 只有全部完成才返回 `"type":"finish"`
|
|
265
|
-
4. 在reasoning中报告进度(如"已完成3/10")
|
|
266
|
-
"""
|
|
267
|
-
|
|
268
|
-
DECISION_ERROR_CONTEXT_TEMPLATE = """
|
|
269
|
-
## 异常状态报告
|
|
270
|
-
|
|
271
|
-
{error_context}
|
|
272
|
-
|
|
273
|
-
请根据以上异常情况,从多个角度分析问题并给出解决方案。在 reasoning 中说明你的分析过程。
|
|
274
|
-
"""
|
|
275
|
-
|
|
276
|
-
VISION_DESCRIBE_PROMPT = """请详细描述当前屏幕内容,包括:
|
|
277
|
-
|
|
278
|
-
1. 当前所在的应用/页面
|
|
279
|
-
2. 屏幕上可见的主要元素(按钮、文本、图标等)
|
|
280
|
-
3. 各元素的大致位置(上/中/下,左/中/右)
|
|
281
|
-
4. 任何输入框、可点击区域
|
|
282
|
-
5. 当前页面的状态(是否有弹窗、是否在加载等)
|
|
283
|
-
|
|
284
|
-
请用简洁清晰的中文描述,让决策模型能理解当前屏幕状态。
|
|
285
|
-
"""
|
|
286
|
-
|
|
287
|
-
# One-sentence screen-description prompt: app name, main buttons, input
# boxes and popup state.
VISION_DESCRIBE_PROMPT_FAST = """简述屏幕内容:应用名、主要按钮、输入框、弹窗状态。"""
|
|
288
|
-
|
|
289
|
-
DECISION_SYSTEM_PROMPT_TURBO = """你是手机操作专家。一次性生成完整的操作序列,让执行模型直接执行。
|
|
290
|
-
|
|
291
|
-
## 响应格式(JSON)
|
|
292
|
-
|
|
293
|
-
任务分析时返回操作序列:
|
|
294
|
-
```json
|
|
295
|
-
{
|
|
296
|
-
"type": "action_sequence",
|
|
297
|
-
"summary": "任务简述",
|
|
298
|
-
"actions": [
|
|
299
|
-
{"action": "launch", "target": "应用名"},
|
|
300
|
-
{"action": "tap", "target": "搜索框"},
|
|
301
|
-
{"action": "type", "content": "搜索内容", "need_generate": false},
|
|
302
|
-
{"action": "tap", "target": "搜索按钮"},
|
|
303
|
-
{"action": "tap", "target": "第一个结果"}
|
|
304
|
-
],
|
|
305
|
-
"checkpoints": ["应用已打开", "搜索结果出现"],
|
|
306
|
-
"humanize_steps": [2]
|
|
307
|
-
}
|
|
308
|
-
```
|
|
309
|
-
|
|
310
|
-
## 字段说明
|
|
311
|
-
- actions: 操作序列,按顺序执行
|
|
312
|
-
- action类型: tap|type|swipe|scroll|back|home|launch|wait
|
|
313
|
-
- need_generate: type操作是否需要决策模型生成内容(人性化回复等)
|
|
314
|
-
- humanize_steps: 需要人性化处理的步骤索引(0起始),这些步骤会调用决策模型生成内容
|
|
315
|
-
- checkpoints: 关键检查点,用于验证执行进度
|
|
316
|
-
|
|
317
|
-
## 规则
|
|
318
|
-
1. 一次性给出所有操作步骤,执行模型会按顺序执行
|
|
319
|
-
2. type操作如果是固定内容(搜索词、用户名等),直接写content,need_generate=false
|
|
320
|
-
3. type操作如果需要人性化内容(回复消息、发帖等),设置need_generate=true,执行时会调用决策模型
|
|
321
|
-
4. 只有遇到异常才会重新调用你,否则按序列执行
|
|
322
|
-
5. 尽量精确描述target,便于视觉模型定位
|
|
323
|
-
"""
|
|
324
|
-
|
|
325
|
-
DECISION_REPLAN_PROMPT = """执行过程中遇到问题,请重新分析并给出后续操作。
|
|
326
|
-
|
|
327
|
-
## 当前状态
|
|
328
|
-
{current_state}
|
|
329
|
-
|
|
330
|
-
## 已执行的操作
|
|
331
|
-
{executed_actions}
|
|
332
|
-
|
|
333
|
-
## 遇到的问题
|
|
334
|
-
{error_info}
|
|
335
|
-
|
|
336
|
-
请分析问题并给出新的操作序列。如果需要人性化回复,请直接生成内容。
|
|
337
|
-
"""
|
|
338
|
-
|
|
339
|
-
DECISION_HUMANIZE_PROMPT = """需要生成人性化内容。
|
|
340
|
-
|
|
341
|
-
## 任务背景
|
|
342
|
-
{task_context}
|
|
343
|
-
|
|
344
|
-
## 当前场景
|
|
345
|
-
{current_scene}
|
|
346
|
-
|
|
347
|
-
## 需要生成的内容类型
|
|
348
|
-
{content_type}
|
|
349
|
-
|
|
350
|
-
请直接生成内容,不要JSON格式,直接返回文本内容。要求:
|
|
351
|
-
1. 自然、真实、有个性
|
|
352
|
-
2. 符合场景和语境
|
|
353
|
-
3. 适当的长度
|
|
354
|
-
"""
|