autoglm-gui 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/__main__.py +0 -4
- AutoGLM_GUI/adb_plus/qr_pair.py +8 -8
- AutoGLM_GUI/agents/__init__.py +20 -0
- AutoGLM_GUI/agents/factory.py +160 -0
- AutoGLM_GUI/agents/mai_adapter.py +627 -0
- AutoGLM_GUI/agents/protocols.py +23 -0
- AutoGLM_GUI/api/__init__.py +50 -7
- AutoGLM_GUI/api/agents.py +61 -19
- AutoGLM_GUI/api/devices.py +12 -18
- AutoGLM_GUI/api/dual_model.py +24 -17
- AutoGLM_GUI/api/health.py +13 -0
- AutoGLM_GUI/api/layered_agent.py +659 -0
- AutoGLM_GUI/api/mcp.py +11 -10
- AutoGLM_GUI/api/version.py +23 -10
- AutoGLM_GUI/api/workflows.py +2 -1
- AutoGLM_GUI/config_manager.py +56 -24
- AutoGLM_GUI/device_adapter.py +263 -0
- AutoGLM_GUI/device_protocol.py +266 -0
- AutoGLM_GUI/devices/__init__.py +49 -0
- AutoGLM_GUI/devices/adb_device.py +205 -0
- AutoGLM_GUI/devices/mock_device.py +183 -0
- AutoGLM_GUI/devices/remote_device.py +172 -0
- AutoGLM_GUI/dual_model/decision_model.py +4 -4
- AutoGLM_GUI/dual_model/protocols.py +3 -3
- AutoGLM_GUI/exceptions.py +3 -3
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +291 -0
- AutoGLM_GUI/metrics.py +13 -20
- AutoGLM_GUI/phone_agent_manager.py +219 -134
- AutoGLM_GUI/phone_agent_patches.py +2 -1
- AutoGLM_GUI/platform_utils.py +5 -2
- AutoGLM_GUI/prompts.py +6 -1
- AutoGLM_GUI/schemas.py +45 -14
- AutoGLM_GUI/scrcpy_stream.py +17 -13
- AutoGLM_GUI/server.py +3 -1
- AutoGLM_GUI/socketio_server.py +16 -4
- AutoGLM_GUI/state.py +10 -30
- AutoGLM_GUI/static/assets/{about-Cj6QXqMf.js → about-_XNhzQZX.js} +1 -1
- AutoGLM_GUI/static/assets/chat-DwJpiAWf.js +126 -0
- AutoGLM_GUI/static/assets/{dialog-CxJlnjzH.js → dialog-B3uW4T8V.js} +3 -3
- AutoGLM_GUI/static/assets/index-Cpv2gSF1.css +1 -0
- AutoGLM_GUI/static/assets/{index-C_B-Arvf.js → index-Cy8TmmHV.js} +1 -1
- AutoGLM_GUI/static/assets/{index-CxJQuE4y.js → index-UYYauTly.js} +6 -6
- AutoGLM_GUI/static/assets/{workflows-BTiGCNI0.js → workflows-Du_de-dt.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- AutoGLM_GUI/types.py +125 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/METADATA +147 -65
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/RECORD +58 -39
- mai_agent/base.py +137 -0
- mai_agent/mai_grounding_agent.py +263 -0
- mai_agent/mai_naivigation_agent.py +526 -0
- mai_agent/prompt.py +148 -0
- mai_agent/unified_memory.py +67 -0
- mai_agent/utils.py +73 -0
- phone_agent/config/prompts.py +6 -1
- phone_agent/config/prompts_zh.py +6 -1
- AutoGLM_GUI/config.py +0 -23
- AutoGLM_GUI/static/assets/chat-BJeomZgh.js +0 -124
- AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +0 -1
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.3.1.dist-info → autoglm_gui-1.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
"""Layered agent API for hierarchical task execution.
|
|
2
|
+
|
|
3
|
+
This module provides the layered agent API endpoint that uses
|
|
4
|
+
a decision model for planning and autoglm-phone for execution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import threading
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
from agents import Agent, Runner, SQLiteSession, function_tool
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from agents.result import RunResultStreaming
|
|
16
|
+
from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel
|
|
17
|
+
from fastapi import APIRouter
|
|
18
|
+
from fastapi.responses import StreamingResponse
|
|
19
|
+
from openai import AsyncOpenAI
|
|
20
|
+
from pydantic import BaseModel
|
|
21
|
+
|
|
22
|
+
from AutoGLM_GUI.config_manager import config_manager
|
|
23
|
+
from AutoGLM_GUI.logger import logger
|
|
24
|
+
|
|
25
|
+
router = APIRouter()

# ==================== Session management ====================
# Maps each session_id to its SQLiteSession (in-memory mode).
_sessions: dict[str, SQLiteSession] = {}

# ==================== Active-run management ====================
# Maps each session_id to its live RunResultStreaming instance so the
# /abort endpoint can cancel it. Guarded by _active_runs_lock because the
# SSE generator and the abort handler touch it from different threads.
_active_runs: dict[str, "RunResultStreaming"] = {}
_active_runs_lock = threading.Lock()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_or_create_session(session_id: str) -> SQLiteSession:
    """Return the in-memory session for *session_id*, creating it on first use."""
    session = _sessions.get(session_id)
    if session is None:
        # The session_id doubles as the SQLiteSession's conversation name.
        session = SQLiteSession(session_id)
        _sessions[session_id] = session
        logger.info(f"[LayeredAgent] Created new session: {session_id}")
    return session
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _clear_session(session_id: str) -> bool:
    """Drop the session for *session_id*; return True if one existed."""
    removed = _sessions.pop(session_id, None)
    if removed is None:
        return False
    logger.info(f"[LayeredAgent] Cleared session: {session_id}")
    return True
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_planner_model() -> str:
    """Return the planner-layer model name, read from the effective config."""
    effective = config_manager.get_effective_config()
    # Fall back to the default when the config leaves the name unset/empty.
    return effective.decision_model_name or "glm-4.7"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
PLANNER_INSTRUCTIONS = """## 核心目标
|
|
62
|
+
你是一个负责操控手机的高级智能中枢。你的任务是将用户的意图转化为**视觉模型(Vision Model)**可以执行的原子操作。
|
|
63
|
+
|
|
64
|
+
## ⚠️ 极其重要的限制:视觉模型的能力边界 (Must Read)
|
|
65
|
+
你的下级(Vision Model)是一个**纯粹的执行者和观察者**。
|
|
66
|
+
1. **无"记忆/笔记"功能**:它没有 `Note` 功能,无法为你保存数据。
|
|
67
|
+
2. **无"系统级"权限**:它不能复制源代码,不能直接提取文本,不能读取剪贴板。
|
|
68
|
+
3. **唯一的输出**:它只能通过**对话**告诉你它看到了什么,或者去**点击/滑动**屏幕。
|
|
69
|
+
|
|
70
|
+
## 交互策略 (Interaction Strategy)
|
|
71
|
+
|
|
72
|
+
### 1. 如果你需要"操作手机" (To Act)
|
|
73
|
+
下达明确的 UI 动作指令。
|
|
74
|
+
- ✅ "点击'设置'图标。"
|
|
75
|
+
- ✅ "向下滑动屏幕。"
|
|
76
|
+
- ✅ "打开微信。"
|
|
77
|
+
|
|
78
|
+
### 2. 如果你需要"获取信息" (To Read/Extract)
|
|
79
|
+
你必须通过**提问**的方式,让视觉模型在对话中把信息"念"给你听。
|
|
80
|
+
- ❌ **错误**: "把验证码保存下来。" (它做不到)
|
|
81
|
+
- ❌ **错误**: "使用 Note 功能记录价格。" (它没有这个功能)
|
|
82
|
+
- ✅ **正确**: 调用 `chat` 询问:"请看屏幕,告诉我现在的订单总金额是多少?"
|
|
83
|
+
- *结果*: 视觉模型会回复 "25.5元"。你需要自己处理这个文本信息。
|
|
84
|
+
|
|
85
|
+
### 3. 如果用户要求"复制/粘贴"
|
|
86
|
+
必须通过模拟手指操作来实现,不能直接操作剪贴板。
|
|
87
|
+
- ✅ **正确**: "长按这段文字,等待弹出菜单,然后点击'复制'按钮。"
|
|
88
|
+
|
|
89
|
+
## 任务拆解原则 (Decomposition Rules)
|
|
90
|
+
|
|
91
|
+
1. **原子化**: 每次只给一个动作。
|
|
92
|
+
2. **可视化**: 指令必须基于屏幕上**看得见**的元素。不要说"点击确认",如果屏幕上显示的按钮叫"OK",请说"点击'OK'按钮"。
|
|
93
|
+
3. **Fail Fast**: 如果视觉模型回复 `ELEMENT_NOT_FOUND`,不要死循环。询问它:"那现在屏幕上有什么?"或者尝试滑动寻找。
|
|
94
|
+
|
|
95
|
+
## 核心工作流 (The Loop)
|
|
96
|
+
1. **Observe (看)**: 调用 `chat` 询问当前状态。
|
|
97
|
+
- "现在屏幕上显示什么?" / "刚才的点击生效了吗?"
|
|
98
|
+
2. **Think (想)**:
|
|
99
|
+
- 用户的目标是什么?
|
|
100
|
+
- 我需要让视觉模型**做什么动作**,还是**回答什么问题**?
|
|
101
|
+
3. **Act (做)**:
|
|
102
|
+
- **Case A (动作)**: 发送指令 `点击[坐标]...`
|
|
103
|
+
- **Case B (询问)**: 发送问题 `请读取...`
|
|
104
|
+
|
|
105
|
+
## 内部思维链示例 (Inner Monologue)
|
|
106
|
+
|
|
107
|
+
**场景 1: 用户让你"把这篇笔记的标题发给我"**
|
|
108
|
+
> **Current State**: 笔记详情页。
|
|
109
|
+
> **Goal**: 获取标题文本。
|
|
110
|
+
> **Constraint**: 视觉模型无法直接提取变量,我必须问它。
|
|
111
|
+
> **Strategy**: 问视觉模型标题是什么,它回答后,我再反馈给用户。
|
|
112
|
+
> **Next Action**: 提问。
|
|
113
|
+
**Output**: `chat(id, "请读取并告诉我屏幕上这篇笔记的标题文字内容是什么?")`
|
|
114
|
+
|
|
115
|
+
**场景 2: 用户让你"复制链接"**
|
|
116
|
+
> **Current State**: 详情页。
|
|
117
|
+
> **Goal**: 把链接复制到系统剪贴板。
|
|
118
|
+
> **Constraint**: 不能直接 Get Link。必须找"分享"或"复制"按钮。
|
|
119
|
+
> **Strategy**: 先点右上角菜单,再找复制链接。
|
|
120
|
+
> **Next Action**: 点击菜单。
|
|
121
|
+
**Output**: `chat(id, "点击屏幕右上角的'...'(三个点)菜单按钮。")`
|
|
122
|
+
|
|
123
|
+
## 工具集 (Tools)
|
|
124
|
+
1. `list_devices()`
|
|
125
|
+
2. `chat(device_id, message)`:
|
|
126
|
+
- 发送操作指令(如"点击红色按钮")。
|
|
127
|
+
- 发送查询问题(如"那个验证码是多少?")。
|
|
128
|
+
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ==================== 工具定义 ====================
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _sync_list_devices() -> str:
    """Synchronous implementation: list all connected ADB devices as JSON."""
    from AutoGLM_GUI.api.devices import _build_device_response_with_agent
    from AutoGLM_GUI.device_manager import DeviceManager
    from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager

    logger.info("[LayeredAgent] list_devices tool called")

    dev_mgr = DeviceManager.get_instance()
    phone_mgr = PhoneAgentManager.get_instance()

    # When the background polling thread is not running, refresh inline so the
    # returned list is not stale.
    poll_thread = dev_mgr._poll_thread
    if not poll_thread or not poll_thread.is_alive():
        logger.warning("Polling not started, performing sync refresh")
        dev_mgr.force_refresh()

    # Attach agent status to every managed device.
    responses = [
        _build_device_response_with_agent(managed, phone_mgr)
        for managed in dev_mgr.get_devices()
    ]

    # Pydantic DeviceResponse models must become plain dicts before json.dumps.
    payload = [response.model_dump() for response in responses]
    return json.dumps(payload, ensure_ascii=False, indent=2)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@function_tool
async def list_devices() -> str:
    # NOTE(review): @function_tool presumably exposes this docstring to the
    # planner as the tool description, so it is kept verbatim (Chinese) —
    # translating it would change the runtime tool schema. TODO confirm
    # against the agents SDK.
    """
    获取所有连接的 ADB 设备列表。

    返回设备信息包括:
    - id: 设备标识符,用于 chat 工具调用
    - model: 设备型号
    - status: 连接状态
    - connection_type: 连接类型 (usb/wifi/remote)

    Returns:
        JSON 格式的设备列表
    """
    # Run the blocking sync implementation off the event loop.
    return await asyncio.to_thread(_sync_list_devices)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _sync_chat(device_id: str, message: str) -> str:
    """Synchronous implementation: dispatch one sub-task to a device's Phone Agent.

    Returns a JSON string with keys ``result``, ``steps`` and ``success``.
    Errors (busy device, agent failure) are reported in the JSON payload
    rather than raised, so the planner always gets a tool result.
    """
    from AutoGLM_GUI.exceptions import DeviceBusyError
    from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
    from AutoGLM_GUI.prompts import MCP_SYSTEM_PROMPT_ZH

    # Step budget for a single sub-task; keeps one chat() call atomic.
    MCP_MAX_STEPS = 5

    logger.info(
        f"[LayeredAgent] chat tool called: device_id={device_id}, message={message}"
    )

    manager = PhoneAgentManager.get_instance()

    try:
        # use_agent auto-initializes the agent when needed (auto_initialize=True).
        with manager.use_agent(device_id, timeout=None) as agent:
            # Temporarily override the agent config; restored in the finally below.
            original_max_steps = agent.agent_config.max_steps
            original_system_prompt = agent.agent_config.system_prompt

            agent.agent_config.max_steps = MCP_MAX_STEPS
            agent.agent_config.system_prompt = MCP_SYSTEM_PROMPT_ZH

            try:
                # Reset the agent so each sub-task starts from a clean state.
                agent.reset()

                result = agent.run(message)
                steps = agent.step_count

                # Detect step-budget exhaustion as reported by the agent and
                # return the execution history so the planner can re-plan.
                if steps >= MCP_MAX_STEPS and result == "Max steps reached":
                    context_json = json.dumps(
                        agent.context, ensure_ascii=False, indent=2
                    )
                    return json.dumps(
                        {
                            "result": f"⚠️ 已达到最大步数限制({MCP_MAX_STEPS}步)。视觉模型可能遇到了困难,任务未完成。\n\n执行历史:\n{context_json}\n\n建议: 请重新规划任务或将其拆分为更小的子任务。",
                            "steps": MCP_MAX_STEPS,
                            "success": False,
                        },
                        ensure_ascii=False,
                    )

                return json.dumps(
                    {
                        "result": result,
                        "steps": steps,
                        "success": True,
                    },
                    ensure_ascii=False,
                )

            finally:
                # Restore the caller's original agent configuration.
                agent.agent_config.max_steps = original_max_steps
                agent.agent_config.system_prompt = original_system_prompt

    except DeviceBusyError:
        # Device is locked by another run; report busy instead of raising.
        return json.dumps(
            {
                "result": f"设备 {device_id} 正忙,请稍后再试。",
                "steps": 0,
                "success": False,
            },
            ensure_ascii=False,
        )
    except Exception as e:
        logger.error(f"[LayeredAgent] chat tool error: {e}")
        return json.dumps(
            {
                "result": str(e),
                "steps": 0,
                "success": False,
            },
            ensure_ascii=False,
        )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@function_tool
async def chat(device_id: str, message: str) -> str:
    # NOTE(review): @function_tool presumably exposes this docstring to the
    # planner as the tool description, so it is kept verbatim (Chinese) —
    # translating it would change the runtime tool schema. TODO confirm
    # against the agents SDK.
    """
    向指定设备的 Phone Agent 发送子任务指令。

    Phone Agent 是一个视觉模型,能够看到手机屏幕并执行操作。
    每次调用会执行一个原子化的子任务(最多 5 步操作)。

    Args:
        device_id: 设备标识符,从 list_devices 获取
        message: 子任务指令,例如 "打开微信"、"点击搜索按钮"

    Returns:
        JSON 格式的执行结果,包含:
        - result: 执行结果描述
        - steps: 执行的步数
        - success: 是否成功
    """
    # Run the blocking sync implementation off the event loop.
    return await asyncio.to_thread(_sync_chat, device_id, message)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# ==================== Agent 初始化 ====================
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _setup_openai_client() -> AsyncOpenAI:
    """Build an AsyncOpenAI client from AutoGLM's effective configuration.

    Raises:
        ValueError: if no base_url is configured.
    """
    config_manager.load_file_config()
    cfg = config_manager.get_effective_config()

    base_url = cfg.base_url
    if not base_url:
        raise ValueError("base_url not configured")

    planner_model = get_planner_model()
    logger.info(f"[LayeredAgent] API Base URL: {base_url}")
    logger.info(f"[LayeredAgent] Planner Model: {planner_model}")

    return AsyncOpenAI(base_url=base_url, api_key=cfg.api_key)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _create_planner_agent(client: AsyncOpenAI) -> Agent[Any]:
    """Create the planner Agent, backed by the Chat Completions API.

    Args:
        client: Configured AsyncOpenAI client to issue completions through.

    Returns:
        An Agent named "Planner" wired with the list_devices and chat tools.
    """
    # Import at call time: in the module header this name appears next to the
    # TYPE_CHECKING-only imports; if it lives inside that guard, referencing it
    # here would raise NameError at runtime. A redundant local import is
    # harmless if the module-level import also exists.
    from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel

    planner_model = get_planner_model()
    model = OpenAIChatCompletionsModel(
        model=planner_model,
        openai_client=client,
    )

    return Agent(
        name="Planner",
        instructions=PLANNER_INSTRUCTIONS,
        model=model,
        tools=[list_devices, chat],
    )
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# Global agent instance (lazy initialized).
# _client/_agent are rebuilt by _ensure_agent() whenever the effective config
# hash changes; _cached_config_hash records the config the agent was built with.
_client: AsyncOpenAI | None = None
_agent: Agent[Any] | None = None
_cached_config_hash: str | None = None
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def _compute_config_hash() -> str:
    """Return an MD5 digest of the effective config, used for change detection."""
    import hashlib

    serialized = config_manager.get_effective_config().model_dump_json()
    # MD5 is acceptable here: the hash only detects config changes; it is not
    # security-sensitive.
    return hashlib.md5(serialized.encode()).hexdigest()
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _ensure_agent() -> Agent[Any]:
    """Return the cached planner Agent, (re)building it when the config changes."""
    global _client, _agent, _cached_config_hash

    current_hash = _compute_config_hash()
    config_changed = _cached_config_hash != current_hash

    # Fast path: agent exists and its config is still current.
    if _agent is not None and not config_changed:
        return _agent

    if _agent is not None and config_changed:
        logger.info(
            f"[LayeredAgent] Config changed (hash: {_cached_config_hash} -> {current_hash}), reloading agent..."
        )

    _client = _setup_openai_client()
    _agent = _create_planner_agent(_client)
    _cached_config_hash = current_hash
    logger.info(
        f"[LayeredAgent] Agent initialized/reloaded with config hash: {current_hash}"
    )

    return _agent
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# ==================== API 路由 ====================
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
class LayeredAgentRequest(BaseModel):
    """Request for layered agent chat."""

    # Natural-language task from the user.
    message: str
    device_id: str | None = None
    session_id: str | None = None  # keeps conversation context; frontend may pass its deviceId
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _parse_tool_args(args_val: Any) -> dict[str, Any]:
    """Best-effort decode of a tool-call arguments payload into a dict.

    Accepts a JSON string or an already-decoded mapping. Empty/None payloads
    yield {}; undecodable text is preserved under the "raw" key.
    """
    if not args_val:
        return {}
    if not isinstance(args_val, str):
        return args_val
    try:
        return json.loads(args_val)
    except Exception:
        return {"raw": str(args_val)}


def _extract_tool_call(item: Any) -> tuple[str, dict[str, Any]]:
    """Pull (tool_name, tool_args) out of a tool_call_item stream event.

    Tolerates the several raw_item shapes the agents SDK may emit (plain dict,
    object with a .function, object with .name/.arguments) and finally falls
    back to the item's own attributes.
    """
    tool_name = "unknown"
    tool_args: dict[str, Any] = {}

    raw = getattr(item, "raw_item", None)
    if raw:
        if isinstance(raw, dict):
            tool_name = raw.get("name", raw.get("function", {}).get("name", "unknown"))
            tool_args = _parse_tool_args(
                raw.get("arguments", raw.get("function", {}).get("arguments", "{}"))
            )
        else:
            func = getattr(raw, "function", None)
            if func:
                tool_name = getattr(func, "name", "unknown")
                tool_args = _parse_tool_args(getattr(func, "arguments", None))
            else:
                name_val = getattr(raw, "name", None)
                if name_val:
                    tool_name = name_val
                    tool_args = _parse_tool_args(getattr(raw, "arguments", None))

    # Fallback: raw_item yielded nothing usable — inspect the item directly.
    if tool_name == "unknown":
        if getattr(item, "name", None):
            tool_name = item.name
        elif getattr(item, "call", None):
            call = item.call
            call_func = getattr(call, "function", None)
            if call_func:
                tool_name = getattr(call_func, "name", tool_name)
                if hasattr(call_func, "arguments"):
                    tool_args = _parse_tool_args(call_func.arguments)
            elif hasattr(call, "name"):
                tool_name = call.name
                if hasattr(call, "arguments"):
                    tool_args = _parse_tool_args(call.arguments)

    return tool_name, tool_args


@router.post("/api/layered-agent/chat")
async def layered_agent_chat(request: LayeredAgentRequest):
    """
    Layered agent chat API with streaming execution steps.

    Uses a decision model for planning and autoglm-phone for execution.

    Returns SSE stream with events:
    - tool_call: Agent is calling a tool (with tool_name and tool_args)
    - tool_result: Tool execution result
    - message: Intermediate message from agent
    - done: Final response
    - error: Error occurred
    """
    from agents.stream_events import (
        RawResponsesStreamEvent,
        RunItemStreamEvent,
    )

    def _sse(payload: dict[str, Any]) -> str:
        """Format one dict as a Server-Sent-Events data frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    async def event_generator():
        try:
            # Ensure the planner agent exists (rebuilt if config changed).
            agent = _ensure_agent()

            # Pick the conversation session: explicit session_id first, then
            # device_id, finally a shared default bucket.
            session_id = request.session_id or request.device_id or "default"
            session = _get_or_create_session(session_id)

            # Run the agent with streaming; the session supplies memory.
            result = Runner.run_streamed(
                agent,
                request.message,
                max_turns=50,
                session=session,
            )

            # Register the run so /abort can cancel it.
            with _active_runs_lock:
                _active_runs[session_id] = result

            current_tool_call: dict[str, Any] | None = None

            try:
                async for event in result.stream_events():
                    if isinstance(event, RawResponsesStreamEvent):
                        # Raw response chunk — could contain thinking; ignored.
                        pass

                    elif isinstance(event, RunItemStreamEvent):
                        item = event.item
                        item_type = getattr(item, "type", None)

                        if item_type == "tool_call_item":
                            tool_name, tool_args = _extract_tool_call(item)
                            logger.info(
                                f"[LayeredAgent] Tool call: {tool_name}, args keys: {list(tool_args.keys()) if isinstance(tool_args, dict) else 'not dict'}"
                            )
                            # Remember the call so its output event can be
                            # labeled with the right tool name.
                            current_tool_call = {
                                "name": tool_name,
                                "args": tool_args,
                            }
                            yield _sse(
                                {
                                    "type": "tool_call",
                                    "tool_name": tool_name,
                                    "tool_args": tool_args,
                                }
                            )

                        elif item_type == "tool_call_output_item":
                            output = getattr(item, "output", "")

                            # Prefer the name remembered from the matching
                            # tool_call event; fall back to raw_item.name.
                            tool_name = (
                                current_tool_call["name"]
                                if current_tool_call
                                else "unknown"
                            )
                            raw_item = getattr(item, "raw_item", None)
                            if tool_name == "unknown" and raw_item:
                                name_val = getattr(raw_item, "name", None)
                                if name_val:
                                    tool_name = name_val

                            logger.info(
                                f"[LayeredAgent] Tool result for {tool_name}: {str(output)[:100] if output else 'empty'}..."
                            )
                            yield _sse(
                                {
                                    "type": "tool_result",
                                    "tool_name": tool_name,
                                    "result": output,
                                }
                            )
                            current_tool_call = None

                        elif item_type == "message_output_item":
                            # Concatenate all text parts of the message.
                            content = ""
                            raw_item = getattr(item, "raw_item", None)
                            if raw_item:
                                raw_content = getattr(raw_item, "content", None)
                                if raw_content:
                                    for part in raw_content:
                                        text_val = getattr(part, "text", None)
                                        if text_val:
                                            content += text_val

                            if content:
                                yield _sse({"type": "message", "content": content})

            finally:
                # Deregister the run so /abort no longer sees it.
                with _active_runs_lock:
                    _active_runs.pop(session_id, None)

            # Final result
            final_output = (
                result.final_output if hasattr(result, "final_output") else ""
            )
            yield _sse(
                {
                    "type": "done",
                    "content": final_output,
                    "success": True,
                }
            )

        except Exception as e:
            logger.exception(f"[LayeredAgent] Error: {e}")
            yield _sse(
                {
                    "type": "error",
                    "message": str(e),
                }
            )

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
class AbortSessionRequest(BaseModel):
    """Request for aborting a running session."""

    # Session whose active run should be cancelled.
    session_id: str
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
@router.post("/api/layered-agent/abort")
def abort_session(request: AbortSessionRequest):
    """
    Abort a running layered agent session.

    Uses the OpenAI agents SDK's native cancel() method to stop execution.
    """
    session_id = request.session_id

    with _active_runs_lock:
        active = _active_runs.get(session_id)

        # Guard clause: nothing to cancel for this session.
        if active is None:
            logger.warning(
                f"[LayeredAgent] No active run found for session: {session_id}"
            )
            return {
                "success": False,
                "message": f"No active run found for session {session_id}",
            }

        active.cancel(mode="immediate")
        logger.info(f"[LayeredAgent] Aborted session: {session_id}")
        return {
            "success": True,
            "message": f"Session {session_id} abort signal sent",
        }
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
class ResetSessionRequest(BaseModel):
    """Request for resetting a session."""

    # Session whose conversation history should be forgotten.
    session_id: str
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
@router.post("/api/layered-agent/reset")
def reset_session(request: ResetSessionRequest):
    """
    Reset/clear a session to forget conversation history.

    This should be called when the user clicks "reset" button
    or refreshes the page.
    """
    was_cleared = _clear_session(request.session_id)
    status = "cleared" if was_cleared else "not found (already empty)"
    return {
        "success": True,
        "message": f"Session {request.session_id} {status}",
    }
|