autoglm-gui 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AutoGLM_GUI/api/__init__.py +10 -4
- AutoGLM_GUI/api/agents.py +0 -2
- AutoGLM_GUI/api/dual_model.py +9 -8
- AutoGLM_GUI/api/layered_agent.py +586 -0
- AutoGLM_GUI/config_manager.py +2 -24
- AutoGLM_GUI/dual_model/protocols.py +3 -3
- AutoGLM_GUI/mai_ui_adapter/agent_wrapper.py +291 -0
- AutoGLM_GUI/prompts.py +6 -1
- AutoGLM_GUI/schemas.py +0 -16
- AutoGLM_GUI/static/assets/{about-CrBXGOgB.js → about-DeclntHg.js} +1 -1
- AutoGLM_GUI/static/assets/chat-Iut2yhSw.js +125 -0
- AutoGLM_GUI/static/assets/{dialog-CHJSPLHJ.js → dialog-BfdcBs1x.js} +1 -1
- AutoGLM_GUI/static/assets/index-5hCCwHA7.css +1 -0
- AutoGLM_GUI/static/assets/{index-Dt7cVkfR.js → index-DHF1NZh0.js} +2 -2
- AutoGLM_GUI/static/assets/{index-9IaIXvyy.js → index-zQ4KKDHt.js} +1 -1
- AutoGLM_GUI/static/assets/{workflows-DHadKApI.js → workflows-xiplap-r.js} +1 -1
- AutoGLM_GUI/static/index.html +2 -2
- {autoglm_gui-1.3.0.dist-info → autoglm_gui-1.4.0.dist-info}/METADATA +89 -8
- {autoglm_gui-1.3.0.dist-info → autoglm_gui-1.4.0.dist-info}/RECORD +24 -22
- phone_agent/config/prompts.py +6 -1
- phone_agent/config/prompts_zh.py +6 -1
- AutoGLM_GUI/static/assets/chat-Di2fwu8V.js +0 -124
- AutoGLM_GUI/static/assets/index-Z0uYCPOO.css +0 -1
- {autoglm_gui-1.3.0.dist-info → autoglm_gui-1.4.0.dist-info}/WHEEL +0 -0
- {autoglm_gui-1.3.0.dist-info → autoglm_gui-1.4.0.dist-info}/entry_points.txt +0 -0
- {autoglm_gui-1.3.0.dist-info → autoglm_gui-1.4.0.dist-info}/licenses/LICENSE +0 -0
AutoGLM_GUI/api/__init__.py
CHANGED
|
@@ -19,6 +19,7 @@ from . import (
|
|
|
19
19
|
control,
|
|
20
20
|
devices,
|
|
21
21
|
dual_model,
|
|
22
|
+
layered_agent,
|
|
22
23
|
mcp,
|
|
23
24
|
media,
|
|
24
25
|
metrics,
|
|
@@ -89,6 +90,7 @@ def create_app() -> FastAPI:
|
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
app.include_router(agents.router)
|
|
93
|
+
app.include_router(layered_agent.router)
|
|
92
94
|
app.include_router(devices.router)
|
|
93
95
|
app.include_router(control.router)
|
|
94
96
|
app.include_router(media.router)
|
|
@@ -97,9 +99,9 @@ def create_app() -> FastAPI:
|
|
|
97
99
|
app.include_router(workflows.router)
|
|
98
100
|
app.include_router(dual_model.router)
|
|
99
101
|
|
|
100
|
-
# Mount
|
|
101
|
-
|
|
102
|
-
|
|
102
|
+
# Mount static files BEFORE MCP to ensure they have priority
|
|
103
|
+
# This is critical: FastAPI processes mounts in order, so static files
|
|
104
|
+
# must be mounted before the catch-all MCP mount
|
|
103
105
|
static_dir = _get_static_dir()
|
|
104
106
|
if static_dir is not None and static_dir.exists():
|
|
105
107
|
assets_dir = static_dir / "assets"
|
|
@@ -113,11 +115,15 @@ def create_app() -> FastAPI:
|
|
|
113
115
|
return FileResponse(file_path)
|
|
114
116
|
return FileResponse(static_dir / "index.html")
|
|
115
117
|
|
|
116
|
-
# Add catch-all route
|
|
118
|
+
# Add catch-all route for SPA (handles all non-API routes)
|
|
117
119
|
app.add_api_route(
|
|
118
120
|
"/{full_path:path}", serve_spa, methods=["GET"], include_in_schema=False
|
|
119
121
|
)
|
|
120
122
|
|
|
123
|
+
# Mount MCP server at root (mcp_app already has /mcp path prefix)
|
|
124
|
+
# This must be AFTER static files to avoid intercepting them
|
|
125
|
+
app.mount("/", mcp_app)
|
|
126
|
+
|
|
121
127
|
return app
|
|
122
128
|
|
|
123
129
|
|
AutoGLM_GUI/api/agents.py
CHANGED
|
@@ -460,7 +460,6 @@ def get_config_endpoint() -> ConfigResponse:
|
|
|
460
460
|
decision_api_key=effective_config.decision_api_key
|
|
461
461
|
if effective_config.decision_api_key
|
|
462
462
|
else "",
|
|
463
|
-
thinking_mode=effective_config.thinking_mode,
|
|
464
463
|
conflicts=[
|
|
465
464
|
{
|
|
466
465
|
"field": c.field,
|
|
@@ -497,7 +496,6 @@ def save_config_endpoint(request: ConfigSaveRequest) -> dict:
|
|
|
497
496
|
decision_base_url=request.decision_base_url,
|
|
498
497
|
decision_model_name=request.decision_model_name,
|
|
499
498
|
decision_api_key=request.decision_api_key,
|
|
500
|
-
thinking_mode=request.thinking_mode,
|
|
501
499
|
merge_mode=True,
|
|
502
500
|
)
|
|
503
501
|
|
AutoGLM_GUI/api/dual_model.py
CHANGED
|
@@ -30,19 +30,17 @@ class DualModelInitRequest(BaseModel):
|
|
|
30
30
|
device_id: str
|
|
31
31
|
|
|
32
32
|
# 决策大模型配置
|
|
33
|
-
decision_base_url: str
|
|
33
|
+
decision_base_url: str
|
|
34
34
|
decision_api_key: str
|
|
35
|
-
decision_model_name: str
|
|
35
|
+
decision_model_name: str
|
|
36
36
|
|
|
37
37
|
# 视觉小模型配置(复用现有配置)
|
|
38
38
|
vision_base_url: Optional[str] = None
|
|
39
39
|
vision_api_key: Optional[str] = None
|
|
40
40
|
vision_model_name: Optional[str] = None
|
|
41
41
|
|
|
42
|
-
# 思考模式: fast 或 deep
|
|
43
|
-
thinking_mode: str = "deep"
|
|
44
|
-
|
|
45
42
|
max_steps: int = 50
|
|
43
|
+
thinking_mode: str = "deep" # fast, deep, turbo
|
|
46
44
|
|
|
47
45
|
|
|
48
46
|
class DualModelChatRequest(BaseModel):
|
|
@@ -73,9 +71,12 @@ def init_dual_model(request: DualModelInitRequest) -> dict:
|
|
|
73
71
|
from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
|
|
74
72
|
|
|
75
73
|
device_id = request.device_id
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
74
|
+
thinking_mode_map = {
|
|
75
|
+
"fast": ThinkingMode.FAST,
|
|
76
|
+
"deep": ThinkingMode.DEEP,
|
|
77
|
+
"turbo": ThinkingMode.TURBO,
|
|
78
|
+
}
|
|
79
|
+
thinking_mode = thinking_mode_map.get(request.thinking_mode, ThinkingMode.DEEP)
|
|
79
80
|
logger.info(f"初始化双模型Agent: {device_id}, 模式: {thinking_mode.value}")
|
|
80
81
|
|
|
81
82
|
# 检查设备是否已有单模型Agent初始化
|
|
@@ -0,0 +1,586 @@
|
|
|
1
|
+
"""Layered agent API for hierarchical task execution.
|
|
2
|
+
|
|
3
|
+
This module provides the layered agent API endpoint that uses
|
|
4
|
+
a decision model for planning and autoglm-phone for execution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from agents import Agent, Runner, SQLiteSession, function_tool
|
|
12
|
+
from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel
|
|
13
|
+
from fastapi import APIRouter
|
|
14
|
+
from fastapi.responses import StreamingResponse
|
|
15
|
+
from openai import AsyncOpenAI
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
|
|
18
|
+
from AutoGLM_GUI.config_manager import config_manager
|
|
19
|
+
from AutoGLM_GUI.logger import logger
|
|
20
|
+
|
|
21
|
+
router = APIRouter()
|
|
22
|
+
|
|
23
|
+
# ==================== Session management ====================
|
|
24
|
+
# Maps each session_id to its SQLiteSession (in-memory mode)
|
|
25
|
+
_sessions: dict[str, SQLiteSession] = {}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _get_or_create_session(session_id: str) -> SQLiteSession:
    """Return the session cached under ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The ``SQLiteSession`` stored in the module-level ``_sessions`` cache.
    """
    try:
        return _sessions[session_id]
    except KeyError:
        pass

    # First request for this id: build the session, named after the id itself.
    created = SQLiteSession(session_id)
    _sessions[session_id] = created
    logger.info(f"[LayeredAgent] Created new session: {session_id}")
    return created
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _clear_session(session_id: str) -> bool:
    """Drop the cached session stored under ``session_id``.

    Args:
        session_id: Key of the session to forget.

    Returns:
        True when a session was removed, False when none was cached.
    """
    removed = _sessions.pop(session_id, None)
    if removed is None:
        return False

    logger.info(f"[LayeredAgent] Cleared session: {session_id}")
    return True
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_planner_model() -> str:
    """Return the model name used by the planning layer.

    Reads ``decision_model_name`` from the effective configuration and falls
    back to ``"glm-4.7"`` when that value is empty or unset.
    """
    configured = config_manager.get_effective_config().decision_model_name
    if configured:
        return configured
    return "glm-4.7"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
PLANNER_INSTRUCTIONS = """## 核心目标
|
|
53
|
+
你是一个负责操控手机的高级智能中枢。你的任务是将用户的意图转化为**视觉模型(Vision Model)**可以执行的原子操作。
|
|
54
|
+
|
|
55
|
+
## ⚠️ 极其重要的限制:视觉模型的能力边界 (Must Read)
|
|
56
|
+
你的下级(Vision Model)是一个**纯粹的执行者和观察者**。
|
|
57
|
+
1. **无"记忆/笔记"功能**:它没有 `Note` 功能,无法为你保存数据。
|
|
58
|
+
2. **无"系统级"权限**:它不能复制源代码,不能直接提取文本,不能读取剪贴板。
|
|
59
|
+
3. **唯一的输出**:它只能通过**对话**告诉你它看到了什么,或者去**点击/滑动**屏幕。
|
|
60
|
+
|
|
61
|
+
## 交互策略 (Interaction Strategy)
|
|
62
|
+
|
|
63
|
+
### 1. 如果你需要"操作手机" (To Act)
|
|
64
|
+
下达明确的 UI 动作指令。
|
|
65
|
+
- ✅ "点击'设置'图标。"
|
|
66
|
+
- ✅ "向下滑动屏幕。"
|
|
67
|
+
- ✅ "打开微信。"
|
|
68
|
+
|
|
69
|
+
### 2. 如果你需要"获取信息" (To Read/Extract)
|
|
70
|
+
你必须通过**提问**的方式,让视觉模型在对话中把信息"念"给你听。
|
|
71
|
+
- ❌ **错误**: "把验证码保存下来。" (它做不到)
|
|
72
|
+
- ❌ **错误**: "使用 Note 功能记录价格。" (它没有这个功能)
|
|
73
|
+
- ✅ **正确**: 调用 `chat` 询问:"请看屏幕,告诉我现在的订单总金额是多少?"
|
|
74
|
+
- *结果*: 视觉模型会回复 "25.5元"。你需要自己处理这个文本信息。
|
|
75
|
+
|
|
76
|
+
### 3. 如果用户要求"复制/粘贴"
|
|
77
|
+
必须通过模拟手指操作来实现,不能直接操作剪贴板。
|
|
78
|
+
- ✅ **正确**: "长按这段文字,等待弹出菜单,然后点击'复制'按钮。"
|
|
79
|
+
|
|
80
|
+
## 任务拆解原则 (Decomposition Rules)
|
|
81
|
+
|
|
82
|
+
1. **原子化**: 每次只给一个动作。
|
|
83
|
+
2. **可视化**: 指令必须基于屏幕上**看得见**的元素。不要说"点击确认",如果屏幕上显示的按钮叫"OK",请说"点击'OK'按钮"。
|
|
84
|
+
3. **Fail Fast**: 如果视觉模型回复 `ELEMENT_NOT_FOUND`,不要死循环。询问它:"那现在屏幕上有什么?"或者尝试滑动寻找。
|
|
85
|
+
|
|
86
|
+
## 核心工作流 (The Loop)
|
|
87
|
+
1. **Observe (看)**: 调用 `chat` 询问当前状态。
|
|
88
|
+
- "现在屏幕上显示什么?" / "刚才的点击生效了吗?"
|
|
89
|
+
2. **Think (想)**:
|
|
90
|
+
- 用户的目标是什么?
|
|
91
|
+
- 我需要让视觉模型**做什么动作**,还是**回答什么问题**?
|
|
92
|
+
3. **Act (做)**:
|
|
93
|
+
- **Case A (动作)**: 发送指令 `点击[坐标]...`
|
|
94
|
+
- **Case B (询问)**: 发送问题 `请读取...`
|
|
95
|
+
|
|
96
|
+
## 内部思维链示例 (Inner Monologue)
|
|
97
|
+
|
|
98
|
+
**场景 1: 用户让你"把这篇笔记的标题发给我"**
|
|
99
|
+
> **Current State**: 笔记详情页。
|
|
100
|
+
> **Goal**: 获取标题文本。
|
|
101
|
+
> **Constraint**: 视觉模型无法直接提取变量,我必须问它。
|
|
102
|
+
> **Strategy**: 问视觉模型标题是什么,它回答后,我再反馈给用户。
|
|
103
|
+
> **Next Action**: 提问。
|
|
104
|
+
**Output**: `chat(id, "请读取并告诉我屏幕上这篇笔记的标题文字内容是什么?")`
|
|
105
|
+
|
|
106
|
+
**场景 2: 用户让你"复制链接"**
|
|
107
|
+
> **Current State**: 详情页。
|
|
108
|
+
> **Goal**: 把链接复制到系统剪贴板。
|
|
109
|
+
> **Constraint**: 不能直接 Get Link。必须找"分享"或"复制"按钮。
|
|
110
|
+
> **Strategy**: 先点右上角菜单,再找复制链接。
|
|
111
|
+
> **Next Action**: 点击菜单。
|
|
112
|
+
**Output**: `chat(id, "点击屏幕右上角的'...'(三个点)菜单按钮。")`
|
|
113
|
+
|
|
114
|
+
## 工具集 (Tools)
|
|
115
|
+
1. `list_devices()`
|
|
116
|
+
2. `chat(device_id, message)`:
|
|
117
|
+
- 发送操作指令(如"点击红色按钮")。
|
|
118
|
+
- 发送查询问题(如"那个验证码是多少?")。
|
|
119
|
+
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ==================== Tool definitions ====================
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _sync_list_devices() -> str:
    """Synchronous implementation: list every connected ADB device as JSON.

    Returns:
        A pretty-printed JSON array with one entry per managed device, each
        built by ``_build_device_response_with_agent``.
    """
    from AutoGLM_GUI.api.devices import _build_device_response_with_agent
    from AutoGLM_GUI.device_manager import DeviceManager
    from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager

    logger.info("[LayeredAgent] list_devices tool called")

    devices = DeviceManager.get_instance()
    agents_registry = PhoneAgentManager.get_instance()

    # If the polling thread was never started or has died, force a refresh
    # before reading the device list.
    poller = devices._poll_thread
    polling_active = bool(poller) and poller.is_alive()
    if not polling_active:
        logger.warning("Polling not started, performing sync refresh")
        devices.force_refresh()

    # Build one response entry per managed device.
    payload = [
        _build_device_response_with_agent(device, agents_registry)
        for device in devices.get_devices()
    ]
    return json.dumps(payload, ensure_ascii=False, indent=2)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# NOTE(review): the docstring below is presumably consumed by ``function_tool``
# as the tool description shown to the planner model, so its text is kept
# verbatim rather than translated — confirm before editing it.
@function_tool
async def list_devices() -> str:
    """
    获取所有连接的 ADB 设备列表。

    返回设备信息包括:
    - id: 设备标识符,用于 chat 工具调用
    - model: 设备型号
    - status: 连接状态
    - connection_type: 连接类型 (usb/wifi/remote)

    Returns:
        JSON 格式的设备列表
    """
    # The device lookup is synchronous; run it in a worker thread so the
    # coroutine does not block while it executes.
    devices_json = await asyncio.to_thread(_sync_list_devices)
    return devices_json
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _sync_chat(device_id: str, message: str) -> str:
    """Synchronous implementation: run one sub-task on a device's Phone Agent.

    The agent's ``max_steps`` and ``system_prompt`` are overridden for the
    duration of the call and restored afterwards, even on failure.

    Args:
        device_id: Identifier of the target device.
        message: Sub-task instruction forwarded to the agent.

    Returns:
        A JSON object string with the keys ``result``, ``steps`` and
        ``success``. Failures (busy device, step limit reached, any other
        exception) are reported through ``success: false`` instead of raising.
    """
    from AutoGLM_GUI.exceptions import DeviceBusyError
    from AutoGLM_GUI.phone_agent_manager import PhoneAgentManager
    from AutoGLM_GUI.prompts import MCP_SYSTEM_PROMPT_ZH

    MCP_MAX_STEPS = 5

    def _reply(result: Any, steps: int, success: bool) -> str:
        # Single place that shapes the JSON payload handed back to the planner.
        return json.dumps(
            {"result": result, "steps": steps, "success": success},
            ensure_ascii=False,
        )

    logger.info(
        f"[LayeredAgent] chat tool called: device_id={device_id}, message={message}"
    )

    manager = PhoneAgentManager.get_instance()

    try:
        # Per the original author's note, use_agent auto-initializes the agent.
        with manager.use_agent(device_id, timeout=None) as agent:
            # Remember the current settings so they can be put back afterwards.
            saved_max_steps = agent.agent_config.max_steps
            saved_system_prompt = agent.agent_config.system_prompt

            agent.agent_config.max_steps = MCP_MAX_STEPS
            agent.agent_config.system_prompt = MCP_SYSTEM_PROMPT_ZH

            try:
                # Start from a clean agent state for every sub-task.
                agent.reset()

                result = agent.run(message)
                steps = agent.step_count

                hit_step_limit = (
                    steps >= MCP_MAX_STEPS and result == "Max steps reached"
                )
                if not hit_step_limit:
                    return _reply(result, steps, True)

                # Step limit reached: hand the execution history back so the
                # planner can re-plan.
                context_json = json.dumps(agent.context, ensure_ascii=False, indent=2)
                return _reply(
                    f"⚠️ 已达到最大步数限制({MCP_MAX_STEPS}步)。视觉模型可能遇到了困难,任务未完成。\n\n执行历史:\n{context_json}\n\n建议: 请重新规划任务或将其拆分为更小的子任务。",
                    MCP_MAX_STEPS,
                    False,
                )
            finally:
                # Restore the original configuration.
                agent.agent_config.max_steps = saved_max_steps
                agent.agent_config.system_prompt = saved_system_prompt

    except DeviceBusyError:
        return _reply(f"设备 {device_id} 正忙,请稍后再试。", 0, False)
    except Exception as e:
        logger.error(f"[LayeredAgent] chat tool error: {e}")
        return _reply(str(e), 0, False)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# NOTE(review): the docstring below is presumably consumed by ``function_tool``
# as the tool/parameter description shown to the planner model, so its text is
# kept verbatim rather than translated — confirm before editing it.
@function_tool
async def chat(device_id: str, message: str) -> str:
    """
    向指定设备的 Phone Agent 发送子任务指令。

    Phone Agent 是一个视觉模型,能够看到手机屏幕并执行操作。
    每次调用会执行一个原子化的子任务(最多 5 步操作)。

    Args:
        device_id: 设备标识符,从 list_devices 获取
        message: 子任务指令,例如 "打开微信"、"点击搜索按钮"

    Returns:
        JSON 格式的执行结果,包含:
        - result: 执行结果描述
        - steps: 执行的步数
        - success: 是否成功
    """
    # The agent run is synchronous; execute it in a worker thread so the
    # coroutine does not block while it runs.
    outcome_json = await asyncio.to_thread(_sync_chat, device_id, message)
    return outcome_json
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# ==================== Agent initialization ====================
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _setup_openai_client() -> AsyncOpenAI:
    """Build the ``AsyncOpenAI`` client from the AutoGLM configuration.

    Reloads the file configuration, then uses the effective ``base_url`` and
    ``api_key``.

    Returns:
        A client pointing at the configured base URL.

    Raises:
        ValueError: If no ``base_url`` is configured.
    """
    config_manager.load_file_config()
    settings = config_manager.get_effective_config()

    endpoint = settings.base_url
    if not endpoint:
        raise ValueError("base_url not configured")

    # NOTE(review): the client uses base_url/api_key while get_planner_model()
    # reads decision_model_name — confirm the planner is meant to share the
    # main endpoint rather than a decision-specific one.
    planner_model = get_planner_model()
    logger.info(f"[LayeredAgent] API Base URL: {endpoint}")
    logger.info(f"[LayeredAgent] Planner Model: {planner_model}")

    return AsyncOpenAI(base_url=endpoint, api_key=settings.api_key)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _create_planner_agent(client: AsyncOpenAI) -> Agent[Any]:
    """Create the planner ``Agent`` backed by the Chat Completions API.

    Args:
        client: OpenAI-compatible async client used for model calls.

    Returns:
        An agent named ``"Planner"`` with ``PLANNER_INSTRUCTIONS`` and the
        ``list_devices`` and ``chat`` tools.
    """
    return Agent(
        name="Planner",
        instructions=PLANNER_INSTRUCTIONS,
        model=OpenAIChatCompletionsModel(
            model=get_planner_model(),
            openai_client=client,
        ),
        tools=[list_devices, chat],
    )
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# Global agent instance (lazy initialized)
|
|
308
|
+
_client: AsyncOpenAI | None = None
|
|
309
|
+
_agent: Agent[Any] | None = None
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _ensure_agent() -> Agent[Any]:
    """Return the module-wide planner agent, building it on first call.

    The client and agent are stored in the ``_client`` and ``_agent`` globals
    and reused by every later call.
    """
    global _client, _agent

    if _agent is not None:
        return _agent

    _client = _setup_openai_client()
    _agent = _create_planner_agent(_client)
    return _agent
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# ==================== API routes ====================
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class LayeredAgentRequest(BaseModel):
    """Request for layered agent chat."""

    # User message passed to the planner run.
    message: str
    # Optional device id; used as the session key when session_id is not given.
    device_id: str | None = None
    # Optional key for keeping conversation context; the frontend may pass the deviceId.
    session_id: str | None = None
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _sse_frame(payload: dict[str, Any]) -> str:
    """Encode ``payload`` as one Server-Sent Events ``data:`` frame."""
    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"


def _lookup(source: Any, key: str) -> Any:
    """Read ``key`` from a dict, or attribute ``key`` from an object; None if absent."""
    if isinstance(source, dict):
        return source.get(key)
    return getattr(source, key, None)


def _decode_tool_arguments(arguments: Any) -> Any:
    """Parse JSON-encoded tool arguments; pass non-strings through unchanged.

    Unparseable strings are wrapped as ``{"raw": <text>}`` so the event can
    still be emitted.
    """
    if not isinstance(arguments, str):
        return arguments
    try:
        return json.loads(arguments)
    except (ValueError, RecursionError):
        return {"raw": str(arguments)}


def _read_call_fields(source: Any) -> tuple[Any, Any]:
    """Return ``(name, arguments)`` from ``source`` or its nested ``function``.

    Covers both layouts the original code handled: a nested ``function``
    holder (Chat Completions style) and top-level ``name``/``arguments``
    (Responses style), for dicts as well as objects.
    """
    for holder in (_lookup(source, "function"), source):
        if not holder:
            continue
        name = _lookup(holder, "name")
        if name:
            return name, _lookup(holder, "arguments")
    return None, None


def _extract_tool_call(item: Any) -> tuple[str, Any, str | None]:
    """Return ``(tool_name, tool_args, call_id)`` for a ``tool_call_item``.

    The name is searched on ``item.raw_item``, then ``item``, then
    ``item.call``. It stays ``"unknown"`` (with empty args) when none of them
    carries one. ``call_id`` is None when the raw item exposes no string id.
    """
    raw = getattr(item, "raw_item", None)

    tool_name = "unknown"
    tool_args: Any = {}
    for source in (raw, item, getattr(item, "call", None)):
        if not source:
            continue
        name, arguments = _read_call_fields(source)
        if name:
            tool_name = name
            if arguments is not None:
                tool_args = _decode_tool_arguments(arguments)
            break

    call_id = _lookup(raw, "call_id") or _lookup(raw, "id")
    return tool_name, tool_args, call_id if isinstance(call_id, str) else None


@router.post("/api/layered-agent/chat")
async def layered_agent_chat(request: LayeredAgentRequest):
    """
    Layered agent chat API with streaming execution steps.

    Uses a decision model for planning and autoglm-phone for execution.

    Returns SSE stream with events:
    - tool_call: Agent is calling a tool (with tool_name and tool_args)
    - tool_result: Tool execution result
    - message: Intermediate message from agent
    - done: Final response
    - error: Error occurred
    """
    from agents.stream_events import RunItemStreamEvent

    async def event_generator():
        try:
            agent = _ensure_agent()

            # Conversation memory key: explicit session_id first, then the
            # device_id, then a shared default.
            session_id = request.session_id or request.device_id or "default"
            session = _get_or_create_session(session_id)

            # Run the agent with streaming and session for memory
            result = Runner.run_streamed(
                agent,
                request.message,
                max_turns=50,
                session=session,
            )

            # Tool calls awaiting their result, keyed by call id, so results
            # are labelled correctly even when several calls are issued
            # before any output arrives.
            pending_calls: dict[str, str] = {}
            # Fallback for results that carry no usable call id: the name of
            # the most recent call (the original single-slot behaviour).
            last_call_name: str | None = None

            async for event in result.stream_events():
                # Only run-item events are forwarded; other events (such as
                # raw response chunks) are ignored.
                if not isinstance(event, RunItemStreamEvent):
                    continue

                item = event.item
                item_type = getattr(item, "type", None)

                if item_type == "tool_call_item":
                    tool_name, tool_args, call_id = _extract_tool_call(item)

                    logger.info(
                        f"[LayeredAgent] Tool call: {tool_name}, args keys: {list(tool_args.keys()) if isinstance(tool_args, dict) else 'not dict'}"
                    )

                    if call_id is not None:
                        pending_calls[call_id] = tool_name
                    last_call_name = tool_name

                    yield _sse_frame(
                        {
                            "type": "tool_call",
                            "tool_name": tool_name,
                            "tool_args": tool_args,
                        }
                    )

                elif item_type == "tool_call_output_item":
                    output = getattr(item, "output", "")
                    raw_output = getattr(item, "raw_item", None)

                    # Prefer the name recorded for this exact call id.
                    call_id = _lookup(raw_output, "call_id")
                    tool_name = None
                    if isinstance(call_id, str):
                        tool_name = pending_calls.pop(call_id, None)
                    if tool_name is None:
                        tool_name = last_call_name
                    if tool_name is None or tool_name == "unknown":
                        tool_name = _lookup(raw_output, "name") or "unknown"
                    last_call_name = None

                    logger.info(
                        f"[LayeredAgent] Tool result for {tool_name}: {str(output)[:100] if output else 'empty'}..."
                    )

                    yield _sse_frame(
                        {
                            "type": "tool_result",
                            "tool_name": tool_name,
                            "result": output,
                        }
                    )

                elif item_type == "message_output_item":
                    # Concatenate the text parts of the message; parts without
                    # string text are skipped.
                    raw_message = getattr(item, "raw_item", None)
                    parts = getattr(raw_message, "content", None) or []
                    content = "".join(
                        part.text
                        for part in parts
                        if isinstance(getattr(part, "text", None), str)
                    )

                    if content:
                        yield _sse_frame({"type": "message", "content": content})

            # Final result
            final_output = (
                result.final_output if hasattr(result, "final_output") else ""
            )
            yield _sse_frame(
                {
                    "type": "done",
                    "content": final_output,
                    "success": True,
                }
            )

        except Exception as e:
            # Stream boundary: report the failure to the client as an SSE
            # event instead of raising.
            logger.exception(f"[LayeredAgent] Error: {e}")
            yield _sse_frame({"type": "error", "message": str(e)})

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
class ResetSessionRequest(BaseModel):
    """Request for resetting a session."""

    # Key of the cached session to drop.
    session_id: str
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
@router.post("/api/layered-agent/reset")
def reset_session(request: ResetSessionRequest):
    """
    Reset/clear a session to forget conversation history.

    This should be called when the user clicks "reset" button
    or refreshes the page.
    """
    target = request.session_id

    # The call always reports success; the message says whether a session
    # was actually cached under this id.
    if _clear_session(target):
        outcome = "cleared"
    else:
        outcome = "not found (already empty)"

    return {
        "success": True,
        "message": f"Session {target} {outcome}",
    }
|