myagent-ai 1.13.6 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/__pycache__/main_agent.cpython-312.pyc +0 -0
- package/agents/main_agent.py +5 -5
- package/core/__pycache__/context_builder.cpython-312.pyc +0 -0
- package/core/__pycache__/deps_checker.cpython-312.pyc +0 -0
- package/core/context_builder.py +9 -2
- package/core/deps_checker.py +6 -64
- package/main.py +2 -54
- package/package.json +1 -1
- package/requirements.txt +3 -2
- package/setup.py +3 -2
- package/web/__pycache__/api_server.cpython-312.pyc +0 -0
- package/web/api_server.py +157 -9
- package/web/ui/chat/chat_main.js +193 -219
- package/web/ui/chat/flow_engine.js +81 -12
- package/web/ui/chat/middle_chat.html +1 -1
|
Binary file
|
package/agents/main_agent.py
CHANGED
|
@@ -42,8 +42,8 @@ class MainAgent(BaseAgent):
|
|
|
42
42
|
严格以XML格式化输出以下内容:
|
|
43
43
|
<output>
|
|
44
44
|
<response>直接回复用户的内容。这是一段友好、自然的话语,用于向用户说明你正在做什么,或者回应用户的问题/问候。要求简洁、有礼貌、符合对话场景。如果用户只是问候,简单回应即可;如果用户有具体任务,要说明你的计划。</response>
|
|
45
|
-
<usersays_correct>根据用户输入的"usersays"
|
|
46
|
-
<task_plan>任务计划(仅复杂任务使用):如"context"包含非空"task_plan",则更新它。否则,先评估任务复杂度——如果预计操作步骤不超过
|
|
45
|
+
<usersays_correct>根据用户输入的"usersays"内容(语音转写文本),结合对话语境优化为更准确的用户意图表达(修正识别错误、补充标点、口语转书面语)。如果"usersays"为空,这里输出为空。</usersays_correct>
|
|
46
|
+
<task_plan>任务计划(仅复杂任务使用):如"context"包含非空"task_plan",则更新它。否则,先评估任务复杂度——如果预计操作步骤不超过3步(如:单次查询、简单问答、格式转换、单文件修改、简单计算等简单任务),则<task_plan>输出为空,不要创建任务列表;只有当任务较复杂(预计超过3步操作,如:多文件修改、需要调研+实现+测试、涉及多个模块联动等),才以Markdown列表格式制定新任务列表。格式:每项用 "- [ ] 任务描述" 或 "- [x] 已完成任务",含完成状态标记。</task_plan>
|
|
47
47
|
|
|
48
48
|
<toolstocal>
|
|
49
49
|
<tool><beforecalltext>连接词,介绍调用什么工具,达到什么目的。</beforecalltext><toolname>工具名</toolname><parms>JSON格式的参数对象,例如: {"query": "搜索关键词", "num": 5}</parms><timeout>预估超时时限(秒)</timeout><callback>true/false,要求解析器在该工具执行完后是否要回调llm大模型,将所有工具输出结果+新构造的"context"输入给llm</callback></tool>
|
|
@@ -62,8 +62,8 @@ class MainAgent(BaseAgent):
|
|
|
62
62
|
## 核心规则
|
|
63
63
|
1. 你必须且只能输出 <output> XML 结构,不要输出任何其他文本
|
|
64
64
|
2. <response>: 必须输出一段直接回复用户的话语(这是用户实际看到的回复),要求简洁友好、自然流畅。不要只输出任务计划而不说话!
|
|
65
|
-
3. <usersays_correct>: 如果 context 中 usersays
|
|
66
|
-
4. <task_plan>: 仅用于复杂任务(预计超过
|
|
65
|
+
3. <usersays_correct>: 如果 context 中 usersays 非空(说明用户通过语音输入),则根据对话语境将语音转写文本优化为更准确的用户意图表达,修正识别错误、补充标点、口语转书面语。如果 usersays 为空,这里输出为空。
|
|
66
|
+
4. <task_plan>: 仅用于复杂任务(预计超过3步操作)。简单任务(≤3步)输出为空。复杂任务使用 Markdown 列表格式,每项包含任务描述和完成状态标记 [x]/[ ]
|
|
67
67
|
5. <toolstocal>: 列出所有需要执行的工具调用,每个工具包含完整的参数说明
|
|
68
68
|
6. <parms>: **必须使用严格合法的JSON格式**,例如 {"query": "关键词", "num": 10},不要使用其他格式
|
|
69
69
|
7. <timeout>: 预估超时秒数(简单操作10-30s,文件操作30-60s,网络请求60-120s,数据处理120-300s)
|
|
@@ -514,7 +514,7 @@ class MainAgent(BaseAgent):
|
|
|
514
514
|
session_id=context.session_id,
|
|
515
515
|
conversation_history=conversation_history,
|
|
516
516
|
user_typed_text=context.user_message,
|
|
517
|
-
user_voice_text="",
|
|
517
|
+
user_voice_text=context.metadata.get("user_voice_text", ""),
|
|
518
518
|
task_plan=current_task_plan,
|
|
519
519
|
agent_override_prompt=agent_override_prompt,
|
|
520
520
|
get_knowledge=get_knowledge_content,
|
|
Binary file
|
|
Binary file
|
package/core/context_builder.py
CHANGED
|
@@ -605,8 +605,15 @@ class ContextBuilder:
|
|
|
605
605
|
Returns:
|
|
606
606
|
<userprint> 和 <usersays> XML 段落字符串
|
|
607
607
|
"""
|
|
608
|
-
|
|
609
|
-
|
|
608
|
+
# 语音输入时:userprint 为空,usersays 存原始语音文本
|
|
609
|
+
# 键盘输入时:userprint 存文本,usersays 为空
|
|
610
|
+
# 两者互斥
|
|
611
|
+
if user_voice_text and user_voice_text.strip():
|
|
612
|
+
safe_typed = ""
|
|
613
|
+
safe_voice = _xml_escape(user_voice_text.strip())
|
|
614
|
+
else:
|
|
615
|
+
safe_typed = _xml_escape(user_typed_text.strip()) if user_typed_text else ""
|
|
616
|
+
safe_voice = ""
|
|
610
617
|
|
|
611
618
|
lines = [
|
|
612
619
|
f"<userprint>",
|
package/core/deps_checker.py
CHANGED
|
@@ -17,7 +17,6 @@ core/deps_checker.py - 自动依赖检测与安装
|
|
|
17
17
|
托盘功能: pystray, PIL
|
|
18
18
|
语音合成: edge_tts
|
|
19
19
|
浏览器自动化: chrome-devtools-mcp (Node.js, Chrome DevTools Protocol)
|
|
20
|
-
(备用) playwright (+ chromium 浏览器二进制)
|
|
21
20
|
桌面GUI自动化: mss, pynput, pygetwindow
|
|
22
21
|
"""
|
|
23
22
|
from __future__ import annotations
|
|
@@ -76,9 +75,9 @@ DEPENDENCIES: List[DepInfo] = [
|
|
|
76
75
|
# ── 语音合成 ──
|
|
77
76
|
DepInfo("edge_tts", "edge-tts", "6.1.0", "tts", "all"),
|
|
78
77
|
|
|
79
|
-
# ── 浏览器自动化 (ChromeDev MCP
|
|
80
|
-
|
|
81
|
-
|
|
78
|
+
# ── 浏览器自动化 (ChromeDev MCP) ──
|
|
79
|
+
# Playwright 已移除,浏览器自动化统一使用 ChromeDevTools Protocol (MCP)
|
|
80
|
+
# 需要 Node.js >= 20.19,参见 skills/chromedev_mcp.py
|
|
82
81
|
|
|
83
82
|
# ── 桌面 GUI 自动化 ──
|
|
84
83
|
DepInfo("mss", "mss", "9.0.0", "gui", "all",
|
|
@@ -209,33 +208,6 @@ def _pip_install(pip_names: List[str], category: str = "") -> Tuple[bool, str]:
|
|
|
209
208
|
return False, f"安装失败: {packages} - {last_error}"
|
|
210
209
|
|
|
211
210
|
|
|
212
|
-
def _install_playwright_browsers() -> Tuple[bool, str]:
|
|
213
|
-
"""
|
|
214
|
-
安装 Playwright 浏览器二进制文件(Chromium)。
|
|
215
|
-
这是一个独立的步骤,因为 pip install playwright 只安装 Python 包,
|
|
216
|
-
浏览器二进制需要单独下载。
|
|
217
|
-
"""
|
|
218
|
-
python = _get_python_executable()
|
|
219
|
-
try:
|
|
220
|
-
result = subprocess.run(
|
|
221
|
-
[python, "-m", "playwright", "install", "chromium"],
|
|
222
|
-
capture_output=True,
|
|
223
|
-
timeout=300, # 5分钟超时(浏览器较大)
|
|
224
|
-
)
|
|
225
|
-
stdout = result.stdout.decode("utf-8", errors="replace") if result.stdout else ""
|
|
226
|
-
stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else ""
|
|
227
|
-
if result.returncode == 0:
|
|
228
|
-
return True, "Chromium 浏览器已安装"
|
|
229
|
-
# 检查是否已经安装
|
|
230
|
-
if "Chromium" in stdout and "already" in stdout.lower():
|
|
231
|
-
return True, "Chromium 浏览器已就绪"
|
|
232
|
-
return False, f"Chromium 安装失败: {stderr[:200]}"
|
|
233
|
-
except subprocess.TimeoutExpired:
|
|
234
|
-
return False, "Chromium 安装超时(5分钟),请手动运行: playwright install chromium"
|
|
235
|
-
except Exception as e:
|
|
236
|
-
return False, f"Chromium 安装异常: {e}"
|
|
237
|
-
|
|
238
|
-
|
|
239
211
|
def _check_version(import_name: str, min_version: str) -> bool:
|
|
240
212
|
"""检查模块版本是否满足最低要求"""
|
|
241
213
|
if not min_version:
|
|
@@ -279,7 +251,7 @@ def check_and_install_deps(
|
|
|
279
251
|
"failed": int, # 安装失败的数量
|
|
280
252
|
"skipped_platform": int, # 因平台不匹配而跳过的数量
|
|
281
253
|
"details": {...}, # 每个依赖的状态
|
|
282
|
-
"
|
|
254
|
+
"browser": str, # ChromeDev MCP 状态
|
|
283
255
|
}
|
|
284
256
|
"""
|
|
285
257
|
stats = {
|
|
@@ -290,7 +262,7 @@ def check_and_install_deps(
|
|
|
290
262
|
"failed": 0,
|
|
291
263
|
"skipped_platform": 0,
|
|
292
264
|
"details": {},
|
|
293
|
-
"
|
|
265
|
+
"browser": "not_checked",
|
|
294
266
|
}
|
|
295
267
|
|
|
296
268
|
# 按分类收集缺失的依赖,批量安装以减少 pip 调用次数
|
|
@@ -367,36 +339,6 @@ def check_and_install_deps(
|
|
|
367
339
|
if not silent:
|
|
368
340
|
logger.warning(f" ✗ 安装失败: {dep.import_name} - {message}")
|
|
369
341
|
|
|
370
|
-
# 第三遍:如果 playwright 安装成功,还需要安装 Chromium 浏览器二进制
|
|
371
|
-
playwright_dep = next((d for d in DEPENDENCIES if d.import_name == "playwright"), None)
|
|
372
|
-
if playwright_dep and stats["details"].get("playwright", {}).get("status") == "installed":
|
|
373
|
-
if not silent:
|
|
374
|
-
logger.info("正在安装 Chromium 浏览器二进制...")
|
|
375
|
-
success, message = _install_playwright_browsers()
|
|
376
|
-
stats["playwright_browser"] = "installed" if success else "failed"
|
|
377
|
-
if not silent:
|
|
378
|
-
if success:
|
|
379
|
-
logger.info(f" ✓ {message}")
|
|
380
|
-
else:
|
|
381
|
-
logger.warning(f" ✗ {message}")
|
|
382
|
-
elif playwright_dep and stats["details"].get("playwright", {}).get("status") == "available":
|
|
383
|
-
# playwright 已安装,检查 chromium 是否已安装
|
|
384
|
-
try:
|
|
385
|
-
result = subprocess.run(
|
|
386
|
-
[_get_python_executable(), "-m", "playwright", "install", "--dry-run", "chromium"],
|
|
387
|
-
capture_output=True, timeout=10,
|
|
388
|
-
)
|
|
389
|
-
if result.returncode == 0:
|
|
390
|
-
stats["playwright_browser"] = "ready"
|
|
391
|
-
else:
|
|
392
|
-
# 尝试安装
|
|
393
|
-
if not silent:
|
|
394
|
-
logger.info("正在安装 Chromium 浏览器二进制...")
|
|
395
|
-
success, message = _install_playwright_browsers()
|
|
396
|
-
stats["playwright_browser"] = "installed" if success else "failed"
|
|
397
|
-
except Exception:
|
|
398
|
-
stats["playwright_browser"] = "unknown"
|
|
399
|
-
|
|
400
342
|
# 汇总日志
|
|
401
343
|
if not silent and (total_installed > 0 or total_failed > 0):
|
|
402
344
|
logger.info(
|
|
@@ -414,7 +356,7 @@ def ensure_skill_deps(skill_category: str) -> bool:
|
|
|
414
356
|
|
|
415
357
|
Args:
|
|
416
358
|
skill_category: 技能分类名称
|
|
417
|
-
"browser" - 浏览器自动化 (chrome-devtools-mcp
|
|
359
|
+
"browser" - 浏览器自动化 (chrome-devtools-mcp)
|
|
418
360
|
"gui" - 桌面GUI自动化 (mss, pynput, pygetwindow)
|
|
419
361
|
"search" - 搜索技能
|
|
420
362
|
"tts" - 语音合成
|
package/main.py
CHANGED
|
@@ -73,55 +73,7 @@ def _get_screen_resolution() -> tuple[int, int]:
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
def _open_browser_kiosk(url: str):
|
|
76
|
-
"""
|
|
77
|
-
|
|
78
|
-
优先使用 Playwright 打开 app 模式的 Chromium 窗口(无地址栏),
|
|
79
|
-
如果 Playwright 不可用则回退到 webbrowser.open()。
|
|
80
|
-
"""
|
|
81
|
-
try:
|
|
82
|
-
import asyncio
|
|
83
|
-
from playwright.async_api import async_playwright
|
|
84
|
-
|
|
85
|
-
async def _launch():
|
|
86
|
-
pw = await async_playwright().start()
|
|
87
|
-
# 获取实际屏幕分辨率,确保窗口占满屏幕
|
|
88
|
-
# --app 模式下 --start-maximized 不生效,需要手动设置窗口大小
|
|
89
|
-
screen_width, screen_height = _get_screen_resolution()
|
|
90
|
-
browser = await pw.chromium.launch(
|
|
91
|
-
headless=False,
|
|
92
|
-
args=[
|
|
93
|
-
"--no-sandbox",
|
|
94
|
-
f"--app={url}", # app 模式: 无地址栏、无标签页
|
|
95
|
-
f"--window-size={screen_width},{screen_height}",
|
|
96
|
-
"--window-position=0,0",
|
|
97
|
-
],
|
|
98
|
-
)
|
|
99
|
-
# 保持进程运行,浏览器关闭后退出
|
|
100
|
-
page = await browser.new_page()
|
|
101
|
-
await page.goto(url)
|
|
102
|
-
# 监听页面关闭
|
|
103
|
-
while True:
|
|
104
|
-
try:
|
|
105
|
-
await asyncio.sleep(2)
|
|
106
|
-
if not browser.is_connected():
|
|
107
|
-
break
|
|
108
|
-
except Exception:
|
|
109
|
-
break
|
|
110
|
-
try:
|
|
111
|
-
await pw.stop()
|
|
112
|
-
except Exception:
|
|
113
|
-
pass
|
|
114
|
-
|
|
115
|
-
# 在后台线程中运行,避免阻塞主线程
|
|
116
|
-
thread = threading.Thread(target=lambda: asyncio.run(_launch()), daemon=True)
|
|
117
|
-
thread.start()
|
|
118
|
-
return
|
|
119
|
-
except ImportError:
|
|
120
|
-
pass
|
|
121
|
-
except Exception:
|
|
122
|
-
pass
|
|
123
|
-
|
|
124
|
-
# 回退到系统浏览器
|
|
76
|
+
"""打开浏览器窗口,使用系统默认浏览器。"""
|
|
125
77
|
import webbrowser
|
|
126
78
|
webbrowser.open(url)
|
|
127
79
|
|
|
@@ -212,10 +164,6 @@ class MyAgentApp:
|
|
|
212
164
|
self.logger.warning(
|
|
213
165
|
f"{deps_result['failed']} 个依赖安装失败,相关功能可能不可用"
|
|
214
166
|
)
|
|
215
|
-
if deps_result["playwright_browser"] in ("installed", "ready"):
|
|
216
|
-
self.logger.info("Chromium 浏览器已就绪")
|
|
217
|
-
elif deps_result["playwright_browser"] == "failed":
|
|
218
|
-
self.logger.warning("Chromium 浏览器安装失败,浏览器自动化技能可能不可用")
|
|
219
167
|
|
|
220
168
|
# 2. LLM 客户端
|
|
221
169
|
llm_cfg = self.config.llm
|
|
@@ -431,7 +379,7 @@ class MyAgentApp:
|
|
|
431
379
|
]:
|
|
432
380
|
self.skill_registry.register(skill_cls())
|
|
433
381
|
|
|
434
|
-
# 浏览器自动化技能 (
|
|
382
|
+
# 浏览器自动化技能 (ChromeDev MCP)
|
|
435
383
|
for skill_cls in [
|
|
436
384
|
BrowserOpenSkill, BrowserClickSkill, BrowserFillSkill,
|
|
437
385
|
BrowserScreenshotSkill, BrowserEvalSkill, BrowserNavigateSkill, BrowserCloseSkill,
|
package/package.json
CHANGED
package/requirements.txt
CHANGED
|
@@ -17,9 +17,10 @@ lxml>=5.0.0
|
|
|
17
17
|
psutil>=5.9.0
|
|
18
18
|
|
|
19
19
|
# ============================================================
|
|
20
|
-
# 技能系统 - 浏览器自动化 (
|
|
20
|
+
# 技能系统 - 浏览器自动化 (ChromeDev MCP)
|
|
21
21
|
# ============================================================
|
|
22
|
-
|
|
22
|
+
# 浏览器自动化使用 ChromeDevTools Protocol (MCP),无需 Playwright
|
|
23
|
+
# 需要 Node.js >= 20.19,参见 skills/chromedev_mcp.py
|
|
23
24
|
|
|
24
25
|
# ============================================================
|
|
25
26
|
# 技能系统 - 桌面 GUI 自动化 (可选,无头服务器不需要)
|
package/setup.py
CHANGED
|
@@ -37,8 +37,7 @@ setup(
|
|
|
37
37
|
"Pillow>=10.0.0",
|
|
38
38
|
# 语音合成
|
|
39
39
|
"edge-tts>=6.1.0",
|
|
40
|
-
# 浏览器自动化 (
|
|
41
|
-
"playwright>=1.41.0",
|
|
40
|
+
# 浏览器自动化 (ChromeDev MCP, 无需 Playwright)
|
|
42
41
|
# 桌面 GUI 自动化 (内置技能)
|
|
43
42
|
"pynput>=1.7.6",
|
|
44
43
|
"pygetwindow>=0.0.9",
|
|
@@ -49,12 +48,14 @@ setup(
|
|
|
49
48
|
"discord": ["discord.py>=2.3.0"],
|
|
50
49
|
"anthropic": ["anthropic>=0.18.0"],
|
|
51
50
|
"communication": ["cryptography>=41.0.0", "websockets>=12.0"],
|
|
51
|
+
"voice": ["faster-whisper>=1.0.0"],
|
|
52
52
|
"all": [
|
|
53
53
|
"python-telegram-bot>=21.0",
|
|
54
54
|
"discord.py>=2.3.0",
|
|
55
55
|
"anthropic>=0.18.0",
|
|
56
56
|
"cryptography>=41.0.0",
|
|
57
57
|
"websockets>=12.0",
|
|
58
|
+
"faster-whisper>=1.0.0",
|
|
58
59
|
],
|
|
59
60
|
},
|
|
60
61
|
entry_points={
|
|
Binary file
|
package/web/api_server.py
CHANGED
|
@@ -313,6 +313,7 @@ class ApiServer:
|
|
|
313
313
|
r.add_post("/api/chat/stream", self.handle_chat_stream)
|
|
314
314
|
r.add_post("/api/chat/inject", self.handle_chat_inject)
|
|
315
315
|
r.add_post("/api/voice-optimize", self.handle_voice_optimize)
|
|
316
|
+
r.add_post("/api/voice-stt", self.handle_voice_stt)
|
|
316
317
|
r.add_get("/chat", self.handle_chat_page)
|
|
317
318
|
r.add_get("/api/execution/progress", self.handle_execution_progress)
|
|
318
319
|
# ── 组织管理 ──
|
|
@@ -614,6 +615,7 @@ class ApiServer:
|
|
|
614
615
|
session_id = f"{agent_path}_{raw_session_id}"
|
|
615
616
|
chat_mode = data.get("mode", "")
|
|
616
617
|
escalated = data.get("escalated", False)
|
|
618
|
+
voice_text = data.get("voice_text", "").strip() # 语音转文字原始文本(用于 usersays_correct)
|
|
617
619
|
|
|
618
620
|
# ── 检查是否有正在运行的同一会话任务 ──
|
|
619
621
|
running_info = self._running_sessions.get(session_id)
|
|
@@ -718,12 +720,13 @@ class ApiServer:
|
|
|
718
720
|
model_chain, clean_message, session_id,
|
|
719
721
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt,
|
|
720
722
|
chat_mode=chat_mode, stream_response=proxy,
|
|
723
|
+
voice_text=voice_text,
|
|
721
724
|
)
|
|
722
725
|
elif self.core.main_agent and self.core.llm:
|
|
723
726
|
full_response = await self._stream_process_message(
|
|
724
727
|
clean_message, session_id, proxy,
|
|
725
728
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt,
|
|
726
|
-
chat_mode=chat_mode,
|
|
729
|
+
chat_mode=chat_mode, voice_text=voice_text,
|
|
727
730
|
)
|
|
728
731
|
else:
|
|
729
732
|
full_response = await self.core.process_message(clean_message, session_id)
|
|
@@ -768,12 +771,13 @@ class ApiServer:
|
|
|
768
771
|
model_chain, clean_message_q, session_id,
|
|
769
772
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt_q,
|
|
770
773
|
chat_mode=chat_mode, stream_response=proxy,
|
|
774
|
+
voice_text="",
|
|
771
775
|
)
|
|
772
776
|
elif self.core.main_agent and self.core.llm:
|
|
773
777
|
full_response = await self._stream_process_message(
|
|
774
778
|
clean_message_q, session_id, proxy,
|
|
775
779
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt_q,
|
|
776
|
-
chat_mode=chat_mode,
|
|
780
|
+
chat_mode=chat_mode, voice_text="",
|
|
777
781
|
)
|
|
778
782
|
else:
|
|
779
783
|
full_response = await self.core.process_message(clean_message_q, session_id)
|
|
@@ -1026,6 +1030,145 @@ class ApiServer:
|
|
|
1026
1030
|
logger.error(f"Voice optimize failed: {e}")
|
|
1027
1031
|
return web.json_response({"error": str(e)}, status=500)
|
|
1028
1032
|
|
|
1033
|
+
async def handle_voice_stt(self, request):
|
|
1034
|
+
"""POST /api/voice-stt - 轻量级本地语音转文字
|
|
1035
|
+
|
|
1036
|
+
接受音频文件(WAV/WEBM/OGG),使用本地 STT 引擎转录。
|
|
1037
|
+
支持的引擎(按优先级):
|
|
1038
|
+
1. faster-whisper(推荐,需安装:pip install faster-whisper)
|
|
1039
|
+
2. vosk(备选,需安装:pip install vosk)
|
|
1040
|
+
如果都未安装,返回错误提示。
|
|
1041
|
+
"""
|
|
1042
|
+
try:
|
|
1043
|
+
reader = await request.multipart()
|
|
1044
|
+
audio_data = None
|
|
1045
|
+
audio_format = None
|
|
1046
|
+
|
|
1047
|
+
while True:
|
|
1048
|
+
field = await reader.next()
|
|
1049
|
+
if field is None:
|
|
1050
|
+
break
|
|
1051
|
+
if field.name == 'audio':
|
|
1052
|
+
audio_data = await field.read(decode=False)
|
|
1053
|
+
elif field.name == 'format':
|
|
1054
|
+
audio_format = (await field.read(decode=True)).decode('utf-8').strip()
|
|
1055
|
+
|
|
1056
|
+
if not audio_data:
|
|
1057
|
+
# 也支持 JSON body:{"audio": "base64...", "format": "wav"}
|
|
1058
|
+
try:
|
|
1059
|
+
data = await request.json()
|
|
1060
|
+
audio_b64 = data.get("audio", "")
|
|
1061
|
+
audio_format = data.get("format", "wav")
|
|
1062
|
+
if audio_b64:
|
|
1063
|
+
import base64
|
|
1064
|
+
audio_data = base64.b64decode(audio_b64)
|
|
1065
|
+
except Exception:
|
|
1066
|
+
pass
|
|
1067
|
+
|
|
1068
|
+
if not audio_data:
|
|
1069
|
+
return web.json_response({"error": "未收到音频数据"}, status=400)
|
|
1070
|
+
|
|
1071
|
+
import io
|
|
1072
|
+
|
|
1073
|
+
# ── 尝试 faster-whisper ──
|
|
1074
|
+
try:
|
|
1075
|
+
from faster_whisper import WhisperModel
|
|
1076
|
+
whisper_model = getattr(self, '_whisper_model', None)
|
|
1077
|
+
if whisper_model is None:
|
|
1078
|
+
import os
|
|
1079
|
+
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'whisper')
|
|
1080
|
+
# 使用 tiny 模型(最轻量,~39MB),CPU int8 量化
|
|
1081
|
+
self._whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8",
|
|
1082
|
+
download_root=model_dir)
|
|
1083
|
+
whisper_model = self._whisper_model
|
|
1084
|
+
logger.info("faster-whisper tiny 模型已加载 (CPU int8)")
|
|
1085
|
+
|
|
1086
|
+
# faster-whisper 需要 16kHz WAV
|
|
1087
|
+
import wave
|
|
1088
|
+
audio_buf = io.BytesIO(audio_data)
|
|
1089
|
+
|
|
1090
|
+
# 转换为 WAV 16kHz mono
|
|
1091
|
+
wav_buf = io.BytesIO()
|
|
1092
|
+
with wave.open(wav_buf, 'wb') as wf:
|
|
1093
|
+
# 尝试读取原始音频
|
|
1094
|
+
try:
|
|
1095
|
+
with wave.open(audio_buf, 'rb') as rf:
|
|
1096
|
+
wf.setnchannels(1)
|
|
1097
|
+
wf.setsampwidth(2)
|
|
1098
|
+
wf.setframerate(16000)
|
|
1099
|
+
# 读取所有帧并重采样
|
|
1100
|
+
frames = rf.readframes(rf.getnframes())
|
|
1101
|
+
wf.writeframes(frames)
|
|
1102
|
+
except Exception:
|
|
1103
|
+
# 非 WAV 格式,尝试通过 pydub 或直接写入
|
|
1104
|
+
wf.setnchannels(1)
|
|
1105
|
+
wf.setsampwidth(2)
|
|
1106
|
+
wf.setframerate(16000)
|
|
1107
|
+
wf.writeframes(audio_data)
|
|
1108
|
+
|
|
1109
|
+
wav_buf.seek(0)
|
|
1110
|
+
segments, info = whisper_model.transcribe(wav_buf, language="zh", beam_size=1,
|
|
1111
|
+
vad_filter=True, vad_parameters=dict(
|
|
1112
|
+
min_silence_duration_ms=300))
|
|
1113
|
+
text = "".join(seg.text for seg in segments).strip()
|
|
1114
|
+
|
|
1115
|
+
if text:
|
|
1116
|
+
return web.json_response({"text": text, "engine": "faster-whisper"})
|
|
1117
|
+
except ImportError:
|
|
1118
|
+
logger.debug("faster-whisper 未安装,跳过")
|
|
1119
|
+
except Exception as e:
|
|
1120
|
+
logger.warning(f"faster-whisper 转录失败: {e}")
|
|
1121
|
+
|
|
1122
|
+
# ── 尝试 vosk ──
|
|
1123
|
+
try:
|
|
1124
|
+
import vosk
|
|
1125
|
+
model = getattr(self, '_vosk_model', None)
|
|
1126
|
+
if model is None:
|
|
1127
|
+
import os, zipfile
|
|
1128
|
+
model_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'models', 'vosk', 'vosk-model-cn')
|
|
1129
|
+
if not os.path.exists(model_dir):
|
|
1130
|
+
# 自动下载 vosk 小型中文模型
|
|
1131
|
+
logger.info("正在下载 vosk 中文模型...")
|
|
1132
|
+
import urllib.request
|
|
1133
|
+
url = "https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip"
|
|
1134
|
+
zip_path = model_dir + ".zip"
|
|
1135
|
+
os.makedirs(os.path.dirname(model_dir), exist_ok=True)
|
|
1136
|
+
try:
|
|
1137
|
+
urllib.request.urlretrieve(url, zip_path)
|
|
1138
|
+
with zipfile.ZipFile(zip_path, 'r') as zf:
|
|
1139
|
+
zf.extractall(os.path.dirname(model_dir))
|
|
1140
|
+
os.remove(zip_path)
|
|
1141
|
+
except Exception as de:
|
|
1142
|
+
logger.warning(f"vosk 模型下载失败: {de}")
|
|
1143
|
+
if os.path.exists(model_dir):
|
|
1144
|
+
self._vosk_model = vosk.Model(model_dir)
|
|
1145
|
+
model = self._vosk_model
|
|
1146
|
+
|
|
1147
|
+
if model:
|
|
1148
|
+
import json as _json
|
|
1149
|
+
rec = vosk.KaldiRecognizer(model, 16000)
|
|
1150
|
+
rec.AcceptWaveform(audio_data)
|
|
1151
|
+
result = _json.loads(rec.Result())
|
|
1152
|
+
text = result.get("text", "").strip()
|
|
1153
|
+
if text:
|
|
1154
|
+
return web.json_response({"text": text, "engine": "vosk"})
|
|
1155
|
+
except ImportError:
|
|
1156
|
+
logger.debug("vosk 未安装,跳过")
|
|
1157
|
+
except Exception as e:
|
|
1158
|
+
logger.warning(f"vosk 转录失败: {e}")
|
|
1159
|
+
|
|
1160
|
+
# ── 没有可用的 STT 引擎 ──
|
|
1161
|
+
return web.json_response({
|
|
1162
|
+
"error": "未检测到本地 STT 引擎。请安装 faster-whisper(推荐)或 vosk:\n"
|
|
1163
|
+
" pip install faster-whisper (首次使用会自动下载 tiny 模型 ~39MB)\n"
|
|
1164
|
+
" 或 pip install vosk",
|
|
1165
|
+
"available": False,
|
|
1166
|
+
}, status=503)
|
|
1167
|
+
|
|
1168
|
+
except Exception as e:
|
|
1169
|
+
logger.error(f"Voice STT failed: {e}", exc_info=True)
|
|
1170
|
+
return web.json_response({"error": str(e)}, status=500)
|
|
1171
|
+
|
|
1029
1172
|
def _build_task_plan_context(self, agent_path: str, chat_mode: str, original_message: str, session_id: str = "") -> str:
|
|
1030
1173
|
"""构建任务规划上下文(仅 exec 模式,注入到 system_prompt 中)"""
|
|
1031
1174
|
if chat_mode != "exec":
|
|
@@ -1035,8 +1178,8 @@ class ApiServer:
|
|
|
1035
1178
|
"你当前处于【执行模式】(Execution Mode)。\n\n"
|
|
1036
1179
|
"## 核心规则\n"
|
|
1037
1180
|
"1. **任务列表(按复杂度决定)**:\n"
|
|
1038
|
-
" - 如果用户的需求是简单任务(预计操作步骤不超过
|
|
1039
|
-
" - 只有当任务较复杂(预计需要超过
|
|
1181
|
+
" - 如果用户的需求是简单任务(预计操作步骤不超过3步,如:单次查询、简单计算、问答题、格式转换、文件读取等),【不要】使用 ```tasklist```,直接用纯文本回复并执行即可。\n"
|
|
1182
|
+
" - 只有当任务较复杂(预计需要超过3步操作,如:多文件修改、需要调研+实现+测试、涉及多个模块联动等),才使用 ```tasklist``` 代码块来跟踪进度。\n"
|
|
1040
1183
|
" - 格式:```tasklist\\n[{\"text\": \"步骤描述\", \"status\": \"pending\"}]\\n```\n"
|
|
1041
1184
|
" - status 可选值:pending(待执行)、running(进行中)、done(已完成)、blocked(受阻)\n"
|
|
1042
1185
|
" - 首次收到复杂任务时,拆分为多个步骤,全部标记为 pending\n"
|
|
@@ -1052,7 +1195,7 @@ class ApiServer:
|
|
|
1052
1195
|
store_key = session_id or agent_path
|
|
1053
1196
|
tasks = self._task_list_store.get(store_key, [])
|
|
1054
1197
|
if not tasks:
|
|
1055
|
-
return base_instruction + "\n## 当前状态\n暂无任务计划。如果是简单任务(不超过
|
|
1198
|
+
return base_instruction + "\n## 当前状态\n暂无任务计划。如果是简单任务(不超过3步),直接执行即可,无需创建任务列表。如果是复杂任务(超过3步),请先分析用户需求,拆分为具体步骤,然后用 ```tasklist``` 输出计划。"
|
|
1056
1199
|
|
|
1057
1200
|
pending = [f" - ⏳ {t['text']}" for t in tasks if t.get("status") in ("pending", "running", "blocked")]
|
|
1058
1201
|
done = [f" - ✅ {t['text']}" for t in tasks if t.get("status") == "done"]
|
|
@@ -3168,7 +3311,8 @@ class ApiServer:
|
|
|
3168
3311
|
|
|
3169
3312
|
async def _try_model_chain_stream(self, model_chain, message, session_id,
|
|
3170
3313
|
agent_path=None, agent_system_prompt=None,
|
|
3171
|
-
chat_mode="", stream_response=None
|
|
3314
|
+
chat_mode="", stream_response=None,
|
|
3315
|
+
voice_text=""):
|
|
3172
3316
|
"""流式版本的模型链调用,逐token输出到SSE
|
|
3173
3317
|
|
|
3174
3318
|
使用 asyncio.Lock 保护共享的 self.core.llm,防止并发请求互相干扰。
|
|
@@ -3183,11 +3327,13 @@ class ApiServer:
|
|
|
3183
3327
|
model_chain, message, session_id,
|
|
3184
3328
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt,
|
|
3185
3329
|
chat_mode=chat_mode, stream_response=stream_response,
|
|
3330
|
+
voice_text=voice_text,
|
|
3186
3331
|
)
|
|
3187
3332
|
|
|
3188
3333
|
async def _try_model_chain_stream_inner(self, model_chain, message, session_id,
|
|
3189
3334
|
agent_path=None, agent_system_prompt=None,
|
|
3190
|
-
chat_mode="", stream_response=None
|
|
3335
|
+
chat_mode="", stream_response=None,
|
|
3336
|
+
voice_text=""):
|
|
3191
3337
|
"""_try_model_chain_stream 的实际执行体(已在 _model_chain_lock 保护下)"""
|
|
3192
3338
|
llm = self.core.llm
|
|
3193
3339
|
full_text = ""
|
|
@@ -3212,7 +3358,7 @@ class ApiServer:
|
|
|
3212
3358
|
result = await self._stream_process_message(
|
|
3213
3359
|
message, session_id, stream_response,
|
|
3214
3360
|
agent_path=agent_path, agent_system_prompt=agent_system_prompt,
|
|
3215
|
-
chat_mode=chat_mode,
|
|
3361
|
+
chat_mode=chat_mode, voice_text=voice_text,
|
|
3216
3362
|
)
|
|
3217
3363
|
if result and not result.startswith("⚠️") and not result.startswith("❌"):
|
|
3218
3364
|
return result
|
|
@@ -3243,7 +3389,8 @@ class ApiServer:
|
|
|
3243
3389
|
await asyncio.sleep(delay)
|
|
3244
3390
|
|
|
3245
3391
|
async def _stream_process_message(self, user_message, session_id, stream_response,
|
|
3246
|
-
agent_path=None, agent_system_prompt=None, chat_mode=""
|
|
3392
|
+
agent_path=None, agent_system_prompt=None, chat_mode="",
|
|
3393
|
+
voice_text=""):
|
|
3247
3394
|
"""使用流式LLM调用处理消息,支持完整的agent循环(工具调用/操作执行)+ 实时流式输出
|
|
3248
3395
|
|
|
3249
3396
|
核心改进:
|
|
@@ -3269,6 +3416,7 @@ class ApiServer:
|
|
|
3269
3416
|
context.metadata["agent_override_prompt"] = agent_system_prompt
|
|
3270
3417
|
context.metadata["agent_override_path"] = agent_path
|
|
3271
3418
|
context.metadata["chat_mode"] = chat_mode
|
|
3419
|
+
context.metadata["user_voice_text"] = voice_text # 语音输入原始文本(用于 usersays_correct)
|
|
3272
3420
|
|
|
3273
3421
|
# ── 根据 Agent 配置设置执行引擎参数(execution_mode 等)──
|
|
3274
3422
|
agent_cfg_for_exec = self._read_agent_config(agent_path)
|
package/web/ui/chat/chat_main.js
CHANGED
|
@@ -2429,7 +2429,11 @@ function _renderMessagesInner() {
|
|
|
2429
2429
|
? (content ? `<div class="message-bubble msg-bubble-wrapper">${content}${ttsIndicator}</div>` : '')
|
|
2430
2430
|
: '';
|
|
2431
2431
|
|
|
2432
|
-
//
|
|
2432
|
+
// ── Task Plan (historical view only — hidden during streaming, shown after completion) ──
|
|
2433
|
+
var taskPlanHtml = '';
|
|
2434
|
+
if (!msg.streaming && msg._v2TaskPlan && msg._v2TaskPlan.trim()) {
|
|
2435
|
+
taskPlanHtml = '<div class="v2-task-plan" style="margin-bottom:8px"><div class="v2-task-plan-header" style="font-size:12px;font-weight:600;color:var(--text3);margin-bottom:4px">📋 任务计划</div><div class="v2-task-plan-body">' + renderMarkdown(msg._v2TaskPlan) + '</div></div>';
|
|
2436
|
+
}
|
|
2433
2437
|
const execEventsHtml = (!isUser && !hasParts && msg.exec_events && msg.exec_events.length > 0)
|
|
2434
2438
|
? renderExecEvents(msg.exec_events, i) : '';
|
|
2435
2439
|
html += `
|
|
@@ -2438,6 +2442,7 @@ function _renderMessagesInner() {
|
|
|
2438
2442
|
<div class="message-content" style="flex:1;min-width:0">
|
|
2439
2443
|
${reasoningHtml}
|
|
2440
2444
|
${thoughtHtml}
|
|
2445
|
+
${taskPlanHtml}
|
|
2441
2446
|
${timelineHtml}
|
|
2442
2447
|
${singleBubbleHtml}
|
|
2443
2448
|
${streamingIndicator}
|
|
@@ -4011,28 +4016,38 @@ if (document.readyState === 'loading') {
|
|
|
4011
4016
|
var VoiceInput = {
|
|
4012
4017
|
mode: 'text', // 'text' | 'voice'
|
|
4013
4018
|
isRecording: false,
|
|
4014
|
-
|
|
4019
|
+
mediaRecorder: null,
|
|
4020
|
+
audioChunks: [],
|
|
4015
4021
|
rawText: '',
|
|
4016
|
-
optimizedText: '',
|
|
4017
|
-
isOptimizing: false,
|
|
4018
4022
|
_micPermissionGranted: false, // 麦克风权限是否已确认
|
|
4019
|
-
|
|
4020
|
-
|
|
4021
|
-
|
|
4022
|
-
|
|
4023
|
-
|
|
4024
|
-
|
|
4025
|
-
|
|
4026
|
-
|
|
4027
|
-
|
|
4023
|
+
_audioStream: null, // 当前活跃的音频流
|
|
4024
|
+
_sttEngine: null, // 检测到的STT引擎名称
|
|
4025
|
+
|
|
4026
|
+
/** 检查STT引擎是否可用 */
|
|
4027
|
+
checkSTTAvailable: async function() {
|
|
4028
|
+
try {
|
|
4029
|
+
var resp = await fetch('/api/voice-stt', { method: 'OPTIONS' }).catch(function() { return { ok: false }; });
|
|
4030
|
+
// OPTIONS might not be supported, try a small test
|
|
4031
|
+
var testData = new FormData();
|
|
4032
|
+
testData.append('audio', new Blob([], { type: 'audio/wav' }));
|
|
4033
|
+
var testResp = await fetch('/api/voice-stt', {
|
|
4034
|
+
method: 'POST',
|
|
4035
|
+
body: testData,
|
|
4036
|
+
});
|
|
4037
|
+
if (testResp.status === 400) {
|
|
4038
|
+
// 400 means "no audio data" — endpoint exists and works
|
|
4039
|
+
return true;
|
|
4040
|
+
}
|
|
4041
|
+
return testResp.ok;
|
|
4042
|
+
} catch (e) {
|
|
4043
|
+
return false;
|
|
4044
|
+
}
|
|
4028
4045
|
},
|
|
4029
4046
|
|
|
4030
|
-
/**
|
|
4047
|
+
/** 主动请求麦克风权限 */
|
|
4031
4048
|
_ensureMicPermission: async function() {
|
|
4032
|
-
// 如果已经确认有权限,跳过
|
|
4033
4049
|
if (this._micPermissionGranted) return true;
|
|
4034
4050
|
|
|
4035
|
-
// 检查 navigator.permissions API
|
|
4036
4051
|
if (navigator.permissions && navigator.permissions.query) {
|
|
4037
4052
|
try {
|
|
4038
4053
|
var result = await navigator.permissions.query({ name: 'microphone' });
|
|
@@ -4043,16 +4058,12 @@ var VoiceInput = {
|
|
|
4043
4058
|
if (result.state === 'denied') {
|
|
4044
4059
|
return false;
|
|
4045
4060
|
}
|
|
4046
|
-
} catch (_) {
|
|
4047
|
-
// permissions.query 可能不支持 microphone,继续尝试 getUserMedia
|
|
4048
|
-
}
|
|
4061
|
+
} catch (_) {}
|
|
4049
4062
|
}
|
|
4050
4063
|
|
|
4051
|
-
// 通过 getUserMedia 主动请求麦克风权限
|
|
4052
4064
|
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
|
|
4053
4065
|
try {
|
|
4054
4066
|
var stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
4055
|
-
// 获取成功,立即释放(SpeechRecognition 会自己管理音频流)
|
|
4056
4067
|
stream.getTracks().forEach(function(t) { t.stop(); });
|
|
4057
4068
|
this._micPermissionGranted = true;
|
|
4058
4069
|
return true;
|
|
@@ -4062,88 +4073,6 @@ var VoiceInput = {
|
|
|
4062
4073
|
return false;
|
|
4063
4074
|
}
|
|
4064
4075
|
}
|
|
4065
|
-
|
|
4066
|
-
// 没有 mediaDevices API(HTTP 环境),但 SpeechRecognition 可能仍可用
|
|
4067
|
-
return true;
|
|
4068
|
-
},
|
|
4069
|
-
|
|
4070
|
-
/** Initialize Web Speech API */
|
|
4071
|
-
init: function() {
|
|
4072
|
-
var SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
|
|
4073
|
-
if (!SpeechRecognition) {
|
|
4074
|
-
console.warn('Web Speech API not supported');
|
|
4075
|
-
return false;
|
|
4076
|
-
}
|
|
4077
|
-
this.recognition = new SpeechRecognition();
|
|
4078
|
-
this.recognition.continuous = true;
|
|
4079
|
-
this.recognition.interimResults = true;
|
|
4080
|
-
this.recognition.lang = 'zh-CN';
|
|
4081
|
-
this.recognition.maxAlternatives = 1;
|
|
4082
|
-
|
|
4083
|
-
var self = this;
|
|
4084
|
-
this.recognition.onresult = function(event) {
|
|
4085
|
-
var transcript = '';
|
|
4086
|
-
for (var i = 0; i < event.results.length; i++) {
|
|
4087
|
-
transcript += event.results[i][0].transcript;
|
|
4088
|
-
}
|
|
4089
|
-
self.rawText = transcript;
|
|
4090
|
-
self._startRetries = 0; // 成功获取结果,重置重试计数
|
|
4091
|
-
var statusEl = document.getElementById('voiceStatus');
|
|
4092
|
-
if (statusEl) {
|
|
4093
|
-
statusEl.textContent = transcript || '正在聆听...';
|
|
4094
|
-
statusEl.style.color = transcript ? 'var(--text)' : 'var(--text3)';
|
|
4095
|
-
}
|
|
4096
|
-
};
|
|
4097
|
-
|
|
4098
|
-
this.recognition.onerror = function(event) {
|
|
4099
|
-
console.error('Speech recognition error:', event.error);
|
|
4100
|
-
var statusEl = document.getElementById('voiceStatus');
|
|
4101
|
-
var errMsg = '';
|
|
4102
|
-
|
|
4103
|
-
switch (event.error) {
|
|
4104
|
-
case 'no-speech':
|
|
4105
|
-
// 用户没有说话,静默处理
|
|
4106
|
-
self._showStatus('未检测到语音,请重试', 'var(--text3)');
|
|
4107
|
-
break;
|
|
4108
|
-
case 'aborted':
|
|
4109
|
-
// 用户取消,不显示错误
|
|
4110
|
-
break;
|
|
4111
|
-
case 'not-allowed':
|
|
4112
|
-
errMsg = '麦克风权限被拒绝,请在浏览器地址栏左侧点击权限图标允许麦克风访问';
|
|
4113
|
-
self._micPermissionGranted = false;
|
|
4114
|
-
break;
|
|
4115
|
-
case 'service-not-available':
|
|
4116
|
-
errMsg = '语音识别服务不可用,请检查网络连接或尝试刷新页面';
|
|
4117
|
-
break;
|
|
4118
|
-
case 'service-not-allowed':
|
|
4119
|
-
errMsg = '语音识别服务未授权,请检查浏览器设置是否允许语音识别';
|
|
4120
|
-
break;
|
|
4121
|
-
case 'audio-capture':
|
|
4122
|
-
errMsg = '未找到麦克风设备,请确认已连接麦克风';
|
|
4123
|
-
break;
|
|
4124
|
-
case 'network':
|
|
4125
|
-
errMsg = '语音识别网络错误,请检查网络连接后重试';
|
|
4126
|
-
break;
|
|
4127
|
-
default:
|
|
4128
|
-
errMsg = '语音识别出错 (' + event.error + '),请重试';
|
|
4129
|
-
break;
|
|
4130
|
-
}
|
|
4131
|
-
|
|
4132
|
-
if (errMsg) {
|
|
4133
|
-
self._showStatus(errMsg, 'var(--danger)');
|
|
4134
|
-
}
|
|
4135
|
-
self.stopRecording();
|
|
4136
|
-
};
|
|
4137
|
-
|
|
4138
|
-
this.recognition.onend = function() {
|
|
4139
|
-
if (self.isRecording) {
|
|
4140
|
-
self.stopRecording();
|
|
4141
|
-
}
|
|
4142
|
-
};
|
|
4143
|
-
|
|
4144
|
-
// Set up press-and-hold for the record button
|
|
4145
|
-
this._setupHoldButton();
|
|
4146
|
-
|
|
4147
4076
|
return true;
|
|
4148
4077
|
},
|
|
4149
4078
|
|
|
@@ -4156,6 +4085,12 @@ var VoiceInput = {
|
|
|
4156
4085
|
}
|
|
4157
4086
|
},
|
|
4158
4087
|
|
|
4088
|
+
/** 初始化(设置按钮事件) */
|
|
4089
|
+
init: function() {
|
|
4090
|
+
this._setupHoldButton();
|
|
4091
|
+
return true;
|
|
4092
|
+
},
|
|
4093
|
+
|
|
4159
4094
|
/** Set up press-and-hold behavior on the voice record button */
|
|
4160
4095
|
_setupHoldButton: function() {
|
|
4161
4096
|
var btn = document.getElementById('voiceRecordBtn');
|
|
@@ -4210,45 +4145,38 @@ var VoiceInput = {
|
|
|
4210
4145
|
if (textBtn) textBtn.classList.remove('active');
|
|
4211
4146
|
if (voiceBtn) voiceBtn.classList.add('active');
|
|
4212
4147
|
if (inputBox) inputBox.style.borderColor = '';
|
|
4213
|
-
//
|
|
4214
|
-
if (!this.
|
|
4148
|
+
// Init if not done
|
|
4149
|
+
if (!this._setupDone) {
|
|
4215
4150
|
this.init();
|
|
4151
|
+
this._setupDone = true;
|
|
4216
4152
|
}
|
|
4153
|
+
// Check STT availability
|
|
4154
|
+
this._showStatus('按住麦克风开始录音', 'var(--text3)');
|
|
4217
4155
|
} else {
|
|
4218
4156
|
if (textArea) textArea.style.display = 'flex';
|
|
4219
4157
|
if (voiceArea) voiceArea.style.display = 'none';
|
|
4220
4158
|
if (voicePreview) voicePreview.style.display = 'none';
|
|
4221
4159
|
if (textBtn) textBtn.classList.add('active');
|
|
4222
4160
|
if (voiceBtn) voiceBtn.classList.remove('active');
|
|
4223
|
-
// Cancel any ongoing recording
|
|
4224
4161
|
if (this.isRecording) {
|
|
4225
4162
|
this.cancelRecording();
|
|
4226
4163
|
}
|
|
4227
4164
|
}
|
|
4228
4165
|
},
|
|
4229
4166
|
|
|
4230
|
-
/** Start recording
|
|
4167
|
+
/** Start recording(使用 MediaRecorder) */
|
|
4231
4168
|
startRecording: async function() {
|
|
4232
|
-
if (this.isRecording
|
|
4233
|
-
if (this.isOptimizing) return;
|
|
4234
|
-
|
|
4235
|
-
// ── Step 1: 检查安全上下文 ──
|
|
4236
|
-
if (!this._isSecureContext()) {
|
|
4237
|
-
this._showStatus('语音识别需要 HTTPS 环境,当前页面不安全', 'var(--danger)');
|
|
4238
|
-
if (typeof toast === 'function') {
|
|
4239
|
-
toast('语音输入需要 HTTPS 环境,请通过 HTTPS 地址访问', 'error');
|
|
4240
|
-
}
|
|
4241
|
-
return;
|
|
4242
|
-
}
|
|
4169
|
+
if (this.isRecording) return;
|
|
4243
4170
|
|
|
4244
4171
|
this.isRecording = true;
|
|
4172
|
+
this.audioChunks = [];
|
|
4245
4173
|
this.rawText = '';
|
|
4246
4174
|
|
|
4247
4175
|
var btn = document.getElementById('voiceRecordBtn');
|
|
4248
4176
|
if (btn) btn.classList.add('recording');
|
|
4249
4177
|
this._showStatus('正在请求麦克风权限...', 'var(--text3)');
|
|
4250
4178
|
|
|
4251
|
-
//
|
|
4179
|
+
// 获取麦克风权限
|
|
4252
4180
|
var hasPermission = await this._ensureMicPermission();
|
|
4253
4181
|
if (!hasPermission) {
|
|
4254
4182
|
this.isRecording = false;
|
|
@@ -4260,121 +4188,179 @@ var VoiceInput = {
|
|
|
4260
4188
|
return;
|
|
4261
4189
|
}
|
|
4262
4190
|
|
|
4263
|
-
this._showStatus('正在聆听...', 'var(--text3)');
|
|
4264
|
-
|
|
4265
|
-
// ── Step 3: 启动语音识别(带重试) ──
|
|
4266
4191
|
try {
|
|
4267
|
-
|
|
4268
|
-
|
|
4269
|
-
|
|
4270
|
-
|
|
4271
|
-
|
|
4272
|
-
|
|
4273
|
-
|
|
4274
|
-
|
|
4275
|
-
|
|
4276
|
-
|
|
4277
|
-
|
|
4278
|
-
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
}
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
|
|
4287
|
-
this._startRetries = 0;
|
|
4192
|
+
// 创建音频流
|
|
4193
|
+
this._audioStream = await navigator.mediaDevices.getUserMedia({
|
|
4194
|
+
audio: {
|
|
4195
|
+
channelCount: 1,
|
|
4196
|
+
sampleRate: 16000,
|
|
4197
|
+
echoCancellation: true,
|
|
4198
|
+
noiseSuppression: true,
|
|
4199
|
+
}
|
|
4200
|
+
});
|
|
4201
|
+
|
|
4202
|
+
// 创建 MediaRecorder(优先使用 WAV 格式,回退到 WEBM)
|
|
4203
|
+
var mimeType = 'audio/webm;codecs=opus';
|
|
4204
|
+
if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported) {
|
|
4205
|
+
if (MediaRecorder.isTypeSupported('audio/webm;codecs=opus')) {
|
|
4206
|
+
mimeType = 'audio/webm;codecs=opus';
|
|
4207
|
+
} else if (MediaRecorder.isTypeSupported('audio/webm')) {
|
|
4208
|
+
mimeType = 'audio/webm';
|
|
4209
|
+
} else if (MediaRecorder.isTypeSupported('audio/ogg;codecs=opus')) {
|
|
4210
|
+
mimeType = 'audio/ogg;codecs=opus';
|
|
4211
|
+
}
|
|
4288
4212
|
}
|
|
4213
|
+
|
|
4214
|
+
this.mediaRecorder = new MediaRecorder(this._audioStream, { mimeType: mimeType });
|
|
4215
|
+
var self = this;
|
|
4216
|
+
|
|
4217
|
+
this.mediaRecorder.ondataavailable = function(e) {
|
|
4218
|
+
if (e.data && e.data.size > 0) {
|
|
4219
|
+
self.audioChunks.push(e.data);
|
|
4220
|
+
}
|
|
4221
|
+
};
|
|
4222
|
+
|
|
4223
|
+
this.mediaRecorder.onstop = function() {
|
|
4224
|
+
self._processAudio();
|
|
4225
|
+
};
|
|
4226
|
+
|
|
4227
|
+
this.mediaRecorder.onerror = function(e) {
|
|
4228
|
+
console.error('MediaRecorder error:', e.error);
|
|
4229
|
+
self.isRecording = false;
|
|
4230
|
+
if (btn) btn.classList.remove('recording');
|
|
4231
|
+
self._showStatus('录音出错,请重试', 'var(--danger)');
|
|
4232
|
+
self._cleanupStream();
|
|
4233
|
+
};
|
|
4234
|
+
|
|
4235
|
+
// 开始录音(每100ms收集一次数据)
|
|
4236
|
+
this.mediaRecorder.start(100);
|
|
4237
|
+
this._showStatus('正在录音...', 'var(--text3)');
|
|
4238
|
+
this._recordingStartTime = Date.now();
|
|
4239
|
+
|
|
4240
|
+
} catch (e) {
|
|
4241
|
+
this.isRecording = false;
|
|
4242
|
+
if (btn) btn.classList.remove('recording');
|
|
4243
|
+
this._showStatus('无法启动录音: ' + (e.message || '未知错误'), 'var(--danger)');
|
|
4244
|
+
this._cleanupStream();
|
|
4289
4245
|
}
|
|
4290
4246
|
},
|
|
4291
4247
|
|
|
4292
|
-
/** Stop recording and
|
|
4248
|
+
/** Stop recording and process audio */
|
|
4293
4249
|
stopRecording: function() {
|
|
4294
|
-
if (!this.isRecording || !this.
|
|
4295
|
-
this.isRecording = false;
|
|
4296
|
-
this._startRetries = 0;
|
|
4250
|
+
if (!this.isRecording || !this.mediaRecorder) return;
|
|
4297
4251
|
|
|
4298
4252
|
var btn = document.getElementById('voiceRecordBtn');
|
|
4299
4253
|
if (btn) btn.classList.remove('recording');
|
|
4300
4254
|
|
|
4255
|
+
// 检查录音时长(太短则忽略)
|
|
4256
|
+
var duration = Date.now() - (this._recordingStartTime || 0);
|
|
4257
|
+
if (duration < 500) {
|
|
4258
|
+
this._showStatus('录音时间太短,请按住麦克风说话', 'var(--text3)');
|
|
4259
|
+
this.isRecording = false;
|
|
4260
|
+
try { this.mediaRecorder.stop(); } catch (e) {}
|
|
4261
|
+
this._cleanupStream();
|
|
4262
|
+
return;
|
|
4263
|
+
}
|
|
4264
|
+
|
|
4265
|
+
this._showStatus('正在识别...', 'var(--text3)');
|
|
4266
|
+
this.isRecording = false;
|
|
4267
|
+
|
|
4301
4268
|
try {
|
|
4302
|
-
this.
|
|
4269
|
+
this.mediaRecorder.stop();
|
|
4303
4270
|
} catch (e) {}
|
|
4304
|
-
|
|
4305
|
-
// Only proceed if we have some text
|
|
4306
|
-
if (this.rawText && this.rawText.trim()) {
|
|
4307
|
-
this.optimizeAndPreview();
|
|
4308
|
-
} else {
|
|
4309
|
-
this._showStatus('未检测到语音,请重试', 'var(--text3)');
|
|
4310
|
-
}
|
|
4311
4271
|
},
|
|
4312
4272
|
|
|
4313
|
-
/**
|
|
4314
|
-
|
|
4315
|
-
this.
|
|
4316
|
-
|
|
4317
|
-
|
|
4273
|
+
/** 清理音频流 */
|
|
4274
|
+
_cleanupStream: function() {
|
|
4275
|
+
if (this._audioStream) {
|
|
4276
|
+
this._audioStream.getTracks().forEach(function(t) { t.stop(); });
|
|
4277
|
+
this._audioStream = null;
|
|
4318
4278
|
}
|
|
4319
|
-
var btn = document.getElementById('voiceRecordBtn');
|
|
4320
|
-
if (btn) btn.classList.remove('recording');
|
|
4321
|
-
var statusEl = document.getElementById('voiceStatus');
|
|
4322
|
-
if (statusEl) {
|
|
4323
|
-
statusEl.textContent = '';
|
|
4324
|
-
}
|
|
4325
|
-
this.rawText = '';
|
|
4326
4279
|
},
|
|
4327
4280
|
|
|
4328
|
-
/**
|
|
4329
|
-
|
|
4330
|
-
if (
|
|
4281
|
+
/** 处理录音数据:发送到后端 STT */
|
|
4282
|
+
_processAudio: async function() {
|
|
4283
|
+
if (this.audioChunks.length === 0) {
|
|
4284
|
+
this._showStatus('未检测到语音,请重试', 'var(--text3)');
|
|
4285
|
+
this._cleanupStream();
|
|
4286
|
+
return;
|
|
4287
|
+
}
|
|
4331
4288
|
|
|
4332
|
-
this.isOptimizing = true;
|
|
4333
4289
|
var voiceArea = document.getElementById('voiceInputArea');
|
|
4334
4290
|
var voicePreview = document.getElementById('voicePreview');
|
|
4335
4291
|
var previewText = document.getElementById('voicePreviewText');
|
|
4336
4292
|
var previewHint = document.getElementById('voicePreviewHint');
|
|
4337
4293
|
var previewSend = document.getElementById('voicePreviewSend');
|
|
4338
4294
|
|
|
4339
|
-
//
|
|
4295
|
+
// 显示预览区域
|
|
4340
4296
|
if (voiceArea) voiceArea.style.display = 'none';
|
|
4341
4297
|
if (voicePreview) voicePreview.style.display = 'block';
|
|
4342
|
-
if (previewText) previewText.textContent =
|
|
4343
|
-
if (previewHint) previewHint.textContent = '
|
|
4298
|
+
if (previewText) previewText.textContent = '识别中...';
|
|
4299
|
+
if (previewHint) previewHint.textContent = '正在发送到本地STT引擎';
|
|
4344
4300
|
if (previewSend) previewSend.disabled = true;
|
|
4345
4301
|
|
|
4346
|
-
// Show raw text in input for now
|
|
4347
|
-
this.optimizedText = this.rawText;
|
|
4348
|
-
|
|
4349
4302
|
try {
|
|
4350
|
-
var
|
|
4351
|
-
|
|
4303
|
+
var audioBlob = new Blob(this.audioChunks, { type: this.mediaRecorder ? this.mediaRecorder.mimeType : 'audio/webm' });
|
|
4304
|
+
|
|
4305
|
+
// 发送音频到后端 STT 端点
|
|
4306
|
+
var formData = new FormData();
|
|
4307
|
+
formData.append('audio', audioBlob, 'recording.webm');
|
|
4308
|
+
formData.append('format', 'webm');
|
|
4309
|
+
|
|
4310
|
+
var resp = await fetch('/api/voice-stt', {
|
|
4352
4311
|
method: 'POST',
|
|
4353
|
-
body:
|
|
4354
|
-
text: this.rawText,
|
|
4355
|
-
agent_path: state.activeAgent,
|
|
4356
|
-
session_id: sessionId,
|
|
4357
|
-
mode: state.chatMode,
|
|
4358
|
-
})
|
|
4312
|
+
body: formData,
|
|
4359
4313
|
});
|
|
4360
4314
|
|
|
4361
|
-
|
|
4362
|
-
|
|
4363
|
-
|
|
4364
|
-
|
|
4315
|
+
var data = await resp.json();
|
|
4316
|
+
|
|
4317
|
+
if (data && data.text && data.text.trim()) {
|
|
4318
|
+
this.rawText = data.text.trim();
|
|
4319
|
+
this._sttEngine = data.engine || 'unknown';
|
|
4320
|
+
if (previewText) previewText.textContent = this.rawText;
|
|
4321
|
+
if (previewHint) previewHint.textContent = '已识别 (' + (this._sttEngine || 'local') + ')';
|
|
4365
4322
|
} else if (data && data.error) {
|
|
4366
|
-
if (previewHint) previewHint.textContent = '
|
|
4367
|
-
|
|
4323
|
+
if (previewHint) previewHint.textContent = '识别失败';
|
|
4324
|
+
if (previewText) previewText.textContent = data.error;
|
|
4325
|
+
console.warn('Voice STT error:', data.error);
|
|
4326
|
+
// 如果没有STT引擎,给出提示
|
|
4327
|
+
if (data.available === false) {
|
|
4328
|
+
if (previewHint) previewHint.textContent = '未检测到STT引擎';
|
|
4329
|
+
if (typeof toast === 'function') {
|
|
4330
|
+
toast('请安装语音识别引擎: pip install faster-whisper', 'error');
|
|
4331
|
+
}
|
|
4332
|
+
}
|
|
4333
|
+
} else {
|
|
4334
|
+
if (previewHint) previewHint.textContent = '未识别到文字';
|
|
4335
|
+
if (previewText) previewText.textContent = '未识别到文字,请重试';
|
|
4368
4336
|
}
|
|
4369
4337
|
} catch (e) {
|
|
4370
|
-
|
|
4371
|
-
|
|
4338
|
+
console.error('Voice STT request error:', e);
|
|
4339
|
+
if (previewHint) previewHint.textContent = '网络错误';
|
|
4340
|
+
if (previewText) previewText.textContent = 'STT请求失败,请重试';
|
|
4372
4341
|
}
|
|
4373
4342
|
|
|
4374
|
-
this.
|
|
4343
|
+
this._cleanupStream();
|
|
4375
4344
|
if (previewSend) previewSend.disabled = false;
|
|
4376
4345
|
},
|
|
4377
4346
|
|
|
4347
|
+
/** Cancel recording without processing */
|
|
4348
|
+
cancelRecording: function() {
|
|
4349
|
+
this.isRecording = false;
|
|
4350
|
+
if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
|
|
4351
|
+
try { this.mediaRecorder.abort(); } catch(e) {}
|
|
4352
|
+
}
|
|
4353
|
+
this._cleanupStream();
|
|
4354
|
+
var btn = document.getElementById('voiceRecordBtn');
|
|
4355
|
+
if (btn) btn.classList.remove('recording');
|
|
4356
|
+
var statusEl = document.getElementById('voiceStatus');
|
|
4357
|
+
if (statusEl) {
|
|
4358
|
+
statusEl.textContent = '';
|
|
4359
|
+
}
|
|
4360
|
+
this.rawText = '';
|
|
4361
|
+
this.audioChunks = [];
|
|
4362
|
+
},
|
|
4363
|
+
|
|
4378
4364
|
/** Cancel voice preview and return to voice input mode */
|
|
4379
4365
|
cancelPreview: function() {
|
|
4380
4366
|
var voiceArea = document.getElementById('voiceInputArea');
|
|
@@ -4386,34 +4372,32 @@ var VoiceInput = {
|
|
|
4386
4372
|
if (statusEl) statusEl.textContent = '';
|
|
4387
4373
|
|
|
4388
4374
|
this.rawText = '';
|
|
4389
|
-
this.
|
|
4390
|
-
this.isOptimizing = false;
|
|
4375
|
+
this.audioChunks = [];
|
|
4391
4376
|
},
|
|
4392
4377
|
|
|
4393
|
-
/** Send the
|
|
4378
|
+
/** Send the voice text as a message(直接发送原始STT文本,由后端usersays_correct纠错) */
|
|
4394
4379
|
sendMessage: function() {
|
|
4395
|
-
if (!this.
|
|
4380
|
+
if (!this.rawText || !this.rawText.trim()) return;
|
|
4396
4381
|
|
|
4397
|
-
var text = this.
|
|
4382
|
+
var text = this.rawText.trim();
|
|
4398
4383
|
|
|
4399
|
-
//
|
|
4384
|
+
// 切回文本模式
|
|
4400
4385
|
this.switchMode('text');
|
|
4401
4386
|
|
|
4402
|
-
//
|
|
4387
|
+
// 将文本放入输入框并触发发送
|
|
4403
4388
|
var input = document.getElementById('userInput');
|
|
4404
4389
|
if (input) {
|
|
4405
4390
|
input.value = text;
|
|
4406
4391
|
input.dispatchEvent(new Event('input'));
|
|
4407
4392
|
}
|
|
4408
4393
|
|
|
4409
|
-
//
|
|
4394
|
+
// 重置语音状态
|
|
4410
4395
|
this.rawText = '';
|
|
4411
|
-
this.
|
|
4412
|
-
this.isOptimizing = false;
|
|
4396
|
+
this.audioChunks = [];
|
|
4413
4397
|
|
|
4414
|
-
//
|
|
4398
|
+
// 使用 sendMessage 发送(附带 voice_text 标记)
|
|
4415
4399
|
if (typeof sendMessage === 'function') {
|
|
4416
|
-
sendMessage();
|
|
4400
|
+
sendMessage({ voiceText: text });
|
|
4417
4401
|
}
|
|
4418
4402
|
}
|
|
4419
4403
|
};
|
|
@@ -4435,18 +4419,8 @@ function sendVoiceMessage() {
|
|
|
4435
4419
|
|
|
4436
4420
|
// Initialize voice input on DOM ready
|
|
4437
4421
|
(function() {
|
|
4438
|
-
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
voiceBtn.style.opacity = '0.3';
|
|
4442
|
-
voiceBtn.style.cursor = 'not-allowed';
|
|
4443
|
-
voiceBtn.title = '当前浏览器不支持语音输入';
|
|
4444
|
-
voiceBtn.onclick = function(e) {
|
|
4445
|
-
e.preventDefault();
|
|
4446
|
-
if (typeof toast === 'function') {
|
|
4447
|
-
toast('当前浏览器不支持语音识别,请使用 Chrome 或 Edge', 'error');
|
|
4448
|
-
}
|
|
4449
|
-
};
|
|
4450
|
-
}
|
|
4422
|
+
// Voice input now uses MediaRecorder (always available) + backend STT
|
|
4423
|
+
// No need to check for SpeechRecognition API
|
|
4424
|
+
// The voice button is always enabled; STT engine availability is checked when recording
|
|
4451
4425
|
})();
|
|
4452
4426
|
|
|
@@ -671,17 +671,13 @@ function updateStreamingMessage(msgIdx) {
|
|
|
671
671
|
indicator.remove();
|
|
672
672
|
}
|
|
673
673
|
|
|
674
|
-
// V2 Task Plan
|
|
674
|
+
// V2 Task Plan: NOT rendered in message bubble during streaming
|
|
675
|
+
// (task plan is displayed in the dedicated collapsible task panel instead)
|
|
676
|
+
// Only store _v2TaskPlan for historical message display
|
|
675
677
|
if (msg._v2TaskPlan) {
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
taskPlanEl.className = 'v2-task-plan';
|
|
680
|
-
// Insert before timeline or at appropriate position
|
|
681
|
-
var insertTarget = contentArea.querySelector('.msg-timeline') || contentArea;
|
|
682
|
-
insertTarget.insertBefore(taskPlanEl, insertTarget.firstChild);
|
|
683
|
-
}
|
|
684
|
-
taskPlanEl.innerHTML = '<div class="v2-task-plan-header">📋 任务计划</div><div class="v2-task-plan-body">' + renderMarkdown(msg._v2TaskPlan) + '</div>';
|
|
678
|
+
// Remove any existing task plan element from bubble (in case it was added before)
|
|
679
|
+
const existingPlan = contentArea.querySelector('.v2-task-plan');
|
|
680
|
+
if (existingPlan) existingPlan.remove();
|
|
685
681
|
}
|
|
686
682
|
|
|
687
683
|
// V2 Ask User rendering
|
|
@@ -1128,16 +1124,63 @@ function _assembleV2Content(msg, msgParts) {
|
|
|
1128
1124
|
return '(无回复)';
|
|
1129
1125
|
}
|
|
1130
1126
|
|
|
1127
|
+
// ══════════════════════════════════════════════════════
|
|
1128
|
+
// ── Voice Input: User Bubble Replacement ──
|
|
1129
|
+
// ══════════════════════════════════════════════════════
|
|
1130
|
+
|
|
1131
|
+
/**
|
|
1132
|
+
* 替换指定索引的用户气泡文本(用于 usersays_correct 纠错)
|
|
1133
|
+
* 直接操作 DOM,不触发 renderMessages(),避免干扰流式输出
|
|
1134
|
+
* @param {number} idx - state.messages 中的用户消息索引
|
|
1135
|
+
* @param {string} newText - 纠错后的文本
|
|
1136
|
+
*/
|
|
1137
|
+
function _replaceUserBubble(idx, newText) {
|
|
1138
|
+
var container = document.getElementById('messagesInner');
|
|
1139
|
+
if (!container) return;
|
|
1140
|
+
|
|
1141
|
+
// 找到第 idx+1 个 message-row.user 元素(跳过 tool 消息)
|
|
1142
|
+
var userRows = container.querySelectorAll('.message-row.user');
|
|
1143
|
+
var userCount = 0;
|
|
1144
|
+
for (var i = 0; i < state.messages.length && i <= idx; i++) {
|
|
1145
|
+
if (state.messages[i].role === 'user') {
|
|
1146
|
+
if (i === idx) {
|
|
1147
|
+
// 找到目标行
|
|
1148
|
+
if (userCount < userRows.length) {
|
|
1149
|
+
var row = userRows[userCount];
|
|
1150
|
+
var bubble = row.querySelector('.message-bubble');
|
|
1151
|
+
if (bubble) {
|
|
1152
|
+
// 平滑替换:先淡出,再更新内容,再淡入
|
|
1153
|
+
bubble.style.transition = 'opacity 0.2s ease';
|
|
1154
|
+
bubble.style.opacity = '0.4';
|
|
1155
|
+
setTimeout(function() {
|
|
1156
|
+
// 使用 renderMarkdown 渲染新文本
|
|
1157
|
+
if (typeof renderMarkdown === 'function') {
|
|
1158
|
+
bubble.innerHTML = renderMarkdown(newText);
|
|
1159
|
+
} else {
|
|
1160
|
+
bubble.textContent = newText;
|
|
1161
|
+
}
|
|
1162
|
+
bubble.style.opacity = '1';
|
|
1163
|
+
}, 200);
|
|
1164
|
+
}
|
|
1165
|
+
}
|
|
1166
|
+
break;
|
|
1167
|
+
}
|
|
1168
|
+
userCount++;
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1131
1173
|
// ══════════════════════════════════════════════════════
|
|
1132
1174
|
// ── Send Message (核心 SSE 流式消息发送) ──
|
|
1133
1175
|
// ══════════════════════════════════════════════════════
|
|
1134
1176
|
|
|
1135
|
-
async function sendMessage() {
|
|
1177
|
+
async function sendMessage(opts) {
|
|
1136
1178
|
if (currentView === 'group') {
|
|
1137
1179
|
return sendGroupChat();
|
|
1138
1180
|
}
|
|
1139
1181
|
const input = document.getElementById('userInput');
|
|
1140
1182
|
const text = input.value.trim();
|
|
1183
|
+
const voiceText = (opts && opts.voiceText) ? opts.voiceText : ''; // 语音输入原始文本
|
|
1141
1184
|
// ── 如果正在生成,弹出处理选择框 ──
|
|
1142
1185
|
if (state.isGenerating) {
|
|
1143
1186
|
state.tempInputText = text;
|
|
@@ -1177,7 +1220,7 @@ async function sendMessage() {
|
|
|
1177
1220
|
}
|
|
1178
1221
|
|
|
1179
1222
|
// Add user message
|
|
1180
|
-
state.messages.push({ role: 'user', content: text, time: new Date().toISOString() });
|
|
1223
|
+
state.messages.push({ role: 'user', content: text, time: new Date().toISOString(), _voiceText: voiceText });
|
|
1181
1224
|
renderMessages();
|
|
1182
1225
|
|
|
1183
1226
|
// Clear input
|
|
@@ -1211,6 +1254,7 @@ async function sendMessage() {
|
|
|
1211
1254
|
agent_path: state.activeAgent,
|
|
1212
1255
|
mode: state.chatMode,
|
|
1213
1256
|
escalated: state.escalated,
|
|
1257
|
+
voice_text: voiceText, // 语音转文字原始文本(用于后端 usersays_correct)
|
|
1214
1258
|
}),
|
|
1215
1259
|
signal: state.abortController.signal,
|
|
1216
1260
|
});
|
|
@@ -1406,6 +1450,31 @@ async function sendMessage() {
|
|
|
1406
1450
|
// evt.data contains: {usersays_correct, task_plan, tools_to_call, remember, recall, ask_user, finish}
|
|
1407
1451
|
// Store for rendering
|
|
1408
1452
|
state.messages[msgIdx]._v2Parsed = evt.data;
|
|
1453
|
+
// ── usersays_correct:语音输入纠错 — 替换用户气泡文本 ──
|
|
1454
|
+
if (evt.data && evt.data.usersays_correct && evt.data.usersays_correct.trim()) {
|
|
1455
|
+
var correctedText = evt.data.usersays_correct.trim();
|
|
1456
|
+
// 找到对应的用户消息(当前消息的前一条)
|
|
1457
|
+
var userMsgIdx = msgIdx - 1;
|
|
1458
|
+
// 确认是语音消息(有 _voiceText 标记)
|
|
1459
|
+
if (userMsgIdx >= 0 && state.messages[userMsgIdx] &&
|
|
1460
|
+
state.messages[userMsgIdx].role === 'user' &&
|
|
1461
|
+
state.messages[userMsgIdx]._voiceText) {
|
|
1462
|
+
var oldContent = state.messages[userMsgIdx].content;
|
|
1463
|
+
if (oldContent !== correctedText) {
|
|
1464
|
+
state.messages[userMsgIdx].content = correctedText;
|
|
1465
|
+
state.messages[userMsgIdx]._voiceCorrected = true;
|
|
1466
|
+
// 更新用户气泡的 DOM(不重绘整个列表,直接替换文本)
|
|
1467
|
+
_replaceUserBubble(userMsgIdx, correctedText);
|
|
1468
|
+
// 更新侧边栏会话预览
|
|
1469
|
+
if (state.sessions && state.sessions.length > 0) {
|
|
1470
|
+
state.sessions[0].preview = correctedText.length > 40 ? correctedText.slice(0, 40) + '...' : correctedText;
|
|
1471
|
+
if (typeof renderSessions === 'function') {
|
|
1472
|
+
renderSessions();
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1476
|
+
}
|
|
1477
|
+
}
|
|
1409
1478
|
// Render task plan if updated
|
|
1410
1479
|
if (evt.data && evt.data.task_plan) {
|
|
1411
1480
|
state.messages[msgIdx]._v2TaskPlan = evt.data.task_plan;
|
|
@@ -134,7 +134,7 @@
|
|
|
134
134
|
</div>
|
|
135
135
|
<!-- Voice preview area (shown after recording, before sending) -->
|
|
136
136
|
<div class="voice-preview" id="voicePreview" style="display:none">
|
|
137
|
-
<div class="voice-preview-label">语音输入 · <span id="voicePreviewHint"
|
|
137
|
+
<div class="voice-preview-label">语音输入 · <span id="voicePreviewHint">识别中...</span></div>
|
|
138
138
|
<div class="voice-preview-text" id="voicePreviewText"></div>
|
|
139
139
|
<div class="voice-preview-actions">
|
|
140
140
|
<button class="voice-preview-cancel" onclick="cancelVoicePreview()">取消</button>
|