myagent-ai 1.47.16 → 1.47.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/main_agent.py +194 -2
- package/aiskills/browser_stealth.py +188 -93
- package/package.json +1 -1
- package/web/api_server.py +94 -2
- package/web/ui/chat/chat_main.js +9 -7
- package/web/ui/chat/flow_engine.js +11 -7
package/agents/main_agent.py
CHANGED
|
@@ -1222,8 +1222,200 @@ class MainAgent(BaseAgent):
|
|
|
1222
1222
|
continue
|
|
1223
1223
|
|
|
1224
1224
|
else:
|
|
1225
|
-
#
|
|
1226
|
-
|
|
1225
|
+
# 没有原生工具调用 → 检查是否为旧格式 <output> XML(某些模型不支持 tool_calling)
|
|
1226
|
+
raw_content = (response.content or "").strip()
|
|
1227
|
+
|
|
1228
|
+
# [v1.47.16] 兼容旧格式:当 LLM 输出 <output> XML 时,用 output_parser 解析
|
|
1229
|
+
if raw_content.startswith("<output") or ("<output>" in raw_content and "<toolstocal>" in raw_content):
|
|
1230
|
+
logger.info(f"[{task_id}] 检测到旧格式 <output> XML 输出,启用 output_parser 解析")
|
|
1231
|
+
parsed = parse_output(raw_content)
|
|
1232
|
+
|
|
1233
|
+
if parsed.parse_success:
|
|
1234
|
+
# 1) 处理 mainsubject → 更新会话标题
|
|
1235
|
+
if parsed.mainsubject and self.dispatcher:
|
|
1236
|
+
try:
|
|
1237
|
+
await self.dispatcher.dispatch(
|
|
1238
|
+
tool_name="update_conversation_title",
|
|
1239
|
+
params={"title": parsed.mainsubject, "session_id": context.session_id},
|
|
1240
|
+
timeout=10,
|
|
1241
|
+
)
|
|
1242
|
+
except Exception:
|
|
1243
|
+
pass
|
|
1244
|
+
|
|
1245
|
+
# 2) 处理 remember → 保存记忆
|
|
1246
|
+
if parsed.remember and self.dispatcher:
|
|
1247
|
+
try:
|
|
1248
|
+
await self.dispatcher.dispatch(
|
|
1249
|
+
tool_name="save_memory",
|
|
1250
|
+
params={
|
|
1251
|
+
"content": parsed.remember,
|
|
1252
|
+
"type": parsed.remember_type or "session",
|
|
1253
|
+
"session_id": context.session_id,
|
|
1254
|
+
},
|
|
1255
|
+
timeout=10,
|
|
1256
|
+
)
|
|
1257
|
+
except Exception:
|
|
1258
|
+
pass
|
|
1259
|
+
|
|
1260
|
+
# 3) 处理 task_plan
|
|
1261
|
+
if parsed.task_plan and self.dispatcher:
|
|
1262
|
+
try:
|
|
1263
|
+
await self.dispatcher.dispatch(
|
|
1264
|
+
tool_name="task_plan",
|
|
1265
|
+
params={"action": "create", "plan": parsed.task_plan},
|
|
1266
|
+
timeout=10,
|
|
1267
|
+
)
|
|
1268
|
+
current_task_plan = parsed.task_plan
|
|
1269
|
+
await self._emit_v2_event(
|
|
1270
|
+
"v2_task_plan",
|
|
1271
|
+
{"plan": truncate_str(current_task_plan, 2000)},
|
|
1272
|
+
stream_callback,
|
|
1273
|
+
)
|
|
1274
|
+
except Exception:
|
|
1275
|
+
pass
|
|
1276
|
+
|
|
1277
|
+
# 4) 处理 tools_to_call → 执行工具
|
|
1278
|
+
if parsed.tools_to_call:
|
|
1279
|
+
logger.info(f"[{task_id}] 从 <output> XML 提取到 {len(parsed.tools_to_call)} 个工具调用")
|
|
1280
|
+
|
|
1281
|
+
# 添加 assistant 消息到消息列表
|
|
1282
|
+
messages.append(Message(
|
|
1283
|
+
role="assistant",
|
|
1284
|
+
content=raw_content,
|
|
1285
|
+
))
|
|
1286
|
+
|
|
1287
|
+
# 保存 LLM 原始输出
|
|
1288
|
+
if self.memory:
|
|
1289
|
+
self.memory.add_session(agent_id=_effective_agent_id,
|
|
1290
|
+
session_id=context.session_id,
|
|
1291
|
+
role="assistant",
|
|
1292
|
+
content=raw_content,
|
|
1293
|
+
key="llm_output",
|
|
1294
|
+
importance=0.3,
|
|
1295
|
+
)
|
|
1296
|
+
|
|
1297
|
+
for tool_desc in parsed.tools_to_call:
|
|
1298
|
+
_tc_name = tool_desc.get("toolname", "")
|
|
1299
|
+
_tc_parms = tool_desc.get("parms", "{}")
|
|
1300
|
+
_tc_timeout = int(tool_desc.get("timeout", 120))
|
|
1301
|
+
|
|
1302
|
+
if not _tc_name:
|
|
1303
|
+
continue
|
|
1304
|
+
|
|
1305
|
+
# 注入 session_id
|
|
1306
|
+
if _tc_name in ("save_memory", "recall_memory", "update_conversation_title"):
|
|
1307
|
+
if isinstance(_tc_parms, str):
|
|
1308
|
+
try:
|
|
1309
|
+
_tc_parms_dict = json.loads(_tc_parms)
|
|
1310
|
+
except (json.JSONDecodeError, TypeError):
|
|
1311
|
+
_tc_parms_dict = {"raw_input": _tc_parms}
|
|
1312
|
+
else:
|
|
1313
|
+
_tc_parms_dict = _tc_parms
|
|
1314
|
+
_tc_parms_dict.setdefault("session_id", context.session_id)
|
|
1315
|
+
_tc_parms = json.dumps(_tc_parms_dict, ensure_ascii=False)
|
|
1316
|
+
|
|
1317
|
+
# 发送工具开始事件
|
|
1318
|
+
await self._emit_v2_event(
|
|
1319
|
+
"v2_tool_start",
|
|
1320
|
+
{"tool": {"toolname": _tc_name, "parms": truncate_str(str(_tc_parms), 500)}},
|
|
1321
|
+
stream_callback,
|
|
1322
|
+
)
|
|
1323
|
+
|
|
1324
|
+
self._add_exec_event("tool_call", {
|
|
1325
|
+
"title": f"调用工具: {_tc_name}",
|
|
1326
|
+
"tool_name": _tc_name,
|
|
1327
|
+
"arguments": str(_tc_parms),
|
|
1328
|
+
})
|
|
1329
|
+
|
|
1330
|
+
# 执行工具
|
|
1331
|
+
tool_result = await self._execute_v2_tool(
|
|
1332
|
+
_tc_name, str(_tc_parms), _tc_timeout,
|
|
1333
|
+
context, task_id,
|
|
1334
|
+
stream_callback=stream_callback,
|
|
1335
|
+
sent_files=_sent_files,
|
|
1336
|
+
agent_path=agent_path,
|
|
1337
|
+
)
|
|
1338
|
+
|
|
1339
|
+
# 提取输出
|
|
1340
|
+
if tool_result is None:
|
|
1341
|
+
tool_result = {"success": False, "error": "工具返回了空结果"}
|
|
1342
|
+
_output_text = (
|
|
1343
|
+
tool_result.get("output", "")
|
|
1344
|
+
or tool_result.get("message", "")
|
|
1345
|
+
or tool_result.get("stdout", "")
|
|
1346
|
+
or tool_result.get("error", "")
|
|
1347
|
+
)
|
|
1348
|
+
if not _output_text and tool_result.get("data"):
|
|
1349
|
+
try:
|
|
1350
|
+
_output_text = json.dumps(tool_result["data"], ensure_ascii=False, default=str)[:30000]
|
|
1351
|
+
except Exception:
|
|
1352
|
+
_output_text = str(tool_result["data"])[:30000]
|
|
1353
|
+
|
|
1354
|
+
# 发送工具结果事件
|
|
1355
|
+
await self._emit_v2_event(
|
|
1356
|
+
"v2_tool_result",
|
|
1357
|
+
{"tool": {"toolname": _tc_name}, "result": {
|
|
1358
|
+
"success": tool_result.get("success", False),
|
|
1359
|
+
"output": truncate_str(_output_text, 30000),
|
|
1360
|
+
"error": truncate_str(tool_result.get("error", ""), 30000),
|
|
1361
|
+
}},
|
|
1362
|
+
stream_callback,
|
|
1363
|
+
)
|
|
1364
|
+
|
|
1365
|
+
self._add_exec_event("tool_result", {
|
|
1366
|
+
"title": f"工具结果: {_tc_name}",
|
|
1367
|
+
"tool_name": _tc_name,
|
|
1368
|
+
"success": tool_result.get("success", False),
|
|
1369
|
+
"summary": truncate_str(_output_text, 30000),
|
|
1370
|
+
})
|
|
1371
|
+
|
|
1372
|
+
# 添加 tool result 消息
|
|
1373
|
+
messages.append(Message(
|
|
1374
|
+
role="user",
|
|
1375
|
+
content=f"[工具结果: {_tc_name}] {truncate_str(_output_text, 5000)}",
|
|
1376
|
+
))
|
|
1377
|
+
|
|
1378
|
+
# 工具执行完毕 → 继续循环让 LLM 处理结果
|
|
1379
|
+
continue
|
|
1380
|
+
|
|
1381
|
+
# 5) 没有工具但有 reply → 提取纯文本回复
|
|
1382
|
+
if parsed.reply:
|
|
1383
|
+
reply_text = parsed.reply.strip()
|
|
1384
|
+
else:
|
|
1385
|
+
# 兜底:去除所有 XML 标签
|
|
1386
|
+
import re as _re_xml
|
|
1387
|
+
reply_text = _re_xml.sub(r'<[^>]+>', '', raw_content).strip()
|
|
1388
|
+
|
|
1389
|
+
if not reply_text:
|
|
1390
|
+
reply_text = "处理完毕。"
|
|
1391
|
+
|
|
1392
|
+
context.working_memory["final_response"] = reply_text
|
|
1393
|
+
await self._emit_v2_event("v2_reasoning", {"content": truncate_str(reply_text, 3000)}, stream_callback)
|
|
1394
|
+
|
|
1395
|
+
# 保存回复到会话记忆
|
|
1396
|
+
if self.memory:
|
|
1397
|
+
self.memory.add_session(agent_id=_effective_agent_id,
|
|
1398
|
+
session_id=context.session_id,
|
|
1399
|
+
role="assistant",
|
|
1400
|
+
content=reply_text,
|
|
1401
|
+
key="reply",
|
|
1402
|
+
importance=0.5,
|
|
1403
|
+
)
|
|
1404
|
+
|
|
1405
|
+
# 保存 LLM 原始输出
|
|
1406
|
+
if self.memory:
|
|
1407
|
+
self.memory.add_session(agent_id=_effective_agent_id,
|
|
1408
|
+
session_id=context.session_id,
|
|
1409
|
+
role="assistant",
|
|
1410
|
+
content=raw_content,
|
|
1411
|
+
key="llm_output",
|
|
1412
|
+
importance=0.3,
|
|
1413
|
+
)
|
|
1414
|
+
|
|
1415
|
+
break
|
|
1416
|
+
|
|
1417
|
+
# 纯文本回复(非 XML 格式)
|
|
1418
|
+
reply_text = raw_content
|
|
1227
1419
|
logger.info(f"[{task_id}] 无工具调用,任务完成 (reply长度={len(reply_text)})")
|
|
1228
1420
|
|
|
1229
1421
|
if not reply_text:
|
|
@@ -544,6 +544,73 @@ class StealthBrowser:
|
|
|
544
544
|
|
|
545
545
|
async def start(self) -> SkillResult:
|
|
546
546
|
"""启动反检测浏览器"""
|
|
547
|
+
# [v1.47.16] VNC/Termux 模式下直接使用 Firefox,不走 DrissionPage/Chromium
|
|
548
|
+
# Chromium 在 proot ARM64 下与 DrissionPage 不兼容,
|
|
549
|
+
# 所以 VNC 模式下先检测环境,直接走 Firefox 路径,不需要 import DrissionPage
|
|
550
|
+
_is_vnc_mode = False
|
|
551
|
+
_is_termux_env = False
|
|
552
|
+
try:
|
|
553
|
+
from core.env_detect import is_termux
|
|
554
|
+
_is_termux_env = is_termux()
|
|
555
|
+
except ImportError:
|
|
556
|
+
pass
|
|
557
|
+
try:
|
|
558
|
+
from core.env_detect import is_desktop
|
|
559
|
+
if not is_desktop():
|
|
560
|
+
_is_vnc_mode = True
|
|
561
|
+
except ImportError:
|
|
562
|
+
if not _has_display():
|
|
563
|
+
_is_vnc_mode = True
|
|
564
|
+
|
|
565
|
+
# VNC 模式:跳过所有 Chrome/DrissionPage 逻辑,直接用 Firefox
|
|
566
|
+
if _is_vnc_mode:
|
|
567
|
+
logger.info("VNC/非桌面环境: 直接启动 Firefox(跳过 Chromium 检测)")
|
|
568
|
+
if not self._headless:
|
|
569
|
+
display = _ensure_display()
|
|
570
|
+
if display:
|
|
571
|
+
self._vnc_used = display.get("vnc", False)
|
|
572
|
+
self._xvfb_started_by_us = display.get("xvfb_standalone", False)
|
|
573
|
+
if self._vnc_used:
|
|
574
|
+
return self._start_firefox_in_vnc()
|
|
575
|
+
# VNC/Xvfb 不可用
|
|
576
|
+
if _is_termux_env:
|
|
577
|
+
return SkillResult(
|
|
578
|
+
success=False,
|
|
579
|
+
error=(
|
|
580
|
+
"Termux+Ubuntu 环境仅支持通过 VNC 启动浏览器,"
|
|
581
|
+
"VNC 启动失败。请先启动 VNC 远程桌面后再使用浏览器功能。"
|
|
582
|
+
),
|
|
583
|
+
)
|
|
584
|
+
# 非 Termux:尝试 Xvfb
|
|
585
|
+
if display and display.get("xvfb_standalone"):
|
|
586
|
+
# 有 Xvfb 但没有 VNC → 用 Xvfb + Chromium(走下面的正常流程)
|
|
587
|
+
pass
|
|
588
|
+
else:
|
|
589
|
+
# 没有 Xvfb 也没有 VNC → 降级 headless 或报错
|
|
590
|
+
logger.warning("无显示环境且 VNC/Xvfb 均不可用,尝试 Firefox headless 模式")
|
|
591
|
+
return self._start_firefox_in_vnc()
|
|
592
|
+
else:
|
|
593
|
+
# headless=True 被显式请求
|
|
594
|
+
if _is_termux_env:
|
|
595
|
+
logger.info("Termux+Ubuntu 环境: 忽略 headless 请求,强制使用 VNC 模式")
|
|
596
|
+
self._headless = False
|
|
597
|
+
display = _ensure_display()
|
|
598
|
+
if display:
|
|
599
|
+
self._vnc_used = display.get("vnc", False)
|
|
600
|
+
self._xvfb_started_by_us = display.get("xvfb_standalone", False)
|
|
601
|
+
if self._vnc_used:
|
|
602
|
+
return self._start_firefox_in_vnc()
|
|
603
|
+
return SkillResult(
|
|
604
|
+
success=False,
|
|
605
|
+
error=(
|
|
606
|
+
"Termux+Ubuntu 环境仅支持通过 VNC 启动浏览器,"
|
|
607
|
+
"VNC 启动失败。headless 模式在此环境下不可用。"
|
|
608
|
+
),
|
|
609
|
+
)
|
|
610
|
+
# 非 Termux headless → 仍然尝试 Firefox
|
|
611
|
+
return self._start_firefox_in_vnc()
|
|
612
|
+
|
|
613
|
+
# ── 桌面环境:使用 DrissionPage + Chromium ──
|
|
547
614
|
try:
|
|
548
615
|
from DrissionPage import Chromium, ChromiumOptions
|
|
549
616
|
except ImportError:
|
|
@@ -613,98 +680,10 @@ class StealthBrowser:
|
|
|
613
680
|
else:
|
|
614
681
|
logger.info("桌面环境,使用系统 Chrome 原生参数")
|
|
615
682
|
|
|
616
|
-
# ──
|
|
617
|
-
#
|
|
618
|
-
# Termux+Ubuntu: 仅支持 VNC,不降级到 headless
|
|
619
|
-
# 非 Termux 容器: VNC > Xvfb > headless 降级
|
|
620
|
-
_is_termux_env = False
|
|
621
|
-
try:
|
|
622
|
-
from core.env_detect import is_termux
|
|
623
|
-
_is_termux_env = is_termux()
|
|
624
|
-
except ImportError:
|
|
625
|
-
pass
|
|
626
|
-
|
|
683
|
+
# ── 显示环境处理(仅桌面环境到达此代码路径)──
|
|
684
|
+
# VNC/Termux 模式已在方法开头走 Firefox 分支,这里只处理桌面环境
|
|
627
685
|
if not self._headless:
|
|
628
|
-
|
|
629
|
-
from core.env_detect import is_desktop
|
|
630
|
-
if is_desktop():
|
|
631
|
-
# 桌面环境: 直接用系统 Chrome
|
|
632
|
-
logger.info("桌面环境,直接使用系统浏览器,跳过 VNC/Xvfb")
|
|
633
|
-
else:
|
|
634
|
-
# 非桌面环境 (容器/Termux): 通过 _ensure_display() 获取显示
|
|
635
|
-
display = _ensure_display()
|
|
636
|
-
if display:
|
|
637
|
-
self._vnc_used = display.get("vnc", False)
|
|
638
|
-
self._xvfb_started_by_us = display.get("xvfb_standalone", False)
|
|
639
|
-
if self._vnc_used:
|
|
640
|
-
logger.info(f"复用 VNC 远程桌面显示 ({display['display']}),可在 VNC 中查看浏览器操作")
|
|
641
|
-
# [v1.47.16] VNC 模式下直接用 Firefox,不走 DrissionPage/Chromium
|
|
642
|
-
# Chromium 在 proot ARM64 下与 DrissionPage 不兼容
|
|
643
|
-
logger.info("VNC 模式: 直接启动 Firefox(跳过 Chromium 检测)")
|
|
644
|
-
return self._start_firefox_in_vnc()
|
|
645
|
-
else:
|
|
646
|
-
# ── Termux+Ubuntu: VNC 失败 → 直接报错,不降级 headless ──
|
|
647
|
-
if _is_termux_env:
|
|
648
|
-
return SkillResult(
|
|
649
|
-
success=False,
|
|
650
|
-
error=(
|
|
651
|
-
"Termux+Ubuntu 环境仅支持通过 VNC 启动浏览器,"
|
|
652
|
-
"VNC 启动失败。请先启动 VNC 远程桌面,"
|
|
653
|
-
"或通过 Web 管理面板打开 VNC 后再使用浏览器功能。"
|
|
654
|
-
),
|
|
655
|
-
)
|
|
656
|
-
# ── 非 Termux 容器: 降级到 headless ──
|
|
657
|
-
self._headless = True
|
|
658
|
-
logger.warning(
|
|
659
|
-
"无显示环境且 VNC/Xvfb 均不可用,自动降级为 headless 模式"
|
|
660
|
-
)
|
|
661
|
-
except ImportError:
|
|
662
|
-
# env_detect 不可用时,降级为原有 X11 检测逻辑
|
|
663
|
-
if not _has_display():
|
|
664
|
-
display = _ensure_display()
|
|
665
|
-
if display:
|
|
666
|
-
self._vnc_used = display.get("vnc", False)
|
|
667
|
-
self._xvfb_started_by_us = display.get("xvfb_standalone", False)
|
|
668
|
-
if self._vnc_used:
|
|
669
|
-
# [v1.47.16] VNC 模式下直接用 Firefox
|
|
670
|
-
logger.info("VNC 模式 (env_detect不可用): 直接启动 Firefox")
|
|
671
|
-
return self._start_firefox_in_vnc()
|
|
672
|
-
else:
|
|
673
|
-
if _is_termux_env:
|
|
674
|
-
return SkillResult(
|
|
675
|
-
success=False,
|
|
676
|
-
error=(
|
|
677
|
-
"Termux+Ubuntu 环境仅支持通过 VNC 启动浏览器,"
|
|
678
|
-
"VNC 启动失败。请先启动 VNC 远程桌面后再使用浏览器功能。"
|
|
679
|
-
),
|
|
680
|
-
)
|
|
681
|
-
self._headless = True
|
|
682
|
-
logger.warning(
|
|
683
|
-
"无 DISPLAY 环境且 VNC/Xvfb 均不可用,自动降级为 headless 模式"
|
|
684
|
-
)
|
|
685
|
-
else:
|
|
686
|
-
# headless=True 被显式请求,但 Termux 环境下仍强制使用 VNC
|
|
687
|
-
# 因为 headless Chromium 在 Termux 下容易被 OOM Kill
|
|
688
|
-
if _is_termux_env:
|
|
689
|
-
logger.info("Termux+Ubuntu 环境: 忽略 headless 请求,强制使用 VNC 模式")
|
|
690
|
-
self._headless = False
|
|
691
|
-
display = _ensure_display()
|
|
692
|
-
if display:
|
|
693
|
-
self._vnc_used = display.get("vnc", False)
|
|
694
|
-
self._xvfb_started_by_us = display.get("xvfb_standalone", False)
|
|
695
|
-
if self._vnc_used:
|
|
696
|
-
# [v1.47.16] VNC 模式下直接用 Firefox
|
|
697
|
-
logger.info(f"Termux+Ubuntu VNC 模式: 直接启动 Firefox")
|
|
698
|
-
return self._start_firefox_in_vnc()
|
|
699
|
-
else:
|
|
700
|
-
return SkillResult(
|
|
701
|
-
success=False,
|
|
702
|
-
error=(
|
|
703
|
-
"Termux+Ubuntu 环境仅支持通过 VNC 启动浏览器,"
|
|
704
|
-
"VNC 启动失败。headless 模式在此环境下不可用(容易被 OOM Kill)。"
|
|
705
|
-
"请先启动 VNC 远程桌面后再使用浏览器功能。"
|
|
706
|
-
),
|
|
707
|
-
)
|
|
686
|
+
logger.info("桌面环境,直接使用系统浏览器,跳过 VNC/Xvfb")
|
|
708
687
|
|
|
709
688
|
# 无头模式(co.headless() 内部设置 --headless=new)
|
|
710
689
|
if self._headless:
|
|
@@ -1273,8 +1252,11 @@ class StealthBrowser:
|
|
|
1273
1252
|
if not self._ensure_page():
|
|
1274
1253
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1275
1254
|
|
|
1255
|
+
# [v1.47.16] Firefox+VNC 模式:通过 xdotool type 输入
|
|
1256
|
+
if self._firefox_mode:
|
|
1257
|
+
return self._firefox_fill(selector or "", text, clear, wait)
|
|
1258
|
+
|
|
1276
1259
|
try:
|
|
1277
|
-
# 如果提供了 selector,先点击聚焦
|
|
1278
1260
|
if selector:
|
|
1279
1261
|
ele = self._find_element(selector, timeout=10)
|
|
1280
1262
|
if not ele:
|
|
@@ -1366,8 +1348,11 @@ class StealthBrowser:
|
|
|
1366
1348
|
if not self._ensure_page():
|
|
1367
1349
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1368
1350
|
|
|
1351
|
+
# [v1.47.16] Firefox+VNC 模式:通过 xdotool key 按键
|
|
1352
|
+
if self._firefox_mode:
|
|
1353
|
+
return self._firefox_press_key(key, selector, wait)
|
|
1354
|
+
|
|
1369
1355
|
try:
|
|
1370
|
-
# 如果提供了 selector,先聚焦
|
|
1371
1356
|
if selector:
|
|
1372
1357
|
ele = self._find_element(selector, timeout=10)
|
|
1373
1358
|
if ele:
|
|
@@ -1490,6 +1475,13 @@ class StealthBrowser:
|
|
|
1490
1475
|
if not self._ensure_page():
|
|
1491
1476
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1492
1477
|
|
|
1478
|
+
# [v1.47.16] Firefox+VNC 模式:无法通过 CDP 执行 JS
|
|
1479
|
+
if self._firefox_mode:
|
|
1480
|
+
return SkillResult(
|
|
1481
|
+
success=False,
|
|
1482
|
+
error="Firefox+VNC 模式下不支持 JS 执行。请在 VNC 中手动操作,或切换到桌面环境使用 Chromium。",
|
|
1483
|
+
)
|
|
1484
|
+
|
|
1493
1485
|
try:
|
|
1494
1486
|
_script = script.strip()
|
|
1495
1487
|
# 检测是否包含 return 语句
|
|
@@ -1546,6 +1538,13 @@ class StealthBrowser:
|
|
|
1546
1538
|
if not self._ensure_page():
|
|
1547
1539
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1548
1540
|
|
|
1541
|
+
# [v1.47.16] Firefox+VNC 模式:无法获取页面内容
|
|
1542
|
+
if self._firefox_mode:
|
|
1543
|
+
return SkillResult(
|
|
1544
|
+
success=False,
|
|
1545
|
+
error="Firefox+VNC 模式下不支持获取页面内容。请在 VNC 中手动查看,或切换到桌面环境使用 Chromium。",
|
|
1546
|
+
)
|
|
1547
|
+
|
|
1549
1548
|
try:
|
|
1550
1549
|
# Bug Fix: DrissionPage 没有 page.text 属性
|
|
1551
1550
|
# 需要通过 page.ele('tag:html').text 获取页面文本
|
|
@@ -1584,6 +1583,13 @@ class StealthBrowser:
|
|
|
1584
1583
|
if not self._ensure_page():
|
|
1585
1584
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1586
1585
|
|
|
1586
|
+
# [v1.47.16] Firefox+VNC 模式:无法获取页面 HTML
|
|
1587
|
+
if self._firefox_mode:
|
|
1588
|
+
return SkillResult(
|
|
1589
|
+
success=False,
|
|
1590
|
+
error="Firefox+VNC 模式下不支持获取页面 HTML。请在 VNC 中手动查看,或切换到桌面环境使用 Chromium。",
|
|
1591
|
+
)
|
|
1592
|
+
|
|
1587
1593
|
try:
|
|
1588
1594
|
html = self._page.html or ""
|
|
1589
1595
|
output_html = html[:50000] if len(html) > 50000 else html
|
|
@@ -1608,6 +1614,13 @@ class StealthBrowser:
|
|
|
1608
1614
|
if not self._ensure_page():
|
|
1609
1615
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1610
1616
|
|
|
1617
|
+
# [v1.47.16] Firefox+VNC 模式:无法等待元素
|
|
1618
|
+
if self._firefox_mode:
|
|
1619
|
+
return SkillResult(
|
|
1620
|
+
success=False,
|
|
1621
|
+
error="Firefox+VNC 模式下不支持等待元素。请在 VNC 中手动操作。",
|
|
1622
|
+
)
|
|
1623
|
+
|
|
1611
1624
|
try:
|
|
1612
1625
|
ele = self._find_element(selector, timeout=timeout)
|
|
1613
1626
|
if ele:
|
|
@@ -1662,6 +1675,13 @@ class StealthBrowser:
|
|
|
1662
1675
|
if not self._ensure_page():
|
|
1663
1676
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1664
1677
|
|
|
1678
|
+
# [v1.47.16] Firefox+VNC 模式:Cookie 已由 Firefox 自动保存到 profile 目录
|
|
1679
|
+
if self._firefox_mode:
|
|
1680
|
+
return SkillResult(
|
|
1681
|
+
success=True,
|
|
1682
|
+
message="Firefox+VNC 模式: Cookie 由 Firefox 自动保存到 profile 目录",
|
|
1683
|
+
)
|
|
1684
|
+
|
|
1665
1685
|
try:
|
|
1666
1686
|
cookies = self._page.cookies()
|
|
1667
1687
|
cookie_list = []
|
|
@@ -1691,6 +1711,13 @@ class StealthBrowser:
|
|
|
1691
1711
|
if not self._ensure_page():
|
|
1692
1712
|
return SkillResult(success=False, error="浏览器未启动")
|
|
1693
1713
|
|
|
1714
|
+
# [v1.47.16] Firefox+VNC 模式:Cookie 由 Firefox 自动从 profile 目录加载
|
|
1715
|
+
if self._firefox_mode:
|
|
1716
|
+
return SkillResult(
|
|
1717
|
+
success=True,
|
|
1718
|
+
message="Firefox+VNC 模式: Cookie 由 Firefox 自动从 profile 目录加载",
|
|
1719
|
+
)
|
|
1720
|
+
|
|
1694
1721
|
try:
|
|
1695
1722
|
from core.browser_profile import get_browser_profile_manager
|
|
1696
1723
|
mgr = get_browser_profile_manager()
|
|
@@ -1970,6 +1997,74 @@ class StealthBrowser:
|
|
|
1970
1997
|
error=f"Firefox+VNC 输入失败: {e}",
|
|
1971
1998
|
)
|
|
1972
1999
|
|
|
2000
|
+
def _firefox_press_key(self, key: str, selector: str = "", wait: float = 0.5) -> SkillResult:
    """[v1.47.16] Press a key or key combination in Firefox via ``xdotool key``.

    The first visible window whose class matches ``firefox`` is looked up
    with ``xdotool search``, activated, and then sent the key sequence.

    Args:
        key: Key name or ``+``-separated combination (e.g. ``"Enter"``,
            ``"ctrl+a"``). Names found in the local lookup table
            (case-insensitive) are translated to the names passed to
            xdotool; any other name is passed through unchanged.
        selector: Accepted for signature parity with the caller; this
            implementation does not use it.
        wait: Seconds to sleep after the key press; skipped when <= 0.

    Returns:
        SkillResult with ``success=True`` and a message once xdotool has
        exited with status 0. ``success=False`` with an error text when the
        key spec is empty/malformed, no Firefox window is found, xdotool
        exits non-zero, or any exception is raised while running it.
    """
    # JS-style key names (lower-cased) -> key names handed to xdotool.
    key_map = {
        'enter': 'Return', 'tab': 'Tab', 'escape': 'Escape', 'esc': 'Escape',
        'backspace': 'BackSpace', 'delete': 'Delete', 'del': 'Delete',
        'arrowup': 'Up', 'arrowdown': 'Down', 'arrowleft': 'Left', 'arrowright': 'Right',
        'up': 'Up', 'down': 'Down', 'left': 'Left', 'right': 'Right',
        'home': 'Home', 'end': 'End',
        'pageup': 'Page_Up', 'pagedown': 'Page_Down',
        'space': 'space', ' ': 'space',
        'ctrl': 'ctrl', 'shift': 'shift', 'alt': 'alt', 'meta': 'super',
        'cmd': 'super', 'command': 'super',
    }
    # F1-F12
    key_map.update({f'f{i}': f'F{i}' for i in range(1, 13)})

    # Split the combination. A component that consists only of whitespace is
    # kept as a single space so the ' ' -> 'space' entry above is reachable;
    # a truly empty component (e.g. "" or "ctrl+") makes the spec invalid.
    names = [raw.strip() or raw[:1] for raw in (key or "").split('+')]
    if not all(names):
        # Previously such a spec was handed to xdotool and reported as pressed.
        return SkillResult(
            success=False,
            error=f"Firefox+VNC 按键失败: 无效的按键名称 {key!r}",
        )
    xdotool_key_str = '+'.join(key_map.get(name.lower(), name) for name in names)

    # Fall back to display :99 when DISPLAY is not set in the environment.
    display = os.environ.get("DISPLAY", ":99")
    env = {**os.environ, "DISPLAY": display}

    try:
        # Locate the Firefox window.
        search = subprocess.run(
            ["xdotool", "search", "--onlyvisible", "--class", "firefox"],
            capture_output=True, text=True, timeout=5,
            env=env, start_new_session=True,
        )
        if search.returncode != 0 or not search.stdout.strip():
            return SkillResult(
                success=False,
                error="Firefox+VNC: 未找到 Firefox 窗口。请在 VNC 中确认 Firefox 已打开。",
            )

        # Use the first window id printed by the search.
        window_id = search.stdout.strip().split()[0]

        # Activation is best-effort: its result is intentionally not checked,
        # because the key press below targets the window id explicitly.
        subprocess.run(
            ["xdotool", "windowactivate", "--sync", window_id],
            capture_output=True, timeout=5, env=env, start_new_session=True,
        )

        press = subprocess.run(
            ["xdotool", "key", "--window", window_id, xdotool_key_str],
            capture_output=True, text=True, timeout=5,
            env=env, start_new_session=True,
        )
        if press.returncode != 0:
            # Do not claim success when xdotool itself reported a failure.
            detail = (press.stderr or "").strip() or f"exit code {press.returncode}"
            return SkillResult(
                success=False,
                error=f"Firefox+VNC 按键失败: {detail}",
            )

        if wait > 0:
            time.sleep(wait)

        return SkillResult(
            success=True,
            message=f"Firefox+VNC: 已按键: {key}",
        )
    except Exception as e:
        # Boundary handler: callers receive a SkillResult rather than an
        # exception (missing xdotool binary, timeout, ...).
        return SkillResult(
            success=False,
            error=f"Firefox+VNC 按键失败: {e}",
        )
|
|
2067
|
+
|
|
1973
2068
|
def _firefox_screenshot(self, save_path: str = "") -> SkillResult:
|
|
1974
2069
|
"""Firefox+VNC 模式下通过 xdotool + import 截图。"""
|
|
1975
2070
|
display = os.environ.get("DISPLAY", ":99")
|
package/package.json
CHANGED
package/web/api_server.py
CHANGED
|
@@ -7901,10 +7901,11 @@ window.addEventListener('beforeunload', function() {{
|
|
|
7901
7901
|
# 4. 检测到裸 JSON(整个回复以 { 开头):进入 action 模式,提取 thought
|
|
7902
7902
|
# 5. 代码块结束后回到文本模式,继续流式推送
|
|
7903
7903
|
_stream_state = {
|
|
7904
|
-
"mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json"
|
|
7904
|
+
"mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json" | "output_xml"
|
|
7905
7905
|
"processed_pos": 0, # 已处理到的位置(用于去重 streaming)
|
|
7906
7906
|
"thought_sent": 0, # 已推送的 thought 长度
|
|
7907
7907
|
"action_block_depth": 0, # ``` 嵌套深度
|
|
7908
|
+
"reply_sent": 0, # [v1.47.16] output_xml 模式下已推送的 reply 长度
|
|
7908
7909
|
}
|
|
7909
7910
|
|
|
7910
7911
|
# 需要回退(hold back)的最大字符数,用于检测 ```action 或 ```tasklist 标记
|
|
@@ -7913,12 +7914,30 @@ window.addEventListener('beforeunload', function() {{
|
|
|
7913
7914
|
_MAX_HOLD = 12
|
|
7914
7915
|
|
|
7915
7916
|
async def _text_delta_callback(full_text_so_far: str, delta_text: str):
|
|
7916
|
-
"""智能流式过滤器:文本正常推送,JSON action
|
|
7917
|
+
"""智能流式过滤器:文本正常推送,JSON action 块拦截,<output> XML 拦截"""
|
|
7917
7918
|
st = _stream_state
|
|
7918
7919
|
remaining = full_text_so_far[st["processed_pos"]:]
|
|
7919
7920
|
|
|
7920
7921
|
while remaining:
|
|
7921
7922
|
if st["mode"] == "text":
|
|
7923
|
+
# ── [v1.47.16] 检测 <output> XML 标签 → 进入 output_xml 模式 ──
|
|
7924
|
+
output_marker = remaining.find("<output")
|
|
7925
|
+
if output_marker >= 0:
|
|
7926
|
+
# 推送 <output> 之前的文本
|
|
7927
|
+
text_before = remaining[:output_marker]
|
|
7928
|
+
if text_before.strip():
|
|
7929
|
+
await _write_sse({"type": "text_delta", "content": text_before})
|
|
7930
|
+
_all_streamed_text_parts.append(text_before)
|
|
7931
|
+
# 跳过 <output...> 开始标签
|
|
7932
|
+
tag_end = remaining.find(">", output_marker)
|
|
7933
|
+
if tag_end >= 0:
|
|
7934
|
+
st["processed_pos"] += tag_end + 1
|
|
7935
|
+
else:
|
|
7936
|
+
st["processed_pos"] += len(remaining)
|
|
7937
|
+
st["mode"] = "output_xml"
|
|
7938
|
+
remaining = full_text_so_far[st["processed_pos"]:]
|
|
7939
|
+
continue
|
|
7940
|
+
|
|
7922
7941
|
# ── 文本模式:寻找 ```action 或 ```tasklist 标记 ──
|
|
7923
7942
|
action_marker = remaining.find("```action")
|
|
7924
7943
|
tasklist_marker = remaining.find("```tasklist")
|
|
@@ -8037,6 +8056,60 @@ window.addEventListener('beforeunload', function() {{
|
|
|
8037
8056
|
remaining = ""
|
|
8038
8057
|
break
|
|
8039
8058
|
|
|
8059
|
+
elif st["mode"] == "output_xml":
|
|
8060
|
+
# ── [v1.47.16] <output> XML 模式:提取 <reply> 内容流式推送,其余全部拦截 ──
|
|
8061
|
+
# 策略:在 output_xml 模式下,只在检测到 <reply> 内容时推送,其他标签内容全部跳过
|
|
8062
|
+
import re as _re_xml_stream
|
|
8063
|
+
|
|
8064
|
+
# 检查 </output> 闭合标签 → 退出 output_xml 模式
|
|
8065
|
+
close_output = remaining.find("</output>")
|
|
8066
|
+
if close_output >= 0:
|
|
8067
|
+
# 在闭合标签前,检查是否有未推送的 <reply> 内容
|
|
8068
|
+
before_close = full_text_so_far[st["processed_pos"]:st["processed_pos"] + close_output]
|
|
8069
|
+
# 尝试提取 <reply> 内容
|
|
8070
|
+
reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', before_close)
|
|
8071
|
+
if reply_m and reply_m.group(1).strip():
|
|
8072
|
+
reply_content = reply_m.group(1).strip()
|
|
8073
|
+
new_part = reply_content[st["reply_sent"]:]
|
|
8074
|
+
if new_part:
|
|
8075
|
+
await _write_sse({"type": "text_delta", "content": new_part})
|
|
8076
|
+
_all_streamed_text_parts.append(new_part)
|
|
8077
|
+
st["reply_sent"] = len(reply_content)
|
|
8078
|
+
# 跳过到 </output> 之后
|
|
8079
|
+
st["processed_pos"] += close_output + len("</output>")
|
|
8080
|
+
st["mode"] = "text"
|
|
8081
|
+
remaining = full_text_so_far[st["processed_pos"]:]
|
|
8082
|
+
continue
|
|
8083
|
+
|
|
8084
|
+
# 尚未闭合:尝试提取已闭合的 <reply>...</reply> 内容并流式推送
|
|
8085
|
+
all_so_far = full_text_so_far[st["processed_pos"]:]
|
|
8086
|
+
reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', all_so_far)
|
|
8087
|
+
if reply_m and reply_m.group(1).strip():
|
|
8088
|
+
reply_content = reply_m.group(1).strip()
|
|
8089
|
+
new_part = reply_content[st["reply_sent"]:]
|
|
8090
|
+
if new_part:
|
|
8091
|
+
await _write_sse({"type": "text_delta", "content": new_part})
|
|
8092
|
+
_all_streamed_text_parts.append(new_part)
|
|
8093
|
+
st["reply_sent"] = len(reply_content)
|
|
8094
|
+
|
|
8095
|
+
# 尝试提取未闭合的 <reply> 内容(流式输出中标签可能尚未关闭)
|
|
8096
|
+
elif not reply_m:
|
|
8097
|
+
reply_open_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]+)$', all_so_far)
|
|
8098
|
+
if reply_open_m and reply_open_m.group(1).strip():
|
|
8099
|
+
partial_reply = reply_open_m.group(1)
|
|
8100
|
+
# 去除尾部可能的不完整标签
|
|
8101
|
+
partial_reply = _re_xml_stream.sub(r'<[^>]*$', '', partial_reply).strip()
|
|
8102
|
+
if partial_reply and len(partial_reply) > st["reply_sent"]:
|
|
8103
|
+
new_part = partial_reply[st["reply_sent"]:]
|
|
8104
|
+
if new_part:
|
|
8105
|
+
await _write_sse({"type": "text_delta", "content": new_part})
|
|
8106
|
+
_all_streamed_text_parts.append(new_part)
|
|
8107
|
+
st["reply_sent"] = len(partial_reply)
|
|
8108
|
+
|
|
8109
|
+
# 等待更多 token
|
|
8110
|
+
remaining = ""
|
|
8111
|
+
break
|
|
8112
|
+
|
|
8040
8113
|
# Stream 结束后的 flush:推送所有 hold 住的文本
|
|
8041
8114
|
async def _flush_remaining_text(full_text: str):
|
|
8042
8115
|
"""流结束后,推送所有剩余的文本(处理 hold back 的部分)"""
|
|
@@ -8050,6 +8123,25 @@ window.addEventListener('beforeunload', function() {{
|
|
|
8050
8123
|
await _write_sse({"type": "text_delta", "content": remaining})
|
|
8051
8124
|
_all_streamed_text_parts.append(remaining)
|
|
8052
8125
|
st["processed_pos"] = len(full_text)
|
|
8126
|
+
elif st["mode"] == "output_xml":
|
|
8127
|
+
# [v1.47.16] output_xml 模式下 flush:尝试提取 <reply> 内容
|
|
8128
|
+
import re as _re_xml_flush
|
|
8129
|
+
reply_m = _re_xml_flush.search(r'<reply[^>]*>([\s\S]*?)(?:</reply>|$)', remaining)
|
|
8130
|
+
if reply_m and reply_m.group(1).strip():
|
|
8131
|
+
reply_content = reply_m.group(1).strip()
|
|
8132
|
+
new_part = reply_content[st["reply_sent"]:]
|
|
8133
|
+
if new_part:
|
|
8134
|
+
await _write_sse({"type": "text_delta", "content": new_part})
|
|
8135
|
+
_all_streamed_text_parts.append(new_part)
|
|
8136
|
+
st["reply_sent"] = len(reply_content)
|
|
8137
|
+
# 检查 </output> 之后是否还有文本
|
|
8138
|
+
close_pos = remaining.find("</output>")
|
|
8139
|
+
if close_pos >= 0:
|
|
8140
|
+
after_output = remaining[close_pos + len("</output>"):].strip()
|
|
8141
|
+
if after_output and st["mode"] == "output_xml":
|
|
8142
|
+
# 不推送(output_xml 模式结束后可能有残余标签文本)
|
|
8143
|
+
pass
|
|
8144
|
+
st["processed_pos"] = len(full_text)
|
|
8053
8145
|
|
|
8054
8146
|
# Call LLM with streaming — tokens are filtered through _text_delta_callback
|
|
8055
8147
|
# Call LLM with streaming + frequency_penalty to reduce repetition
|
package/web/ui/chat/chat_main.js
CHANGED
|
@@ -2999,12 +2999,12 @@ async function selectSession(id) {
|
|
|
2999
2999
|
return m && (m.role === 'user' || m.role === 'assistant' || m.role === 'tool');
|
|
3000
3000
|
}).map(function(m) {
|
|
3001
3001
|
var content = (m.content != null) ? String(m.content) : '';
|
|
3002
|
-
//
|
|
3003
|
-
// 只有无 key 的旧格式 assistant 消息且内容以 < 开头时才需要剥离 XML
|
|
3004
|
-
// 有 key 的消息(reasoning/reply/tool_call)已经是解析后的纯内容,无需处理
|
|
3002
|
+
// [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
|
|
3005
3003
|
var mkey = (m.key || '').toLowerCase();
|
|
3006
|
-
if (m.role === 'assistant' &&
|
|
3007
|
-
|
|
3004
|
+
if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
|
|
3005
|
+
if (!mkey || mkey === 'reply') {
|
|
3006
|
+
content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
|
|
3007
|
+
}
|
|
3008
3008
|
}
|
|
3009
3009
|
var mapped = {
|
|
3010
3010
|
role: m.role || 'assistant',
|
|
@@ -3110,8 +3110,10 @@ async function loadMoreMessages() {
|
|
|
3110
3110
|
}).map(function(m) {
|
|
3111
3111
|
var content = (m.content != null) ? String(m.content) : '';
|
|
3112
3112
|
var mkey = (m.key || '').toLowerCase();
|
|
3113
|
-
if (m.role === 'assistant' &&
|
|
3114
|
-
|
|
3113
|
+
if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
|
|
3114
|
+
if (!mkey || mkey === 'reply' || (mkey !== 'tool_call' && mkey !== 'reasoning')) {
|
|
3115
|
+
content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
|
|
3116
|
+
}
|
|
3115
3117
|
}
|
|
3116
3118
|
var mapped = {
|
|
3117
3119
|
role: m.role || 'assistant',
|
|
@@ -398,10 +398,12 @@ async function pollChatHistory() {
|
|
|
398
398
|
}).map(function(m) {
|
|
399
399
|
var content = (m.content != null) ? String(m.content) : '';
|
|
400
400
|
var mkey = (m.key || '').toLowerCase();
|
|
401
|
-
//
|
|
402
|
-
// 有 key 的消息(reasoning/reply/tool_call
|
|
403
|
-
if (m.role === 'assistant' &&
|
|
404
|
-
|
|
401
|
+
// [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
|
|
402
|
+
// 有 key 的消息(reasoning/reply/tool_call)一般已是纯内容,但部分模型仍会输出 XML
|
|
403
|
+
if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
|
|
404
|
+
if (!mkey || mkey === 'reply') {
|
|
405
|
+
content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
|
|
406
|
+
}
|
|
405
407
|
}
|
|
406
408
|
var mapped = {
|
|
407
409
|
role: m.role || 'assistant',
|
|
@@ -473,9 +475,11 @@ async function forceRefreshHistory() {
|
|
|
473
475
|
}).map(function(m) {
|
|
474
476
|
var content = (m.content != null) ? String(m.content) : '';
|
|
475
477
|
var mkey = (m.key || '').toLowerCase();
|
|
476
|
-
//
|
|
477
|
-
if (m.role === 'assistant' &&
|
|
478
|
-
|
|
478
|
+
// [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
|
|
479
|
+
if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
|
|
480
|
+
if (!mkey || mkey === 'reply') {
|
|
481
|
+
content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
|
|
482
|
+
}
|
|
479
483
|
}
|
|
480
484
|
var mapped = {
|
|
481
485
|
role: m.role || 'assistant',
|