myagent-ai 1.47.20 → 1.47.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,6 @@ from core.llm import LLMClient, LLMResponse, Message
16
16
  from agents.base import BaseAgent, AgentContext
17
17
  from core.utils import generate_id, timestamp, truncate_str
18
18
  from core.context_builder import ContextBuilder
19
- from core.output_parser import ParsedOutput, parse_output, validate_output, extract_surrounding_text
20
19
  from core.tool_dispatcher import ToolDispatcher
21
20
 
22
21
  logger = get_logger("myagent.agent.main")
@@ -500,86 +499,6 @@ class MainAgent(BaseAgent):
500
499
  except Exception as e:
501
500
  logger.debug(f"V2 SSE 事件发送失败 ({event_type}): {e}")
502
501
 
503
- def _try_extract_partial_response(self, llm_raw: str) -> str:
504
- """[v1.15.73] 从不完整的 LLM 输出中提取部分回复内容。
505
-
506
- 当 <output> 块被截断(缺少 </output>)时,尝试:
507
- 1. 提取 <reply>...</reply> 中已闭合的内容
508
- 2. 提取未闭合的 <reply> 后的内容(宽松模式)
509
- 3. 提取 <knowledge>...</knowledge> 中已闭合的内容(兜底)
510
- 4. 提取 <output> 后到截断点之间的纯文本
511
- 5. 去除 XML 标签后的残余文本(跳过工具执行状态文本)
512
- """
513
- if not llm_raw:
514
- return ""
515
-
516
- import re
517
- _parts = []
518
-
519
- # 策略1: 尝试提取已闭合的 <reply> 内容
520
- reply_match = re.search(
521
- r"<reply[^>]*>(.*?)</reply>",
522
- llm_raw,
523
- re.DOTALL | re.IGNORECASE,
524
- )
525
- if reply_match:
526
- text = reply_match.group(1).strip()
527
- if text:
528
- _parts.append(text)
529
-
530
- # 策略2: 尝试提取未闭合的 <reply> 内容(LLM 截断时 <reply> 常未闭合)
531
- if not _parts:
532
- reply_open_match = re.search(
533
- r"<reply[^>]*>(.*?)$",
534
- llm_raw,
535
- re.DOTALL | re.IGNORECASE,
536
- )
537
- if reply_open_match:
538
- text = reply_open_match.group(1).strip()
539
- # 去除尾部可能的不完整标签
540
- text = re.sub(r"<[^>]*$", "", text).strip()
541
- if text and len(text) > 5:
542
- _parts.append(text)
543
-
544
- # 策略3: 尝试提取已闭合的 <knowledge> 内容(兜底)
545
- if not _parts:
546
- knowledge_match = re.search(
547
- r"<knowledge[^>]*>(.*?)</knowledge>",
548
- llm_raw,
549
- re.DOTALL | re.IGNORECASE,
550
- )
551
- if knowledge_match:
552
- text = knowledge_match.group(1).strip()
553
- if text and len(text) > 20:
554
- _parts.append(text)
555
-
556
- if _parts:
557
- return "\n".join(_parts)
558
-
559
- # 策略4: 提取 <output> 标签后的内容(可能包含未闭合的标签)
560
- output_match = re.search(r"<output[^>]*>", llm_raw, re.IGNORECASE)
561
- if output_match:
562
- after_output = llm_raw[output_match.end():].strip()
563
- if after_output:
564
- cleaned = re.sub(r"<[^>]+>", "", after_output).strip()
565
- cleaned = re.sub(r"^(reasoning|assistant)\s*", "", cleaned, flags=re.IGNORECASE).strip()
566
- # 跳过工具执行状态文本(如"执行工具 task_plan:...")
567
- if cleaned and len(cleaned) > 5 and not re.match(
568
- r"^(执行工具|调用工具|Running tool|Calling tool)", cleaned, re.IGNORECASE
569
- ):
570
- return cleaned
571
-
572
- # 策略5: 提取去除 XML 标签后的整体文本
573
- cleaned = re.sub(r"<[^>]+>", "", llm_raw).strip()
574
- cleaned = re.sub(r"^(reasoning|assistant)\s*", "", cleaned, flags=re.IGNORECASE).strip()
575
- # 跳过工具执行状态文本
576
- if cleaned and len(cleaned) > 10 and not re.match(
577
- r"^(执行工具|调用工具|Running tool|Calling tool)", cleaned, re.IGNORECASE
578
- ):
579
- return cleaned
580
-
581
- return ""
582
-
583
502
  async def _merge_duplicate_memory(
584
503
  self,
585
504
  old_memory,
@@ -783,13 +702,13 @@ class MainAgent(BaseAgent):
783
702
  agent_path: Optional[str] = None,
784
703
  ) -> AgentContext:
785
704
  """
786
- V2 主处理循环 — 使用结构化输出格式。
705
+ V2 主处理循环 — 使用原生 tool_calling。
787
706
 
788
707
  核心流程:
789
708
  1. 使用 ContextBuilder 构建 <context> XML
790
709
  2. 将 context 注入 SYSTEM_PROMPT,调用 LLM
791
- 3. 使用 OutputParser 解析 <output> XML
792
- 4. 根据 parsed.tools_to_call 依次执行工具
710
+ 3. LLM 通过原生 tool_calling 返回工具调用
711
+ 4. 根据 tool_calls 依次执行工具
793
712
  5. 任一工具超时 → 强制回调 LLM
794
713
  6. 根据 callback 标志决定是否回调 LLM
795
714
  7. 处理 remember/recall
@@ -1240,200 +1159,8 @@ class MainAgent(BaseAgent):
1240
1159
  continue
1241
1160
 
1242
1161
  else:
1243
- # 没有原生工具调用 → 检查是否为旧格式 <output> XML(某些模型不支持 tool_calling
1244
- raw_content = (response.content or "").strip()
1245
-
1246
- # [v1.47.16] 兼容旧格式:当 LLM 输出 <output> XML 时,用 output_parser 解析
1247
- if raw_content.startswith("<output") or ("<output>" in raw_content and "<toolstocal>" in raw_content):
1248
- logger.info(f"[{task_id}] 检测到旧格式 <output> XML 输出,启用 output_parser 解析")
1249
- parsed = parse_output(raw_content)
1250
-
1251
- if parsed.parse_success:
1252
- # 1) 处理 mainsubject → 更新会话标题
1253
- if parsed.mainsubject and self.dispatcher:
1254
- try:
1255
- await self.dispatcher.dispatch(
1256
- tool_name="update_conversation_title",
1257
- params={"title": parsed.mainsubject, "session_id": context.session_id},
1258
- timeout=10,
1259
- )
1260
- except Exception:
1261
- pass
1262
-
1263
- # 2) 处理 remember → 保存记忆
1264
- if parsed.remember and self.dispatcher:
1265
- try:
1266
- await self.dispatcher.dispatch(
1267
- tool_name="save_memory",
1268
- params={
1269
- "content": parsed.remember,
1270
- "type": parsed.remember_type or "session",
1271
- "session_id": context.session_id,
1272
- },
1273
- timeout=10,
1274
- )
1275
- except Exception:
1276
- pass
1277
-
1278
- # 3) 处理 task_plan
1279
- if parsed.task_plan and self.dispatcher:
1280
- try:
1281
- await self.dispatcher.dispatch(
1282
- tool_name="task_plan",
1283
- params={"action": "create", "plan": parsed.task_plan},
1284
- timeout=10,
1285
- )
1286
- current_task_plan = parsed.task_plan
1287
- await self._emit_v2_event(
1288
- "v2_task_plan",
1289
- {"plan": truncate_str(current_task_plan, 2000)},
1290
- stream_callback,
1291
- )
1292
- except Exception:
1293
- pass
1294
-
1295
- # 4) 处理 tools_to_call → 执行工具
1296
- if parsed.tools_to_call:
1297
- logger.info(f"[{task_id}] 从 <output> XML 提取到 {len(parsed.tools_to_call)} 个工具调用")
1298
-
1299
- # 添加 assistant 消息到消息列表
1300
- messages.append(Message(
1301
- role="assistant",
1302
- content=raw_content,
1303
- ))
1304
-
1305
- # 保存 LLM 原始输出
1306
- if self.memory:
1307
- self.memory.add_session(agent_id=_effective_agent_id,
1308
- session_id=context.session_id,
1309
- role="assistant",
1310
- content=raw_content,
1311
- key="llm_output",
1312
- importance=0.3,
1313
- )
1314
-
1315
- for tool_desc in parsed.tools_to_call:
1316
- _tc_name = tool_desc.get("toolname", "")
1317
- _tc_parms = tool_desc.get("parms", "{}")
1318
- _tc_timeout = int(tool_desc.get("timeout", 120))
1319
-
1320
- if not _tc_name:
1321
- continue
1322
-
1323
- # 注入 session_id
1324
- if _tc_name in ("save_memory", "recall_memory", "update_conversation_title"):
1325
- if isinstance(_tc_parms, str):
1326
- try:
1327
- _tc_parms_dict = json.loads(_tc_parms)
1328
- except (json.JSONDecodeError, TypeError):
1329
- _tc_parms_dict = {"raw_input": _tc_parms}
1330
- else:
1331
- _tc_parms_dict = _tc_parms
1332
- _tc_parms_dict.setdefault("session_id", context.session_id)
1333
- _tc_parms = json.dumps(_tc_parms_dict, ensure_ascii=False)
1334
-
1335
- # 发送工具开始事件
1336
- await self._emit_v2_event(
1337
- "v2_tool_start",
1338
- {"tool": {"toolname": _tc_name, "parms": truncate_str(str(_tc_parms), 500)}},
1339
- stream_callback,
1340
- )
1341
-
1342
- self._add_exec_event("tool_call", {
1343
- "title": f"调用工具: {_tc_name}",
1344
- "tool_name": _tc_name,
1345
- "arguments": str(_tc_parms),
1346
- })
1347
-
1348
- # 执行工具
1349
- tool_result = await self._execute_v2_tool(
1350
- _tc_name, str(_tc_parms), _tc_timeout,
1351
- context, task_id,
1352
- stream_callback=stream_callback,
1353
- sent_files=_sent_files,
1354
- agent_path=agent_path,
1355
- )
1356
-
1357
- # 提取输出
1358
- if tool_result is None:
1359
- tool_result = {"success": False, "error": "工具返回了空结果"}
1360
- _output_text = (
1361
- tool_result.get("output", "")
1362
- or tool_result.get("message", "")
1363
- or tool_result.get("stdout", "")
1364
- or tool_result.get("error", "")
1365
- )
1366
- if not _output_text and tool_result.get("data"):
1367
- try:
1368
- _output_text = json.dumps(tool_result["data"], ensure_ascii=False, default=str)[:30000]
1369
- except Exception:
1370
- _output_text = str(tool_result["data"])[:30000]
1371
-
1372
- # 发送工具结果事件
1373
- await self._emit_v2_event(
1374
- "v2_tool_result",
1375
- {"tool": {"toolname": _tc_name}, "result": {
1376
- "success": tool_result.get("success", False),
1377
- "output": truncate_str(_output_text, 30000),
1378
- "error": truncate_str(tool_result.get("error", ""), 30000),
1379
- }},
1380
- stream_callback,
1381
- )
1382
-
1383
- self._add_exec_event("tool_result", {
1384
- "title": f"工具结果: {_tc_name}",
1385
- "tool_name": _tc_name,
1386
- "success": tool_result.get("success", False),
1387
- "summary": truncate_str(_output_text, 30000),
1388
- })
1389
-
1390
- # 添加 tool result 消息
1391
- messages.append(Message(
1392
- role="user",
1393
- content=f"[工具结果: {_tc_name}] {truncate_str(_output_text, 5000)}",
1394
- ))
1395
-
1396
- # 工具执行完毕 → 继续循环让 LLM 处理结果
1397
- continue
1398
-
1399
- # 5) 没有工具但有 reply → 提取纯文本回复
1400
- if parsed.reply:
1401
- reply_text = parsed.reply.strip()
1402
- else:
1403
- # 兜底:去除所有 XML 标签
1404
- import re as _re_xml
1405
- reply_text = _re_xml.sub(r'<[^>]+>', '', raw_content).strip()
1406
-
1407
- if not reply_text:
1408
- reply_text = "处理完毕。"
1409
-
1410
- context.working_memory["final_response"] = reply_text
1411
- await self._emit_v2_event("v2_reasoning", {"content": truncate_str(reply_text, 3000)}, stream_callback)
1412
-
1413
- # 保存回复到会话记忆
1414
- if self.memory:
1415
- self.memory.add_session(agent_id=_effective_agent_id,
1416
- session_id=context.session_id,
1417
- role="assistant",
1418
- content=reply_text,
1419
- key="reply",
1420
- importance=0.5,
1421
- )
1422
-
1423
- # 保存 LLM 原始输出
1424
- if self.memory:
1425
- self.memory.add_session(agent_id=_effective_agent_id,
1426
- session_id=context.session_id,
1427
- role="assistant",
1428
- content=raw_content,
1429
- key="llm_output",
1430
- importance=0.3,
1431
- )
1432
-
1433
- break
1434
-
1435
- # 纯文本回复(非 XML 格式)
1436
- reply_text = raw_content
1162
+ # 没有原生工具调用 → 纯文本回复,完全依赖 tool_calling
1163
+ reply_text = (response.content or "").strip()
1437
1164
  logger.info(f"[{task_id}] 无工具调用,任务完成 (reply长度={len(reply_text)})")
1438
1165
 
1439
1166
  if not reply_text:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.47.20",
3
+ "version": "1.47.22",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -7901,11 +7901,10 @@ window.addEventListener('beforeunload', function() {{
7901
7901
  # 4. 检测到裸 JSON(整个回复以 { 开头):进入 action 模式,提取 thought
7902
7902
  # 5. 代码块结束后回到文本模式,继续流式推送
7903
7903
  _stream_state = {
7904
- "mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json" | "output_xml"
7904
+ "mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json"
7905
7905
  "processed_pos": 0, # 已处理到的位置(用于去重 streaming)
7906
7906
  "thought_sent": 0, # 已推送的 thought 长度
7907
7907
  "action_block_depth": 0, # ``` 嵌套深度
7908
- "reply_sent": 0, # [v1.47.16] output_xml 模式下已推送的 reply 长度
7909
7908
  }
7910
7909
 
7911
7910
  # 需要回退(hold back)的最大字符数,用于检测 ```action 或 ```tasklist 标记
@@ -7914,30 +7913,12 @@ window.addEventListener('beforeunload', function() {{
7914
7913
  _MAX_HOLD = 12
7915
7914
 
7916
7915
  async def _text_delta_callback(full_text_so_far: str, delta_text: str):
7917
- """智能流式过滤器:文本正常推送,JSON action 块拦截,<output> XML 拦截"""
7916
+ """[v1.47.21] 智能流式过滤器:文本正常推送,JSON action 块拦截"""
7918
7917
  st = _stream_state
7919
7918
  remaining = full_text_so_far[st["processed_pos"]:]
7920
7919
 
7921
7920
  while remaining:
7922
7921
  if st["mode"] == "text":
7923
- # ── [v1.47.16] 检测 <output> XML 标签 → 进入 output_xml 模式 ──
7924
- output_marker = remaining.find("<output")
7925
- if output_marker >= 0:
7926
- # 推送 <output> 之前的文本
7927
- text_before = remaining[:output_marker]
7928
- if text_before.strip():
7929
- await _write_sse({"type": "text_delta", "content": text_before})
7930
- _all_streamed_text_parts.append(text_before)
7931
- # 跳过 <output...> 开始标签
7932
- tag_end = remaining.find(">", output_marker)
7933
- if tag_end >= 0:
7934
- st["processed_pos"] += tag_end + 1
7935
- else:
7936
- st["processed_pos"] += len(remaining)
7937
- st["mode"] = "output_xml"
7938
- remaining = full_text_so_far[st["processed_pos"]:]
7939
- continue
7940
-
7941
7922
  # ── 文本模式:寻找 ```action 或 ```tasklist 标记 ──
7942
7923
  action_marker = remaining.find("```action")
7943
7924
  tasklist_marker = remaining.find("```tasklist")
@@ -8056,63 +8037,9 @@ window.addEventListener('beforeunload', function() {{
8056
8037
  remaining = ""
8057
8038
  break
8058
8039
 
8059
- elif st["mode"] == "output_xml":
8060
- # ── [v1.47.16] <output> XML 模式:提取 <reply> 内容流式推送,其余全部拦截 ──
8061
- # 策略:在 output_xml 模式下,只在检测到 <reply> 内容时推送,其他标签内容全部跳过
8062
- import re as _re_xml_stream
8063
-
8064
- # 检查 </output> 闭合标签 → 退出 output_xml 模式
8065
- close_output = remaining.find("</output>")
8066
- if close_output >= 0:
8067
- # 在闭合标签前,检查是否有未推送的 <reply> 内容
8068
- before_close = full_text_so_far[st["processed_pos"]:st["processed_pos"] + close_output]
8069
- # 尝试提取 <reply> 内容
8070
- reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', before_close)
8071
- if reply_m and reply_m.group(1).strip():
8072
- reply_content = reply_m.group(1).strip()
8073
- new_part = reply_content[st["reply_sent"]:]
8074
- if new_part:
8075
- await _write_sse({"type": "text_delta", "content": new_part})
8076
- _all_streamed_text_parts.append(new_part)
8077
- st["reply_sent"] = len(reply_content)
8078
- # 跳过到 </output> 之后
8079
- st["processed_pos"] += close_output + len("</output>")
8080
- st["mode"] = "text"
8081
- remaining = full_text_so_far[st["processed_pos"]:]
8082
- continue
8083
-
8084
- # 尚未闭合:尝试提取已闭合的 <reply>...</reply> 内容并流式推送
8085
- all_so_far = full_text_so_far[st["processed_pos"]:]
8086
- reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', all_so_far)
8087
- if reply_m and reply_m.group(1).strip():
8088
- reply_content = reply_m.group(1).strip()
8089
- new_part = reply_content[st["reply_sent"]:]
8090
- if new_part:
8091
- await _write_sse({"type": "text_delta", "content": new_part})
8092
- _all_streamed_text_parts.append(new_part)
8093
- st["reply_sent"] = len(reply_content)
8094
-
8095
- # 尝试提取未闭合的 <reply> 内容(流式输出中标签可能尚未关闭)
8096
- elif not reply_m:
8097
- reply_open_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]+)$', all_so_far)
8098
- if reply_open_m and reply_open_m.group(1).strip():
8099
- partial_reply = reply_open_m.group(1)
8100
- # 去除尾部可能的不完整标签
8101
- partial_reply = _re_xml_stream.sub(r'<[^>]*$', '', partial_reply).strip()
8102
- if partial_reply and len(partial_reply) > st["reply_sent"]:
8103
- new_part = partial_reply[st["reply_sent"]:]
8104
- if new_part:
8105
- await _write_sse({"type": "text_delta", "content": new_part})
8106
- _all_streamed_text_parts.append(new_part)
8107
- st["reply_sent"] = len(partial_reply)
8108
-
8109
- # 等待更多 token
8110
- remaining = ""
8111
- break
8112
-
8113
8040
  # Stream 结束后的 flush:推送所有 hold 住的文本
8114
8041
  async def _flush_remaining_text(full_text: str):
8115
- """流结束后,推送所有剩余的文本(处理 hold back 的部分)"""
8042
+ """[v1.47.21] 流结束后,推送所有剩余的文本(处理 hold back 的部分)"""
8116
8043
  st = _stream_state
8117
8044
  remaining = full_text[st["processed_pos"]:]
8118
8045
  if remaining.strip() and st["mode"] == "text":
@@ -8123,25 +8050,6 @@ window.addEventListener('beforeunload', function() {{
8123
8050
  await _write_sse({"type": "text_delta", "content": remaining})
8124
8051
  _all_streamed_text_parts.append(remaining)
8125
8052
  st["processed_pos"] = len(full_text)
8126
- elif st["mode"] == "output_xml":
8127
- # [v1.47.16] output_xml 模式下 flush:尝试提取 <reply> 内容
8128
- import re as _re_xml_flush
8129
- reply_m = _re_xml_flush.search(r'<reply[^>]*>([\s\S]*?)(?:</reply>|$)', remaining)
8130
- if reply_m and reply_m.group(1).strip():
8131
- reply_content = reply_m.group(1).strip()
8132
- new_part = reply_content[st["reply_sent"]:]
8133
- if new_part:
8134
- await _write_sse({"type": "text_delta", "content": new_part})
8135
- _all_streamed_text_parts.append(new_part)
8136
- st["reply_sent"] = len(reply_content)
8137
- # 检查 </output> 之后是否还有文本
8138
- close_pos = remaining.find("</output>")
8139
- if close_pos >= 0:
8140
- after_output = remaining[close_pos + len("</output>"):].strip()
8141
- if after_output and st["mode"] == "output_xml":
8142
- # 不推送(output_xml 模式结束后可能有残余标签文本)
8143
- pass
8144
- st["processed_pos"] = len(full_text)
8145
8053
 
8146
8054
  # Call LLM with streaming — tokens are filtered through _text_delta_callback
8147
8055
  # Call LLM with streaming + frequency_penalty to reduce repetition
@@ -2999,13 +2999,7 @@ async function selectSession(id) {
2999
2999
  return m && (m.role === 'user' || m.role === 'assistant' || m.role === 'tool');
3000
3000
  }).map(function(m) {
3001
3001
  var content = (m.content != null) ? String(m.content) : '';
3002
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
3003
3002
  var mkey = (m.key || '').toLowerCase();
3004
- if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
3005
- if (!mkey || mkey === 'reply') {
3006
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
3007
- }
3008
- }
3009
3003
  var mapped = {
3010
3004
  role: m.role || 'assistant',
3011
3005
  content: content,
@@ -3110,11 +3104,6 @@ async function loadMoreMessages() {
3110
3104
  }).map(function(m) {
3111
3105
  var content = (m.content != null) ? String(m.content) : '';
3112
3106
  var mkey = (m.key || '').toLowerCase();
3113
- if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
3114
- if (!mkey || mkey === 'reply' || (mkey !== 'tool_call' && mkey !== 'reasoning')) {
3115
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
3116
- }
3117
- }
3118
3107
  var mapped = {
3119
3108
  role: m.role || 'assistant',
3120
3109
  content: content,
@@ -398,13 +398,6 @@ async function pollChatHistory() {
398
398
  }).map(function(m) {
399
399
  var content = (m.content != null) ? String(m.content) : '';
400
400
  var mkey = (m.key || '').toLowerCase();
401
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
402
- // 有 key 的消息(reasoning/reply/tool_call)一般已是纯内容,但部分模型仍会输出 XML
403
- if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
404
- if (!mkey || mkey === 'reply') {
405
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
406
- }
407
- }
408
401
  var mapped = {
409
402
  role: m.role || 'assistant',
410
403
  content: content,
@@ -419,7 +412,7 @@ async function pollChatHistory() {
419
412
  if (m._media && m._media.length > 0) mapped._media = m._media;
420
413
  return mapped;
421
414
  });
422
-
415
+
423
416
  // 检测是否有新消息
424
417
  var newCount = loaded.length;
425
418
  if (newCount === _lastKnownMessageCount && !state.isGenerating) return; // 无变化且非生成中,跳过
@@ -475,12 +468,6 @@ async function forceRefreshHistory() {
475
468
  }).map(function(m) {
476
469
  var content = (m.content != null) ? String(m.content) : '';
477
470
  var mkey = (m.key || '').toLowerCase();
478
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
479
- if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
480
- if (!mkey || mkey === 'reply') {
481
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
482
- }
483
- }
484
471
  var mapped = {
485
472
  role: m.role || 'assistant',
486
473
  content: content,
@@ -1119,37 +1106,6 @@ function _showFinishNotification(text) {
1119
1106
  setTimeout(function() { if (overlay.parentNode) overlay.remove(); }, 5000);
1120
1107
  }
1121
1108
 
1122
- /**
1123
- * Strip XML tags from text for real-time streaming preview in V2 mode.
1124
- * Shows plain text between tags so the user sees progress during LLM streaming.
1125
- */
1126
- function _stripXmlTags(xml) {
1127
- if (!xml) return '';
1128
- var text = xml;
1129
- // [v1.15.12] 移除未闭合的 <task_plan>...</task_plan> 区域(流式输出中常见)
1130
- // 当只有开始标签没有闭合标签时,将开始标签到文本末尾的内容完全移除
1131
- text = text.replace(/<task_plan[^>]*>[\s\S]*?<\/task_plan>/g, ''); // 已闭合的完整 task_plan
1132
- text = text.replace(/<task_plan[^>]*>[\s\S]*$/g, ''); // 未闭合的 task_plan(流式中标签已打开但未关闭)
1133
- // [v1.37] 优先提取 <reply> 标签内容,不再兜底 <response>
1134
- var replyMatch = text.match(/<reply[^>]*>([\s\S]*?)<\/reply>/i);
1135
- if (replyMatch && replyMatch[1] && replyMatch[1].trim()) {
1136
- return replyMatch[1].trim();
1137
- }
1138
- // [v1.37] 移除 <response> 包裹(不再作为兜底提取,直接剥离标签)
1139
- text = text.replace(/<response[^>]*>|<\/response>/gi, '');
1140
- // 兜底:去除所有XML标签
1141
- return text
1142
- .replace(/<[^>]+>/g, ' ') // Replace tags with space
1143
- .replace(/&lt;/g, '<')
1144
- .replace(/&gt;/g, '>')
1145
- .replace(/&amp;/g, '&')
1146
- .replace(/&quot;/g, '"')
1147
- .replace(/&#39;/g, "'")
1148
- .replace(/&#x27;/g, "'")
1149
- .replace(/\s{3,}/g, ' ') // Collapse 3+ whitespace to single space
1150
- .trim();
1151
- }
1152
-
1153
1109
  // ══════════════════════════════════════════════════════
1154
1110
  // ── V2 Content Assembler (V2 内容组装) ──
1155
1111
  // ══════════════════════════════════════════════════════
@@ -1174,14 +1130,7 @@ function _assembleV2Content(msg, msgParts) {
1174
1130
  if (msg._askUser && msg._askUser.trim()) {
1175
1131
  return msg._askUser.trim();
1176
1132
  }
1177
- // Priority 4: V2 raw XML stripped of tags (fallback when v2_reasoning not sent)
1178
- if (msg._v2RawXml && msg._v2RawXml.trim()) {
1179
- var strippedText = _stripXmlTags(msg._v2RawXml);
1180
- if (strippedText && strippedText.trim()) {
1181
- return strippedText.trim();
1182
- }
1183
- }
1184
- // Priority 5: raw content from message (server-stored response)
1133
+ // Priority 4: raw content from message (server-stored response)
1185
1134
  if (msg.content && msg.content.trim() && msg.content !== '(无回复)') {
1186
1135
  return msg.content.trim();
1187
1136
  }
package/worklog.md CHANGED
@@ -117,3 +117,30 @@ Stage Summary:
117
117
  - Streaming filter extracts only `<reply>` content for real-time display
118
118
  - Frontend strips XML from both keyless and key="reply" assistant messages
119
119
  - VNC mode Firefox support fully functional
120
+
121
+ ---
122
+ Task ID: 2
123
+ Agent: Main
124
+ Task: Fix Firefox+VNC browser_stealth: content/close/evaluate/wait_for + browser_open/web_control VNC fallback
125
+
126
+ Work Log:
127
+ - Analyzed logs: stealth_browser_navigate now works (Popen non-blocking), but stealth_browser_content returns "不支持" and agent falls back to web_control/browser_open which also fail in VNC mode
128
+ - Added `_firefox_read_sessionstore()` method: reads Firefox's recovery.jsonlz4 (mozLz4 format) to get current tab URL/title
129
+ - Added `_firefox_get_content()` method: screenshot + sessionstore → returns screenshot path, URL, title, tabs list
130
+ - Changed `get_content()` Firefox mode: calls `_firefox_get_content()` instead of returning error
131
+ - Changed `get_html()` Firefox mode: calls `_firefox_get_content()` instead of returning error
132
+ - Changed `close()` Firefox mode: VNC mode only clears internal state, does NOT kill Firefox (managed by vnc_manager)
133
+ - Changed `StealthBrowserCloseSkill.execute()`: VNC mode returns "会话已释放" instead of "浏览器已关闭"
134
+ - Changed `evaluate()` Firefox mode: better error message suggesting stealth_browser alternatives
135
+ - Changed `wait_for()` Firefox mode: sleep + sessionstore read instead of returning error
136
+ - Changed `browser_open` in chromedev_mcp.py: VNC mode without Chromium → returns error suggesting stealth_browser
137
+ - Added VNC mode hint injection in main_agent.py system prompt: tells agent to use stealth_browser_* tools in VNC mode
138
+ - Published v1.47.20 to npm
139
+
140
+ Stage Summary:
141
+ - Firefox+VNC mode: stealth_browser_content now returns screenshot + tab info (URL/title/tabs)
142
+ - Firefox+VNC mode: close() no longer kills VNC browser process
143
+ - Firefox+VNC mode: wait_for() works (sleep + sessionstore), evaluate() has actionable error message
144
+ - browser_open: VNC mode without Chromium → clear error suggesting stealth_browser
145
+ - main_agent: VNC mode system prompt tells agent to prefer stealth_browser over browser_open/web_control
146
+ - All syntax checks passed
@@ -1,730 +0,0 @@
1
- """
2
- Custom Fault-Tolerant XML Output Parser Module.
3
-
4
- Parses the XML ``<output>`` block generated by the LLM in response to the
5
- system prompt. The LLM produces structured XML that drives the agent's
6
- execution loop — including tool calls, memory operations, user interaction
7
- hints, and loop-control flags.
8
-
9
- **This module does NOT use xml.etree.ElementTree.** All parsing is done with
10
- pure Python + regex to achieve maximum fault tolerance.
11
-
12
- Expected XML schema produced by the LLM::
13
-
14
- <output>
15
- <mainsubject>当前对话的6字以内标题</mainsubject>
16
- <usersays_correct>...</usersays_correct>
17
- <reply>展示给用户的文本内容</reply>
18
- <toolstocal>
19
- <tool>
20
-
21
- <toolname>工具名</toolname>
22
- <parms>参数JSON或描述</parms>
23
- <timeout>预估超时时限(秒)</timeout>
24
- </tool>
25
- </toolstocal>
26
- <remember>
27
- <type>global|session</type>
28
- <content>记忆内容</content>
29
- </remember>
30
- <recall>下一轮需要调取的记忆</recall>
31
- <get_knowledge>下一轮需要搜索获得的知识</get_knowledge>
32
- </output>
33
-
34
- Fault-tolerance features:
35
-
36
- * Text before ``<output>`` or after ``</output>`` is silently stripped.
37
- * Unclosed tags are auto-closed at the next sibling tag boundary.
38
- * Self-closing tags (``<tag/>``) resolve to empty strings.
39
- * Case-insensitive tag matching (``<OUTPUT>`` == ``<output>``).
40
- * Tag-name aliases (reserved for future use).
41
- * If extraction yields nothing meaningful, ``needs_correction`` is set to
42
- ``True`` so the caller can ask the LLM to re-format.
43
- """
44
-
45
- from __future__ import annotations
46
-
47
- import html
48
- import re
49
- from dataclasses import dataclass, field
50
- from typing import Any, Dict, List
51
-
52
- from core.logger import get_logger
53
-
54
- logger = get_logger("myagent.output_parser")
55
-
56
- # ---------------------------------------------------------------------------
57
- # Constants
58
- # ---------------------------------------------------------------------------
59
-
60
- _DEFAULT_TIMEOUT: int = 120
61
-
62
- # All top-level tags we recognise inside <output>.
63
- KNOWN_TOP_LEVEL_TAGS = [
64
- "usersays_correct",
65
- "task_plan", # 任务计划(Markdown格式)
66
- "toolstocal",
67
- "remember",
68
- "recall",
69
- "knowledge",
70
- "get_knowledge",
71
-
72
- "reply", # [v1.36] 用户可见文本(顶层标签,不再嵌套在 <response> 内)
73
- # [v1.37] "response" 已移除 — 不再兼容 <response> 包裹,统一使用 <reply>
74
- "mainsubject", # [v1.15.8] 会话标题自动命名
75
- ]
76
-
77
- # Inner tags inside each <tool>.
78
- TOOL_INNER_TAGS = [
79
- "toolname",
80
- "parms",
81
- "timeout",
82
- ]
83
-
84
- # Inner tags inside <remember>.
85
- REMEMBER_INNER_TAGS = ["type", "content"]
86
-
87
- # Tag aliases: canonical name -> list of aliases.
88
- _TAG_ALIASES: Dict[str, List[str]] = {
89
- # [v1.36] askuser/ask_user aliases removed — tag no longer used
90
- }
91
-
92
- # Build reverse lookup: alias -> canonical.
93
- _ALIAS_TO_CANONICAL: Dict[str, str] = {}
94
- for _canonical, _aliases in _TAG_ALIASES.items():
95
- for _alias in _aliases:
96
- _ALIAS_TO_CANONICAL[_alias.lower()] = _canonical
97
-
98
-
99
- # ---------------------------------------------------------------------------
100
- # Data classes
101
- # ---------------------------------------------------------------------------
102
-
103
-
104
- @dataclass
105
- class ParsedOutput:
106
- """Structured representation of the LLM's ``<output>`` block.
107
-
108
- Attributes:
109
- usersays_correct: Corrected / canonicalised version of the user's
110
- voice input.
111
- task_plan: Updated or new task plan (may contain Markdown).
112
- tools_to_call: Ordered list of tool descriptors to execute.
113
- remember: Content that should be persisted to the agent's memory.
114
- remember_type: "global" (cross-session) or "session" (current session only).
115
- recall: Memory keys / descriptions to retrieve for the next loop
116
- iteration.
117
- knowledge: Knowledge content the LLM wants to persist.
118
- get_knowledge: Knowledge search keywords for the next loop iteration.
119
- reply: User-visible text content extracted from <reply> tag (sole display content).
120
- raw_text: The verbatim raw text returned by the LLM.
121
- parse_success: Whether parsing extracted at least one meaningful field.
122
- needs_correction: When ``True``, the caller should send the raw text
123
- back to the LLM for re-formatting.
124
- """
125
-
126
- usersays_correct: str = ""
127
- task_plan: str = "" # 任务计划(Markdown格式)
128
- tools_to_call: List[Dict[str, Any]] = field(default_factory=list)
129
- remember: str = ""
130
- remember_type: str = ""
131
- recall: str = ""
132
- knowledge: str = ""
133
- get_knowledge: str = ""
134
-
135
- reply: str = "" # [v1.37] 用户可见文本(<reply> 标签,唯一回复来源)
136
- mainsubject: str = "" # [v1.15.8] 会话标题自动命名(6字以内)
137
- raw_text: str = ""
138
- parse_success: bool = False
139
- needs_correction: bool = False
140
- output_block_complete: bool = False # </output> 闭合标签是否存在
141
-
142
-
143
- # ---------------------------------------------------------------------------
144
- # Low-level extraction helpers
145
- # ---------------------------------------------------------------------------
146
-
147
-
148
- def _safe_strip(value: str | None) -> str:
149
- if value is None:
150
- return ""
151
- return value.strip()
152
-
153
-
154
- def _parse_bool(value: str | None, default: bool) -> bool:
155
- if value is None:
156
- return default
157
- stripped = value.strip().lower()
158
- if stripped in ("true", "1", "yes"):
159
- return True
160
- if stripped in ("false", "0", "no"):
161
- return False
162
- return default
163
-
164
-
165
- def _parse_int(value: str | None, default: int) -> int:
166
- if value is None:
167
- return default
168
- try:
169
- return int(value.strip())
170
- except (ValueError, TypeError):
171
- return default
172
-
173
-
174
- def _canonical_tag(tag_name: str) -> str:
175
- """Return the canonical tag name for *tag_name* (alias-aware, lowercased)."""
176
- lower = tag_name.strip().lower()
177
- return _ALIAS_TO_CANONICAL.get(lower, lower)
178
-
179
-
180
- def _extract_tag_content(text: str, tag_name: str, stop_tags: List[str] | None = None, *, conservative: bool = False) -> str:
181
- """Extract the text content of ``<tag_name>…</tag_name>`` from *text*.
182
-
183
- Fault-tolerant strategies tried in order:
184
-
185
- 1. **Properly closed**: ``<tag>content</tag>``
186
- 2. **Unclosed at next sibling opening tag**: ``<tag>content<next_tag>…``
187
- 3. **Unclosed at ``</output>``**: ``<tag>content</output>``
188
- 4. **Self-closing**: ``<tag/>``
189
- 5. **Opening tag at end of string**: ``<tag>content$``
190
-
191
- Parameters:
192
- text: The text to search within (typically the body of ``<output>``).
193
- tag_name: The tag name to extract (case-insensitive).
194
- stop_tags: Sibling tag names that signal the end of this tag's
195
- content (used for unclosed-tag detection). Defaults to
196
- ``KNOWN_TOP_LEVEL_TAGS``.
197
- """
198
- if not text or not tag_name:
199
- return ""
200
-
201
- if stop_tags is None:
202
- stop_tags = KNOWN_TOP_LEVEL_TAGS
203
-
204
- tag_esc = re.escape(tag_name)
205
-
206
- # Strategy 1: Properly closed <tag>content</tag>
207
- m = re.search(
208
- rf"<{tag_esc}[^>]*>(.*?)</{tag_esc}\s*>",
209
- text,
210
- re.DOTALL | re.IGNORECASE,
211
- )
212
- if m:
213
- return html.unescape(m.group(1))
214
-
215
- # Conservative mode: only extract properly closed tags, skip all fallbacks
216
- if conservative:
217
- return ""
218
-
219
- # Strategy 2: Unclosed — content runs until the next opening/closing
220
- # sibling tag or </output>.
221
- sibling_names = [t for t in stop_tags if t.lower() != tag_name.lower()]
222
- if sibling_names:
223
- sibling_pat = "|".join(re.escape(t) for t in sibling_names)
224
- # CRITICAL: Wrap sibling_pat in (?:...) so that | doesn't split the
225
- # leading < or </ from the alternation. Without this, e.g.
226
- # "<a|b|c" is parsed as "<a" OR "b" OR "c" — NOT "<a" OR "<b" OR "<c".
227
- boundary = rf"(?:</output\s*>|<(?:{sibling_pat})\b|</(?:{sibling_pat})\s*>)"
228
- else:
229
- boundary = r"</output\s*>"
230
-
231
- m = re.search(
232
- rf"<{tag_esc}[^>]*>(.*?)({boundary})",
233
- text,
234
- re.DOTALL | re.IGNORECASE,
235
- )
236
- if m:
237
- return html.unescape(m.group(1))
238
-
239
- # Strategy 3: Self-closing <tag/> or <tag />
240
- m = re.search(rf"<{tag_esc}[^>]*/\s*>", text, re.IGNORECASE)
241
- if m:
242
- return ""
243
-
244
- # Strategy 4: Opening tag at end of text with no closing
245
- m = re.search(
246
- rf"<{tag_esc}[^>]*>(.*?)$",
247
- text,
248
- re.DOTALL | re.IGNORECASE,
249
- )
250
- if m:
251
- content = m.group(1).strip()
252
- # Only return if there's actual content (not just whitespace)
253
- if content:
254
- return html.unescape(content)
255
-
256
- return ""
257
-
258
-
259
- def _extract_all_tag_blocks(
260
- text: str,
261
- tag_name: str,
262
- parent_close_tag: str | None = None,
263
- *,
264
- conservative: bool = False,
265
- ) -> List[str]:
266
- """Extract all ``<tag_name>…`` blocks from *text*.
267
-
268
- Used for extracting multiple ``<tool>`` blocks from ``<toolstocal>``
269
- content. Handles both properly closed and unclosed blocks.
270
-
271
- Returns a list of content strings, one per block.
272
- """
273
- if not text:
274
- return []
275
-
276
- tag_esc = re.escape(tag_name)
277
- blocks: List[str] = []
278
-
279
- # Strategy 1: Find all properly closed <tag>content</tag> blocks
280
- properly_closed = re.findall(
281
- rf"<{tag_esc}[^>]*>(.*?)</{tag_esc}\s*>",
282
- text,
283
- re.DOTALL | re.IGNORECASE,
284
- )
285
- if properly_closed:
286
- return [html.unescape(b) for b in properly_closed]
287
-
288
- # Conservative mode: only extract properly closed blocks
289
- if conservative:
290
- return []
291
-
292
- # Strategy 2: Split by <tag> openings — each segment is a block
293
- positions = [
294
- m.end() for m in re.finditer(rf"<{tag_esc}[^>]*>", text, re.IGNORECASE)
295
- ]
296
-
297
- for i, content_start in enumerate(positions):
298
- if i + 1 < len(positions):
299
- # Block ends at next <tag> opening
300
- content_end = positions[i + 1]
301
- elif parent_close_tag:
302
- # Last block — ends at parent close tag
303
- close_m = re.search(
304
- re.escape(parent_close_tag),
305
- text[content_start:],
306
- re.IGNORECASE,
307
- )
308
- content_end = content_start + close_m.start() if close_m else len(text)
309
- else:
310
- content_end = len(text)
311
-
312
- blocks.append(html.unescape(text[content_start:content_end]))
313
-
314
- return blocks
315
-
316
-
317
- def _extract_output_body(raw_text: str) -> str | None:
318
- """Extract the content between ``<output>`` and ``</output>``.
319
-
320
- If ``</output>`` is missing (unclosed), returns everything after the
321
- opening ``<output>`` tag.
322
-
323
- Returns ``None`` if no ``<output>`` opening tag is found at all.
324
- """
325
- open_match = re.search(r"<output[^>]*>", raw_text, re.IGNORECASE)
326
- if open_match is None:
327
- return None
328
-
329
- content_start = open_match.end()
330
-
331
- close_match = re.search(
332
- r"</output\s*>",
333
- raw_text[content_start:],
334
- re.IGNORECASE,
335
- )
336
- if close_match:
337
- return raw_text[content_start : content_start + close_match.start()]
338
-
339
- # Unclosed <output> — take everything after it
340
- return raw_text[content_start:]
341
-
342
-
343
def _strip_outer_noise(text: str) -> str:
    """Drop plain text surrounding the ``<output>`` block.

    This handles the case where the LLM emits prose before or after the
    ``<output>`` block, e.g.::

        "some explanation... <output>...</output>"

    Returns:
        The body found by ``_extract_output_body`` when an ``<output>``
        opening tag exists; otherwise *text* unchanged (including when it is
        empty).  Deciding what to do with text that has no ``<output>``
        wrapper is left to the caller.
    """
    if not text:
        return text

    body = _extract_output_body(text)
    # Without an <output> opening tag there is nothing to strip.  The former
    # scan for known child tags returned `text` on both of its branches, so
    # it had no effect on the result and has been removed.
    return text if body is None else body
374
-
375
-
376
def is_output_block_complete(raw_text: str) -> bool:
    """Tell whether *raw_text* holds a closed ``<output>...</output>`` block.

    Returns:
        True when an ``<output>`` opening tag is present and a ``</output>``
        closing tag appears somewhere after it.
        False for empty input, when the opening tag is missing, or when no
        closing tag follows the opening tag.
    """
    if not raw_text:
        return False

    opening = re.search(r"<output[^>]*>", raw_text, re.IGNORECASE)
    if opening is None:
        return False

    # Only a closing tag located after the opening tag counts.
    tail = raw_text[opening.end():]
    return re.search(r"</output\s*>", tail, re.IGNORECASE) is not None
390
-
391
-
392
- # ---------------------------------------------------------------------------
393
- # Core custom parser — NO xml.etree.ElementTree
394
- # ---------------------------------------------------------------------------
395
-
396
-
397
def _custom_parse(raw_text: str) -> ParsedOutput:
    """Parse raw LLM output with regex-only, fault-tolerant extraction.

    ``xml.etree.ElementTree`` is deliberately not used; every field is
    pulled out with regular expressions so that malformed XML still yields
    as much as possible.

    Steps:
        0. Inspect the ``<output>`` wrapper, auto-completing a missing
           opening tag or a missing wrapper around known child tags.  An
           incomplete block switches on conservative mode (closed tags only).
        1. Strip text outside the ``<output>`` block.
        2. Extract the simple top-level fields.
        3. Parse ``<remember>`` (structured or legacy plain text).
        4. Parse ``<toolstocal>`` into tool descriptors, with two lenient
           fallbacks if that produced nothing.
        5. Decide between ``parse_success`` and ``needs_correction``.

    Returns:
        A :class:`ParsedOutput` with ``parse_success=True`` when at least
        one meaningful field (or any tag-free text) was recovered, or with
        ``needs_correction=True`` when nothing could be parsed.
    """
    parsed = ParsedOutput(raw_text=raw_text)

    if not raw_text or not raw_text.strip():
        parsed.needs_correction = True
        return parsed

    # ── Step 0: inspect the <output> wrapper, repairing missing tags ──
    has_open = re.search(r"<output[^>]*>", raw_text, re.IGNORECASE) is not None
    has_close = re.search(r"</output\s*>", raw_text, re.IGNORECASE) is not None

    if has_open:
        # Complete only when the closing tag is present as well.
        parsed.output_block_complete = has_close
    elif has_close:
        # Closing tag without an opening tag — prepend the opening tag.
        logger.info("LLM 输出缺少 <output> 开始标签但有 </output>,自动补全")
        raw_text = "<output>\n" + raw_text.strip()
        parsed.output_block_complete = True
    else:
        # No <output> tags at all — look for any known child tag.
        has_known_tags = any(
            re.search(rf"<{re.escape(t)}[\s>]", raw_text, re.IGNORECASE)
            for t in KNOWN_TOP_LEVEL_TAGS
        )
        if has_known_tags:
            # Child tags without a wrapper — add the wrapper and parse normally.
            logger.info(
                "LLM 输出缺少 <output> 标签但包含已知子标签,"
                "自动补全 <output> 包装后解析"
            )
            raw_text = "<output>\n" + raw_text.strip() + "\n</output>"
            parsed.output_block_complete = True
        else:
            parsed.output_block_complete = False

    conservative = not parsed.output_block_complete

    if conservative:
        logger.warning(
            "XML <output> 块不完整(缺少 </output> 闭合标签),"
            "启用保守解析模式(仅提取完整闭合的标签)\n"
            "====== LLM 完整输出开始 ======\n"
            f"{raw_text}\n"
            "====== LLM 完整输出结束 ======"
        )

    # ── Step 1: strip non-XML noise (text before/after <output>) ──
    body = _strip_outer_noise(raw_text)

    # ── Step 2: extract each known top-level tag ──
    def _field(tag: str) -> str:
        """Extract one top-level tag from the body and strip it."""
        return _safe_strip(_extract_tag_content(body, tag, conservative=conservative))

    parsed.usersays_correct = _field("usersays_correct")

    # task_plan [v1.34.5]: the task plan (Markdown)
    parsed.task_plan = _field("task_plan")

    # [v1.37] <response> is no longer extracted; <reply> is the only source
    # of user-visible text.
    # [v1.38] In conservative mode <reply> still gets a lenient retry: a
    # truncated LLM output often leaves <reply> unclosed yet meaningful.
    reply_raw = _extract_tag_content(body, "reply", conservative=conservative)
    if conservative and not reply_raw.strip():
        reply_raw = _extract_tag_content(body, "reply", conservative=False)
        if reply_raw.strip():
            logger.info("保守模式下 <reply> 未闭合但通过宽松提取恢复内容")
    parsed.reply = _safe_strip(reply_raw)

    parsed.recall = _field("recall")
    parsed.knowledge = _field("knowledge")
    parsed.get_knowledge = _field("get_knowledge")

    # mainsubject [v1.15.8]: automatic session title
    parsed.mainsubject = _field("mainsubject")

    # ── Step 3: parse <remember> (may contain <type> and <content>) ──
    remember_raw = _extract_tag_content(body, "remember", conservative=conservative)
    if remember_raw.strip():
        # Structured format: <type>global</type><content>...</content>
        type_val = _extract_tag_content(
            remember_raw, "type", REMEMBER_INNER_TAGS, conservative=conservative
        )
        content_val = _extract_tag_content(
            remember_raw, "content", REMEMBER_INNER_TAGS, conservative=conservative
        )

        if content_val.strip():
            mem_type = _safe_strip(type_val)
            parsed.remember = _safe_strip(content_val)
            # Anything other than "global"/"session" (including empty) is
            # treated as a session-scoped memory.
            parsed.remember_type = mem_type if mem_type in ("global", "session") else "session"
        else:
            # Legacy plain-text format
            parsed.remember = _safe_strip(remember_raw)
            parsed.remember_type = "session"

    # ── Step 4: parse <toolstocal> → list of tool dicts ──
    toolstocal_raw = _extract_tag_content(body, "toolstocal", conservative=conservative)
    if toolstocal_raw.strip():
        parsed.tools_to_call = _parse_toolstocal(toolstocal_raw, conservative=conservative)

    # ── Step 4.5: lenient fallbacks so a parse glitch does not drop tool calls ──
    # Fallback level 1: search the whole raw text for <tool> blocks,
    # ignoring the <toolstocal> wrapper.
    if not parsed.tools_to_call:
        for block in _extract_all_tag_blocks(
            raw_text, "tool", parent_close_tag=None, conservative=False,
        ):
            tn = _safe_strip(_extract_tag_content(block, "toolname", TOOL_INNER_TAGS))
            if not tn:
                continue
            parsed.tools_to_call.append({
                "toolname": tn,
                "parms": _safe_strip(_extract_tag_content(block, "parms", TOOL_INNER_TAGS)),
                "timeout": _parse_int(
                    _extract_tag_content(block, "timeout", TOOL_INNER_TAGS),
                    _DEFAULT_TIMEOUT,
                ),
            })
            logger.info(f"[兜底L1] 从非<toolstocal>区域提取到工具调用: {tn}")

    # Fallback level 2: pair scattered <toolname> tags with a nearby <parms>.
    if not parsed.tools_to_call:
        scattered = [
            (m.start(), m.end(), name)
            for m in re.finditer(
                r"<toolname[^>]*>(.*?)</toolname\s*>", raw_text, re.DOTALL | re.IGNORECASE
            )
            if (name := html.unescape(m.group(1)).strip())
        ]

        if scattered:
            logger.info(f"[兜底L2] 找到 {len(scattered)} 个散落的 <toolname> 标签")
            for start, end, name in scattered:
                # First look in the 500 characters after the toolname, then
                # in the 200 characters before it (parms may precede it).
                windows = (
                    raw_text[end:end + 500],
                    raw_text[max(0, start - 200):start],
                )
                parms = ""
                for window in windows:
                    hit = re.search(
                        r"<parms[^>]*>(.*?)</parms\s*>",
                        window, re.DOTALL | re.IGNORECASE,
                    )
                    parms = html.unescape(hit.group(1)).strip() if hit else ""
                    if parms:
                        break

                parsed.tools_to_call.append({
                    "toolname": name,
                    "parms": parms,
                    "timeout": _DEFAULT_TIMEOUT,
                })
                logger.info(f"[兜底L2] 散落提取工具: {name}, parms={'有' if parms else '无'}")

    # ── Step 5: determine parse success ──
    has_content = any((
        parsed.reply,
        parsed.usersays_correct,
        parsed.tools_to_call,
        parsed.remember,
        parsed.recall,
        parsed.knowledge,
        parsed.get_knowledge,
    ))

    if has_content:
        parsed.parse_success = True
        return parsed

    # Nothing structured was extracted — the LLM may have skipped XML
    # entirely, so fall back to the raw text with residual tags removed.
    cleaned_no_tags = re.sub(r"<[^>]+>", "", raw_text.strip()).strip()
    if cleaned_no_tags:
        # Treat the whole tag-free output as the reply.
        parsed.reply = cleaned_no_tags
        parsed.parse_success = True
        logger.info(
            f"XML解析未提取到结构化字段,将原始文本(去除标签后)作为reply: "
            f"{cleaned_no_tags[:100]}..."
        )
    else:
        # Complete parse failure
        parsed.needs_correction = True
        logger.warning(
            f"XML解析完全失败,需要LLM修正。原始输出前200字符: {raw_text[:200]}"
        )

    return parsed
611
-
612
-
613
def _parse_toolstocal(toolstocal_content: str, *, conservative: bool = False) -> List[Dict[str, Any]]:
    """Turn the body of ``<toolstocal>`` into a list of tool descriptors.

    Each ``<tool>`` block becomes a dict with the keys ``toolname``,
    ``parms`` and ``timeout`` (``_DEFAULT_TIMEOUT`` when the timeout is
    missing or not an integer).  Blocks without a tool name are dropped.
    """

    def _inner(block: str, tag: str) -> str:
        """Extract one inner tag of a ``<tool>`` block."""
        return _extract_tag_content(block, tag, TOOL_INNER_TAGS, conservative=conservative)

    tool_blocks = _extract_all_tag_blocks(
        toolstocal_content, "tool", parent_close_tag="</toolstocal>",
        conservative=conservative,
    )

    descriptors = (
        {
            "toolname": _safe_strip(_inner(block, "toolname")),
            "parms": _safe_strip(_inner(block, "parms")),
            "timeout": _parse_int(_inner(block, "timeout"), _DEFAULT_TIMEOUT),
        }
        for block in tool_blocks
    )

    # Keep only descriptors that actually name a tool.
    return [tool for tool in descriptors if tool["toolname"]]
640
-
641
-
642
- # ---------------------------------------------------------------------------
643
- # Public API
644
- # ---------------------------------------------------------------------------
645
-
646
-
647
def parse_output(raw_text: str) -> ParsedOutput:
    """Parse the LLM's raw response into a :class:`ParsedOutput`.

    Parsing is delegated to the regex-based ``_custom_parse`` (no
    ``xml.etree.ElementTree``).  Empty input is not parsed at all: it
    immediately yields a result flagged with ``needs_correction=True`` so
    the caller can ask the LLM to re-format its output.

    Parameters:
        raw_text: The complete text returned by the LLM.

    Returns:
        A :class:`ParsedOutput` instance.
    """
    if raw_text:
        return _custom_parse(raw_text)
    return ParsedOutput(raw_text=raw_text, needs_correction=True)
668
-
669
-
670
def extract_surrounding_text(full_text: str) -> tuple[str, str]:
    """Split *full_text* around the ``<output>…</output>`` block.

    Returns:
        A ``(text_before_xml, text_after_xml)`` tuple with both parts
        stripped.  Without an ``<output>`` opening tag the whole text is
        returned as *text_before_xml* and *text_after_xml* is ``""``.  When
        the block is never closed, *text_after_xml* is everything that
        follows the opening tag.
    """
    opening = re.search(r"<output[^>]*>", full_text, re.IGNORECASE)
    if opening is None:
        return full_text.strip(), ""

    leading = full_text[: opening.start()]
    remainder = full_text[opening.end():]

    closing = re.search(r"</output\s*>", remainder, re.IGNORECASE)
    trailing = remainder if closing is None else remainder[closing.end():]

    return leading.strip(), trailing.strip()
692
-
693
-
694
- # ---------------------------------------------------------------------------
695
- # Validation
696
- # ---------------------------------------------------------------------------
697
-
698
-
699
def validate_output(parsed: ParsedOutput) -> list[str]:
    """Collect non-fatal warnings about a :class:`ParsedOutput`.

    Every entry of ``parsed.tools_to_call`` is checked for a missing
    ``toolname``, a non-positive integer ``timeout`` and an empty ``parms``
    on a named tool.  This function only reports; it does not modify
    *parsed*.

    Returns:
        The warning messages in check order; an empty list means no issues
        were detected.
    """
    issues: list[str] = []

    # --- Tool-level checks ---
    for idx, tool in enumerate(parsed.tools_to_call):
        prefix = f"tool[{idx}]"
        name = tool.get("toolname")

        if not name:
            issues.append(f"{prefix}: missing 'toolname'")

        timeout = tool.get("timeout", _DEFAULT_TIMEOUT)
        if isinstance(timeout, int) and timeout <= 0:
            issues.append(
                f"{prefix}: timeout={timeout} is not positive; "
                f"defaulting to {_DEFAULT_TIMEOUT}s"
            )

        if name and not tool.get("parms"):
            issues.append(
                f"{prefix} ('{name}'): 'parms' is empty — "
                "verify the tool requires no parameters"
            )

    # --- Semantic checks ---
    # [v1.36] askuser/finish/finish_reason are deprecated; their checks
    # were removed.

    return issues