myagent-ai 1.47.20 → 1.47.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,6 @@ from core.llm import LLMClient, LLMResponse, Message
16
16
  from agents.base import BaseAgent, AgentContext
17
17
  from core.utils import generate_id, timestamp, truncate_str
18
18
  from core.context_builder import ContextBuilder
19
- from core.output_parser import ParsedOutput, parse_output, validate_output, extract_surrounding_text
20
19
  from core.tool_dispatcher import ToolDispatcher
21
20
 
22
21
  logger = get_logger("myagent.agent.main")
@@ -501,79 +500,20 @@ class MainAgent(BaseAgent):
501
500
  logger.debug(f"V2 SSE 事件发送失败 ({event_type}): {e}")
502
501
 
503
502
  def _try_extract_partial_response(self, llm_raw: str) -> str:
504
- """[v1.15.73] 从不完整的 LLM 输出中提取部分回复内容。
505
-
506
- <output> 块被截断(缺少 </output>)时,尝试:
507
- 1. 提取 <reply>...</reply> 中已闭合的内容
508
- 2. 提取未闭合的 <reply> 后的内容(宽松模式)
509
- 3. 提取 <knowledge>...</knowledge> 中已闭合的内容(兜底)
510
- 4. 提取 <output> 后到截断点之间的纯文本
511
- 5. 去除 XML 标签后的残余文本(跳过工具执行状态文本)
503
+ """[v1.47.21] 从不完整的 LLM 输出中提取纯文本回复。
504
+
505
+ 完全依赖原生 tool_calling,不再解析 XML 格式。
506
+ 仅做简单的 XML 标签清理(兜底,防止模型意外输出 XML)。
512
507
  """
513
508
  if not llm_raw:
514
509
  return ""
515
510
 
516
511
  import re
517
- _parts = []
518
-
519
- # 策略1: 尝试提取已闭合的 <reply> 内容
520
- reply_match = re.search(
521
- r"<reply[^>]*>(.*?)</reply>",
522
- llm_raw,
523
- re.DOTALL | re.IGNORECASE,
524
- )
525
- if reply_match:
526
- text = reply_match.group(1).strip()
527
- if text:
528
- _parts.append(text)
529
-
530
- # 策略2: 尝试提取未闭合的 <reply> 内容(LLM 截断时 <reply> 常未闭合)
531
- if not _parts:
532
- reply_open_match = re.search(
533
- r"<reply[^>]*>(.*?)$",
534
- llm_raw,
535
- re.DOTALL | re.IGNORECASE,
536
- )
537
- if reply_open_match:
538
- text = reply_open_match.group(1).strip()
539
- # 去除尾部可能的不完整标签
540
- text = re.sub(r"<[^>]*$", "", text).strip()
541
- if text and len(text) > 5:
542
- _parts.append(text)
543
-
544
- # 策略3: 尝试提取已闭合的 <knowledge> 内容(兜底)
545
- if not _parts:
546
- knowledge_match = re.search(
547
- r"<knowledge[^>]*>(.*?)</knowledge>",
548
- llm_raw,
549
- re.DOTALL | re.IGNORECASE,
550
- )
551
- if knowledge_match:
552
- text = knowledge_match.group(1).strip()
553
- if text and len(text) > 20:
554
- _parts.append(text)
555
-
556
- if _parts:
557
- return "\n".join(_parts)
558
-
559
- # 策略4: 提取 <output> 标签后的内容(可能包含未闭合的标签)
560
- output_match = re.search(r"<output[^>]*>", llm_raw, re.IGNORECASE)
561
- if output_match:
562
- after_output = llm_raw[output_match.end():].strip()
563
- if after_output:
564
- cleaned = re.sub(r"<[^>]+>", "", after_output).strip()
565
- cleaned = re.sub(r"^(reasoning|assistant)\s*", "", cleaned, flags=re.IGNORECASE).strip()
566
- # 跳过工具执行状态文本(如"执行工具 task_plan:...")
567
- if cleaned and len(cleaned) > 5 and not re.match(
568
- r"^(执行工具|调用工具|Running tool|Calling tool)", cleaned, re.IGNORECASE
569
- ):
570
- return cleaned
571
-
572
- # 策略5: 提取去除 XML 标签后的整体文本
512
+ # 去除所有 XML 标签
573
513
  cleaned = re.sub(r"<[^>]+>", "", llm_raw).strip()
574
514
  cleaned = re.sub(r"^(reasoning|assistant)\s*", "", cleaned, flags=re.IGNORECASE).strip()
575
515
  # 跳过工具执行状态文本
576
- if cleaned and len(cleaned) > 10 and not re.match(
516
+ if cleaned and len(cleaned) > 5 and not re.match(
577
517
  r"^(执行工具|调用工具|Running tool|Calling tool)", cleaned, re.IGNORECASE
578
518
  ):
579
519
  return cleaned
@@ -783,13 +723,13 @@ class MainAgent(BaseAgent):
783
723
  agent_path: Optional[str] = None,
784
724
  ) -> AgentContext:
785
725
  """
786
- V2 主处理循环 — 使用结构化输出格式。
726
+ V2 主处理循环 — 使用原生 tool_calling。
787
727
 
788
728
  核心流程:
789
729
  1. 使用 ContextBuilder 构建 <context> XML
790
730
  2. 将 context 注入 SYSTEM_PROMPT,调用 LLM
791
- 3. 使用 OutputParser 解析 <output> XML
792
- 4. 根据 parsed.tools_to_call 依次执行工具
731
+ 3. LLM 通过原生 tool_calling 返回工具调用
732
+ 4. 根据 tool_calls 依次执行工具
793
733
  5. 任一工具超时 → 强制回调 LLM
794
734
  6. 根据 callback 标志决定是否回调 LLM
795
735
  7. 处理 remember/recall
@@ -1240,199 +1180,19 @@ class MainAgent(BaseAgent):
1240
1180
  continue
1241
1181
 
1242
1182
  else:
1243
- # 没有原生工具调用 → 检查是否为旧格式 <output> XML(某些模型不支持 tool_calling)
1183
+ # [v1.47.21] 没有原生工具调用 → 纯文本回复
1184
+ # 完全依赖 tool_calling,不再解析 <output> XML
1244
1185
  raw_content = (response.content or "").strip()
1245
1186
 
1246
- # [v1.47.16] 兼容旧格式:当 LLM 输出 <output> XML 时,用 output_parser 解析
1247
- if raw_content.startswith("<output") or ("<output>" in raw_content and "<toolstocal>" in raw_content):
1248
- logger.info(f"[{task_id}] 检测到旧格式 <output> XML 输出,启用 output_parser 解析")
1249
- parsed = parse_output(raw_content)
1250
-
1251
- if parsed.parse_success:
1252
- # 1) 处理 mainsubject → 更新会话标题
1253
- if parsed.mainsubject and self.dispatcher:
1254
- try:
1255
- await self.dispatcher.dispatch(
1256
- tool_name="update_conversation_title",
1257
- params={"title": parsed.mainsubject, "session_id": context.session_id},
1258
- timeout=10,
1259
- )
1260
- except Exception:
1261
- pass
1262
-
1263
- # 2) 处理 remember → 保存记忆
1264
- if parsed.remember and self.dispatcher:
1265
- try:
1266
- await self.dispatcher.dispatch(
1267
- tool_name="save_memory",
1268
- params={
1269
- "content": parsed.remember,
1270
- "type": parsed.remember_type or "session",
1271
- "session_id": context.session_id,
1272
- },
1273
- timeout=10,
1274
- )
1275
- except Exception:
1276
- pass
1277
-
1278
- # 3) 处理 task_plan
1279
- if parsed.task_plan and self.dispatcher:
1280
- try:
1281
- await self.dispatcher.dispatch(
1282
- tool_name="task_plan",
1283
- params={"action": "create", "plan": parsed.task_plan},
1284
- timeout=10,
1285
- )
1286
- current_task_plan = parsed.task_plan
1287
- await self._emit_v2_event(
1288
- "v2_task_plan",
1289
- {"plan": truncate_str(current_task_plan, 2000)},
1290
- stream_callback,
1291
- )
1292
- except Exception:
1293
- pass
1294
-
1295
- # 4) 处理 tools_to_call → 执行工具
1296
- if parsed.tools_to_call:
1297
- logger.info(f"[{task_id}] 从 <output> XML 提取到 {len(parsed.tools_to_call)} 个工具调用")
1298
-
1299
- # 添加 assistant 消息到消息列表
1300
- messages.append(Message(
1301
- role="assistant",
1302
- content=raw_content,
1303
- ))
1304
-
1305
- # 保存 LLM 原始输出
1306
- if self.memory:
1307
- self.memory.add_session(agent_id=_effective_agent_id,
1308
- session_id=context.session_id,
1309
- role="assistant",
1310
- content=raw_content,
1311
- key="llm_output",
1312
- importance=0.3,
1313
- )
1314
-
1315
- for tool_desc in parsed.tools_to_call:
1316
- _tc_name = tool_desc.get("toolname", "")
1317
- _tc_parms = tool_desc.get("parms", "{}")
1318
- _tc_timeout = int(tool_desc.get("timeout", 120))
1319
-
1320
- if not _tc_name:
1321
- continue
1322
-
1323
- # 注入 session_id
1324
- if _tc_name in ("save_memory", "recall_memory", "update_conversation_title"):
1325
- if isinstance(_tc_parms, str):
1326
- try:
1327
- _tc_parms_dict = json.loads(_tc_parms)
1328
- except (json.JSONDecodeError, TypeError):
1329
- _tc_parms_dict = {"raw_input": _tc_parms}
1330
- else:
1331
- _tc_parms_dict = _tc_parms
1332
- _tc_parms_dict.setdefault("session_id", context.session_id)
1333
- _tc_parms = json.dumps(_tc_parms_dict, ensure_ascii=False)
1334
-
1335
- # 发送工具开始事件
1336
- await self._emit_v2_event(
1337
- "v2_tool_start",
1338
- {"tool": {"toolname": _tc_name, "parms": truncate_str(str(_tc_parms), 500)}},
1339
- stream_callback,
1340
- )
1341
-
1342
- self._add_exec_event("tool_call", {
1343
- "title": f"调用工具: {_tc_name}",
1344
- "tool_name": _tc_name,
1345
- "arguments": str(_tc_parms),
1346
- })
1347
-
1348
- # 执行工具
1349
- tool_result = await self._execute_v2_tool(
1350
- _tc_name, str(_tc_parms), _tc_timeout,
1351
- context, task_id,
1352
- stream_callback=stream_callback,
1353
- sent_files=_sent_files,
1354
- agent_path=agent_path,
1355
- )
1356
-
1357
- # 提取输出
1358
- if tool_result is None:
1359
- tool_result = {"success": False, "error": "工具返回了空结果"}
1360
- _output_text = (
1361
- tool_result.get("output", "")
1362
- or tool_result.get("message", "")
1363
- or tool_result.get("stdout", "")
1364
- or tool_result.get("error", "")
1365
- )
1366
- if not _output_text and tool_result.get("data"):
1367
- try:
1368
- _output_text = json.dumps(tool_result["data"], ensure_ascii=False, default=str)[:30000]
1369
- except Exception:
1370
- _output_text = str(tool_result["data"])[:30000]
1371
-
1372
- # 发送工具结果事件
1373
- await self._emit_v2_event(
1374
- "v2_tool_result",
1375
- {"tool": {"toolname": _tc_name}, "result": {
1376
- "success": tool_result.get("success", False),
1377
- "output": truncate_str(_output_text, 30000),
1378
- "error": truncate_str(tool_result.get("error", ""), 30000),
1379
- }},
1380
- stream_callback,
1381
- )
1382
-
1383
- self._add_exec_event("tool_result", {
1384
- "title": f"工具结果: {_tc_name}",
1385
- "tool_name": _tc_name,
1386
- "success": tool_result.get("success", False),
1387
- "summary": truncate_str(_output_text, 30000),
1388
- })
1389
-
1390
- # 添加 tool result 消息
1391
- messages.append(Message(
1392
- role="user",
1393
- content=f"[工具结果: {_tc_name}] {truncate_str(_output_text, 5000)}",
1394
- ))
1395
-
1396
- # 工具执行完毕 → 继续循环让 LLM 处理结果
1397
- continue
1398
-
1399
- # 5) 没有工具但有 reply → 提取纯文本回复
1400
- if parsed.reply:
1401
- reply_text = parsed.reply.strip()
1402
- else:
1403
- # 兜底:去除所有 XML 标签
1404
- import re as _re_xml
1405
- reply_text = _re_xml.sub(r'<[^>]+>', '', raw_content).strip()
1406
-
1407
- if not reply_text:
1408
- reply_text = "处理完毕。"
1409
-
1410
- context.working_memory["final_response"] = reply_text
1411
- await self._emit_v2_event("v2_reasoning", {"content": truncate_str(reply_text, 3000)}, stream_callback)
1412
-
1413
- # 保存回复到会话记忆
1414
- if self.memory:
1415
- self.memory.add_session(agent_id=_effective_agent_id,
1416
- session_id=context.session_id,
1417
- role="assistant",
1418
- content=reply_text,
1419
- key="reply",
1420
- importance=0.5,
1421
- )
1422
-
1423
- # 保存 LLM 原始输出
1424
- if self.memory:
1425
- self.memory.add_session(agent_id=_effective_agent_id,
1426
- session_id=context.session_id,
1427
- role="assistant",
1428
- content=raw_content,
1429
- key="llm_output",
1430
- importance=0.3,
1431
- )
1432
-
1433
- break
1187
+ # 如果模型意外输出了 XML 标签,清理掉
1188
+ import re as _re_clean
1189
+ if raw_content.startswith("<") and "</" in raw_content:
1190
+ # 清除 XML 标签,提取纯文本
1191
+ cleaned = _re_clean.sub(r'<[^>]+>', '', raw_content).strip()
1192
+ if cleaned:
1193
+ raw_content = cleaned
1194
+ logger.info(f"[{task_id}] 清理了 LLM 输出中的 XML 标签")
1434
1195
 
1435
- # 纯文本回复(非 XML 格式)
1436
1196
  reply_text = raw_content
1437
1197
  logger.info(f"[{task_id}] 无工具调用,任务完成 (reply长度={len(reply_text)})")
1438
1198
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.47.20",
3
+ "version": "1.47.21",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -7901,11 +7901,10 @@ window.addEventListener('beforeunload', function() {{
7901
7901
  # 4. 检测到裸 JSON(整个回复以 { 开头):进入 action 模式,提取 thought
7902
7902
  # 5. 代码块结束后回到文本模式,继续流式推送
7903
7903
  _stream_state = {
7904
- "mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json" | "output_xml"
7904
+ "mode": "text", # "text" | "action_block" | "tasklist_block" | "bare_json"
7905
7905
  "processed_pos": 0, # 已处理到的位置(用于去重 streaming)
7906
7906
  "thought_sent": 0, # 已推送的 thought 长度
7907
7907
  "action_block_depth": 0, # ``` 嵌套深度
7908
- "reply_sent": 0, # [v1.47.16] output_xml 模式下已推送的 reply 长度
7909
7908
  }
7910
7909
 
7911
7910
  # 需要回退(hold back)的最大字符数,用于检测 ```action 或 ```tasklist 标记
@@ -7914,30 +7913,12 @@ window.addEventListener('beforeunload', function() {{
7914
7913
  _MAX_HOLD = 12
7915
7914
 
7916
7915
  async def _text_delta_callback(full_text_so_far: str, delta_text: str):
7917
- """智能流式过滤器:文本正常推送,JSON action 块拦截,<output> XML 拦截"""
7916
+ """[v1.47.21] 智能流式过滤器:文本正常推送,JSON action 块拦截"""
7918
7917
  st = _stream_state
7919
7918
  remaining = full_text_so_far[st["processed_pos"]:]
7920
7919
 
7921
7920
  while remaining:
7922
7921
  if st["mode"] == "text":
7923
- # ── [v1.47.16] 检测 <output> XML 标签 → 进入 output_xml 模式 ──
7924
- output_marker = remaining.find("<output")
7925
- if output_marker >= 0:
7926
- # 推送 <output> 之前的文本
7927
- text_before = remaining[:output_marker]
7928
- if text_before.strip():
7929
- await _write_sse({"type": "text_delta", "content": text_before})
7930
- _all_streamed_text_parts.append(text_before)
7931
- # 跳过 <output...> 开始标签
7932
- tag_end = remaining.find(">", output_marker)
7933
- if tag_end >= 0:
7934
- st["processed_pos"] += tag_end + 1
7935
- else:
7936
- st["processed_pos"] += len(remaining)
7937
- st["mode"] = "output_xml"
7938
- remaining = full_text_so_far[st["processed_pos"]:]
7939
- continue
7940
-
7941
7922
  # ── 文本模式:寻找 ```action 或 ```tasklist 标记 ──
7942
7923
  action_marker = remaining.find("```action")
7943
7924
  tasklist_marker = remaining.find("```tasklist")
@@ -8056,63 +8037,9 @@ window.addEventListener('beforeunload', function() {{
8056
8037
  remaining = ""
8057
8038
  break
8058
8039
 
8059
- elif st["mode"] == "output_xml":
8060
- # ── [v1.47.16] <output> XML 模式:提取 <reply> 内容流式推送,其余全部拦截 ──
8061
- # 策略:在 output_xml 模式下,只在检测到 <reply> 内容时推送,其他标签内容全部跳过
8062
- import re as _re_xml_stream
8063
-
8064
- # 检查 </output> 闭合标签 → 退出 output_xml 模式
8065
- close_output = remaining.find("</output>")
8066
- if close_output >= 0:
8067
- # 在闭合标签前,检查是否有未推送的 <reply> 内容
8068
- before_close = full_text_so_far[st["processed_pos"]:st["processed_pos"] + close_output]
8069
- # 尝试提取 <reply> 内容
8070
- reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', before_close)
8071
- if reply_m and reply_m.group(1).strip():
8072
- reply_content = reply_m.group(1).strip()
8073
- new_part = reply_content[st["reply_sent"]:]
8074
- if new_part:
8075
- await _write_sse({"type": "text_delta", "content": new_part})
8076
- _all_streamed_text_parts.append(new_part)
8077
- st["reply_sent"] = len(reply_content)
8078
- # 跳过到 </output> 之后
8079
- st["processed_pos"] += close_output + len("</output>")
8080
- st["mode"] = "text"
8081
- remaining = full_text_so_far[st["processed_pos"]:]
8082
- continue
8083
-
8084
- # 尚未闭合:尝试提取已闭合的 <reply>...</reply> 内容并流式推送
8085
- all_so_far = full_text_so_far[st["processed_pos"]:]
8086
- reply_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]*?)</reply>', all_so_far)
8087
- if reply_m and reply_m.group(1).strip():
8088
- reply_content = reply_m.group(1).strip()
8089
- new_part = reply_content[st["reply_sent"]:]
8090
- if new_part:
8091
- await _write_sse({"type": "text_delta", "content": new_part})
8092
- _all_streamed_text_parts.append(new_part)
8093
- st["reply_sent"] = len(reply_content)
8094
-
8095
- # 尝试提取未闭合的 <reply> 内容(流式输出中标签可能尚未关闭)
8096
- elif not reply_m:
8097
- reply_open_m = _re_xml_stream.search(r'<reply[^>]*>([\s\S]+)$', all_so_far)
8098
- if reply_open_m and reply_open_m.group(1).strip():
8099
- partial_reply = reply_open_m.group(1)
8100
- # 去除尾部可能的不完整标签
8101
- partial_reply = _re_xml_stream.sub(r'<[^>]*$', '', partial_reply).strip()
8102
- if partial_reply and len(partial_reply) > st["reply_sent"]:
8103
- new_part = partial_reply[st["reply_sent"]:]
8104
- if new_part:
8105
- await _write_sse({"type": "text_delta", "content": new_part})
8106
- _all_streamed_text_parts.append(new_part)
8107
- st["reply_sent"] = len(partial_reply)
8108
-
8109
- # 等待更多 token
8110
- remaining = ""
8111
- break
8112
-
8113
8040
  # Stream 结束后的 flush:推送所有 hold 住的文本
8114
8041
  async def _flush_remaining_text(full_text: str):
8115
- """流结束后,推送所有剩余的文本(处理 hold back 的部分)"""
8042
+ """[v1.47.21] 流结束后,推送所有剩余的文本(处理 hold back 的部分)"""
8116
8043
  st = _stream_state
8117
8044
  remaining = full_text[st["processed_pos"]:]
8118
8045
  if remaining.strip() and st["mode"] == "text":
@@ -8123,25 +8050,6 @@ window.addEventListener('beforeunload', function() {{
8123
8050
  await _write_sse({"type": "text_delta", "content": remaining})
8124
8051
  _all_streamed_text_parts.append(remaining)
8125
8052
  st["processed_pos"] = len(full_text)
8126
- elif st["mode"] == "output_xml":
8127
- # [v1.47.16] output_xml 模式下 flush:尝试提取 <reply> 内容
8128
- import re as _re_xml_flush
8129
- reply_m = _re_xml_flush.search(r'<reply[^>]*>([\s\S]*?)(?:</reply>|$)', remaining)
8130
- if reply_m and reply_m.group(1).strip():
8131
- reply_content = reply_m.group(1).strip()
8132
- new_part = reply_content[st["reply_sent"]:]
8133
- if new_part:
8134
- await _write_sse({"type": "text_delta", "content": new_part})
8135
- _all_streamed_text_parts.append(new_part)
8136
- st["reply_sent"] = len(reply_content)
8137
- # 检查 </output> 之后是否还有文本
8138
- close_pos = remaining.find("</output>")
8139
- if close_pos >= 0:
8140
- after_output = remaining[close_pos + len("</output>"):].strip()
8141
- if after_output and st["mode"] == "output_xml":
8142
- # 不推送(output_xml 模式结束后可能有残余标签文本)
8143
- pass
8144
- st["processed_pos"] = len(full_text)
8145
8053
 
8146
8054
  # Call LLM with streaming — tokens are filtered through _text_delta_callback
8147
8055
  # Call LLM with streaming + frequency_penalty to reduce repetition
@@ -2999,12 +2999,10 @@ async function selectSession(id) {
2999
2999
  return m && (m.role === 'user' || m.role === 'assistant' || m.role === 'tool');
3000
3000
  }).map(function(m) {
3001
3001
  var content = (m.content != null) ? String(m.content) : '';
3002
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
3002
+ // [v1.47.21] 清理意外输出的 XML 标签(完全依赖 tool_calling)
3003
3003
  var mkey = (m.key || '').toLowerCase();
3004
3004
  if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
3005
- if (!mkey || mkey === 'reply') {
3006
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
3007
- }
3005
+ content = content.replace(/<[^>]+>/g, ' ').replace(/\s{2,}/g, ' ').trim();
3008
3006
  }
3009
3007
  var mapped = {
3010
3008
  role: m.role || 'assistant',
@@ -3110,10 +3108,9 @@ async function loadMoreMessages() {
3110
3108
  }).map(function(m) {
3111
3109
  var content = (m.content != null) ? String(m.content) : '';
3112
3110
  var mkey = (m.key || '').toLowerCase();
3111
+ // [v1.47.21] 清理意外输出的 XML 标签
3113
3112
  if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
3114
- if (!mkey || mkey === 'reply' || (mkey !== 'tool_call' && mkey !== 'reasoning')) {
3115
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
3116
- }
3113
+ content = content.replace(/<[^>]+>/g, ' ').replace(/\s{2,}/g, ' ').trim();
3117
3114
  }
3118
3115
  var mapped = {
3119
3116
  role: m.role || 'assistant',
@@ -398,12 +398,9 @@ async function pollChatHistory() {
398
398
  }).map(function(m) {
399
399
  var content = (m.content != null) ? String(m.content) : '';
400
400
  var mkey = (m.key || '').toLowerCase();
401
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
402
- // 有 key 的消息(reasoning/reply/tool_call)一般已是纯内容,但部分模型仍会输出 XML
401
+ // [v1.47.21] 清理意外输出的 XML 标签(完全依赖 tool_calling,不再解析 XML)
403
402
  if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
404
- if (!mkey || mkey === 'reply') {
405
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
406
- }
403
+ content = content.replace(/<[^>]+>/g, ' ').replace(/\s{2,}/g, ' ').trim();
407
404
  }
408
405
  var mapped = {
409
406
  role: m.role || 'assistant',
@@ -475,11 +472,9 @@ async function forceRefreshHistory() {
475
472
  }).map(function(m) {
476
473
  var content = (m.content != null) ? String(m.content) : '';
477
474
  var mkey = (m.key || '').toLowerCase();
478
- // [v1.47.16] 剥离 XML 标签:无 key 的旧格式 + key=reply 但仍含 XML 标签的消息
475
+ // [v1.47.21] 清理意外输出的 XML 标签
479
476
  if (m.role === 'assistant' && content && content.trim().startsWith('<')) {
480
- if (!mkey || mkey === 'reply') {
481
- content = (typeof _stripXmlTags === 'function') ? _stripXmlTags(content) : content;
482
- }
477
+ content = content.replace(/<[^>]+>/g, ' ').replace(/\s{2,}/g, ' ').trim();
483
478
  }
484
479
  var mapped = {
485
480
  role: m.role || 'assistant',
@@ -1120,34 +1115,13 @@ function _showFinishNotification(text) {
1120
1115
  }
1121
1116
 
1122
1117
  /**
1123
- * Strip XML tags from text for real-time streaming preview in V2 mode.
1124
- * Shows plain text between tags so the user sees progress during LLM streaming.
1118
+ * [v1.47.21] Strip XML tags from text — simple regex cleanup for accidental XML output.
1119
+ * No longer parses <output>/<reply>/<toolstocal> — fully relies on native tool_calling.
1125
1120
  */
1126
1121
  function _stripXmlTags(xml) {
1127
1122
  if (!xml) return '';
1128
- var text = xml;
1129
- // [v1.15.12] 移除未闭合的 <task_plan>...</task_plan> 区域(流式输出中常见)
1130
- // 当只有开始标签没有闭合标签时,将开始标签到文本末尾的内容完全移除
1131
- text = text.replace(/<task_plan[^>]*>[\s\S]*?<\/task_plan>/g, ''); // 已闭合的完整 task_plan
1132
- text = text.replace(/<task_plan[^>]*>[\s\S]*$/g, ''); // 未闭合的 task_plan(流式中标签已打开但未关闭)
1133
- // [v1.37] 优先提取 <reply> 标签内容,不再兜底 <response>
1134
- var replyMatch = text.match(/<reply[^>]*>([\s\S]*?)<\/reply>/i);
1135
- if (replyMatch && replyMatch[1] && replyMatch[1].trim()) {
1136
- return replyMatch[1].trim();
1137
- }
1138
- // [v1.37] 移除 <response> 包裹(不再作为兜底提取,直接剥离标签)
1139
- text = text.replace(/<response[^>]*>|<\/response>/gi, '');
1140
- // 兜底:去除所有XML标签
1141
- return text
1142
- .replace(/<[^>]+>/g, ' ') // Replace tags with space
1143
- .replace(/&lt;/g, '<')
1144
- .replace(/&gt;/g, '>')
1145
- .replace(/&amp;/g, '&')
1146
- .replace(/&quot;/g, '"')
1147
- .replace(/&#39;/g, "'")
1148
- .replace(/&#x27;/g, "'")
1149
- .replace(/\s{3,}/g, ' ') // Collapse 3+ whitespace to single space
1150
- .trim();
1123
+ return xml.replace(/<[^>]+>/g, ' ').replace(/&lt;/g, '<').replace(/&gt;/g, '>')
1124
+ .replace(/&amp;/g, '&').replace(/\s{2,}/g, ' ').trim();
1151
1125
  }
1152
1126
 
1153
1127
  // ══════════════════════════════════════════════════════
package/worklog.md CHANGED
@@ -117,3 +117,30 @@ Stage Summary:
117
117
  - Streaming filter extracts only `<reply>` content for real-time display
118
118
  - Frontend strips XML from both keyless and key="reply" assistant messages
119
119
  - VNC mode Firefox support fully functional
120
+
121
+ ---
122
+ Task ID: 2
123
+ Agent: Main
124
+ Task: Fix Firefox+VNC browser_stealth: content/close/evaluate/wait_for + browser_open/web_control VNC fallback
125
+
126
+ Work Log:
127
+ - Analyzed logs: stealth_browser_navigate now works (Popen non-blocking), but stealth_browser_content returns "不支持" and agent falls back to web_control/browser_open which also fail in VNC mode
128
+ - Added `_firefox_read_sessionstore()` method: reads Firefox's recovery.jsonlz4 (mozLz4 format) to get current tab URL/title
129
+ - Added `_firefox_get_content()` method: screenshot + sessionstore → returns screenshot path, URL, title, tabs list
130
+ - Changed `get_content()` Firefox mode: calls `_firefox_get_content()` instead of returning error
131
+ - Changed `get_html()` Firefox mode: calls `_firefox_get_content()` instead of returning error
132
+ - Changed `close()` Firefox mode: VNC mode only clears internal state, does NOT kill Firefox (managed by vnc_manager)
133
+ - Changed `StealthBrowserCloseSkill.execute()`: VNC mode returns "会话已释放" instead of "浏览器已关闭"
134
+ - Changed `evaluate()` Firefox mode: better error message suggesting stealth_browser alternatives
135
+ - Changed `wait_for()` Firefox mode: sleep + sessionstore read instead of returning error
136
+ - Changed `browser_open` in chromedev_mcp.py: VNC mode without Chromium → returns error suggesting stealth_browser
137
+ - Added VNC mode hint injection in main_agent.py system prompt: tells agent to use stealth_browser_* tools in VNC mode
138
+ - Published v1.47.20 to npm
139
+
140
+ Stage Summary:
141
+ - Firefox+VNC mode: stealth_browser_content now returns screenshot + tab info (URL/title/tabs)
142
+ - Firefox+VNC mode: close() no longer kills VNC browser process
143
+ - Firefox+VNC mode: wait_for() works (sleep + sessionstore), evaluate() has actionable error message
144
+ - browser_open: VNC mode without Chromium → clear error suggesting stealth_browser
145
+ - main_agent: VNC mode system prompt tells agent to prefer stealth_browser over browser_open/web_control
146
+ - All syntax checks passed
@@ -1,730 +0,0 @@
1
- """
2
- Custom Fault-Tolerant XML Output Parser Module.
3
-
4
- Parses the XML ``<output>`` block generated by the LLM in response to the
5
- system prompt. The LLM produces structured XML that drives the agent's
6
- execution loop — including tool calls, memory operations, user interaction
7
- hints, and loop-control flags.
8
-
9
- **This module does NOT use xml.etree.ElementTree.** All parsing is done with
10
- pure Python + regex to achieve maximum fault tolerance.
11
-
12
- Expected XML schema produced by the LLM::
13
-
14
- <output>
15
- <mainsubject>当前对话的6字以内标题</mainsubject>
16
- <usersays_correct>...</usersays_correct>
17
- <reply>展示给用户的文本内容</reply>
18
- <toolstocal>
19
- <tool>
20
-
21
- <toolname>工具名</toolname>
22
- <parms>参数JSON或描述</parms>
23
- <timeout>预估超时时限(秒)</timeout>
24
- </tool>
25
- </toolstocal>
26
- <remember>
27
- <type>global|session</type>
28
- <content>记忆内容</content>
29
- </remember>
30
- <recall>下一轮需要调取的记忆</recall>
31
- <get_knowledge>下一轮需要搜索获得的知识</get_knowledge>
32
- </output>
33
-
34
- Fault-tolerance features:
35
-
36
- * Text before ``<output>`` or after ``</output>`` is silently stripped.
37
- * Unclosed tags are auto-closed at the next sibling tag boundary.
38
- * Self-closing tags (``<tag/>``) resolve to empty strings.
39
- * Case-insensitive tag matching (``<OUTPUT>`` == ``<output>``).
40
- * Tag-name aliases (reserved for future use).
41
- * If extraction yields nothing meaningful, ``needs_correction`` is set to
42
- ``True`` so the caller can ask the LLM to re-format.
43
- """
44
-
45
- from __future__ import annotations
46
-
47
- import html
48
- import re
49
- from dataclasses import dataclass, field
50
- from typing import Any, Dict, List
51
-
52
- from core.logger import get_logger
53
-
54
- logger = get_logger("myagent.output_parser")
55
-
56
- # ---------------------------------------------------------------------------
57
- # Constants
58
- # ---------------------------------------------------------------------------
59
-
60
- _DEFAULT_TIMEOUT: int = 120
61
-
62
- # All top-level tags we recognise inside <output>.
63
- KNOWN_TOP_LEVEL_TAGS = [
64
- "usersays_correct",
65
- "task_plan", # 任务计划(Markdown格式)
66
- "toolstocal",
67
- "remember",
68
- "recall",
69
- "knowledge",
70
- "get_knowledge",
71
-
72
- "reply", # [v1.36] 用户可见文本(顶层标签,不再嵌套在 <response> 内)
73
- # [v1.37] "response" 已移除 — 不再兼容 <response> 包裹,统一使用 <reply>
74
- "mainsubject", # [v1.15.8] 会话标题自动命名
75
- ]
76
-
77
- # Inner tags inside each <tool>.
78
- TOOL_INNER_TAGS = [
79
- "toolname",
80
- "parms",
81
- "timeout",
82
- ]
83
-
84
- # Inner tags inside <remember>.
85
- REMEMBER_INNER_TAGS = ["type", "content"]
86
-
87
- # Tag aliases: canonical name -> list of aliases.
88
- _TAG_ALIASES: Dict[str, List[str]] = {
89
- # [v1.36] askuser/ask_user aliases removed — tag no longer used
90
- }
91
-
92
- # Build reverse lookup: alias -> canonical.
93
- _ALIAS_TO_CANONICAL: Dict[str, str] = {}
94
- for _canonical, _aliases in _TAG_ALIASES.items():
95
- for _alias in _aliases:
96
- _ALIAS_TO_CANONICAL[_alias.lower()] = _canonical
97
-
98
-
99
- # ---------------------------------------------------------------------------
100
- # Data classes
101
- # ---------------------------------------------------------------------------
102
-
103
-
104
- @dataclass
105
- class ParsedOutput:
106
- """Structured representation of the LLM's ``<output>`` block.
107
-
108
- Attributes:
109
- usersays_correct: Corrected / canonicalised version of the user's
110
- voice input.
111
- task_plan: Updated or new task plan (may contain Markdown).
112
- tools_to_call: Ordered list of tool descriptors to execute.
113
- remember: Content that should be persisted to the agent's memory.
114
- remember_type: "global" (cross-session) or "session" (current session only).
115
- recall: Memory keys / descriptions to retrieve for the next loop
116
- iteration.
117
- knowledge: Knowledge content the LLM wants to persist.
118
- get_knowledge: Knowledge search keywords for the next loop iteration.
119
- reply: User-visible text content extracted from <reply> tag (sole display content).
120
- raw_text: The verbatim raw text returned by the LLM.
121
- parse_success: Whether parsing extracted at least one meaningful field.
122
- needs_correction: When ``True``, the caller should send the raw text
123
- back to the LLM for re-formatting.
124
- """
125
-
126
- usersays_correct: str = ""
127
- task_plan: str = "" # 任务计划(Markdown格式)
128
- tools_to_call: List[Dict[str, Any]] = field(default_factory=list)
129
- remember: str = ""
130
- remember_type: str = ""
131
- recall: str = ""
132
- knowledge: str = ""
133
- get_knowledge: str = ""
134
-
135
- reply: str = "" # [v1.37] 用户可见文本(<reply> 标签,唯一回复来源)
136
- mainsubject: str = "" # [v1.15.8] 会话标题自动命名(6字以内)
137
- raw_text: str = ""
138
- parse_success: bool = False
139
- needs_correction: bool = False
140
- output_block_complete: bool = False # </output> 闭合标签是否存在
141
-
142
-
143
- # ---------------------------------------------------------------------------
144
- # Low-level extraction helpers
145
- # ---------------------------------------------------------------------------
146
-
147
-
148
- def _safe_strip(value: str | None) -> str:
149
- if value is None:
150
- return ""
151
- return value.strip()
152
-
153
-
154
- def _parse_bool(value: str | None, default: bool) -> bool:
155
- if value is None:
156
- return default
157
- stripped = value.strip().lower()
158
- if stripped in ("true", "1", "yes"):
159
- return True
160
- if stripped in ("false", "0", "no"):
161
- return False
162
- return default
163
-
164
-
165
- def _parse_int(value: str | None, default: int) -> int:
166
- if value is None:
167
- return default
168
- try:
169
- return int(value.strip())
170
- except (ValueError, TypeError):
171
- return default
172
-
173
-
174
- def _canonical_tag(tag_name: str) -> str:
175
- """Return the canonical tag name for *tag_name* (alias-aware, lowercased)."""
176
- lower = tag_name.strip().lower()
177
- return _ALIAS_TO_CANONICAL.get(lower, lower)
178
-
179
-
180
- def _extract_tag_content(text: str, tag_name: str, stop_tags: List[str] | None = None, *, conservative: bool = False) -> str:
181
- """Extract the text content of ``<tag_name>…</tag_name>`` from *text*.
182
-
183
- Fault-tolerant strategies tried in order:
184
-
185
- 1. **Properly closed**: ``<tag>content</tag>``
186
- 2. **Unclosed at next sibling opening tag**: ``<tag>content<next_tag>…``
187
- 3. **Unclosed at ``</output>``**: ``<tag>content</output>``
188
- 4. **Self-closing**: ``<tag/>``
189
- 5. **Opening tag at end of string**: ``<tag>content$``
190
-
191
- Parameters:
192
- text: The text to search within (typically the body of ``<output>``).
193
- tag_name: The tag name to extract (case-insensitive).
194
- stop_tags: Sibling tag names that signal the end of this tag's
195
- content (used for unclosed-tag detection). Defaults to
196
- ``KNOWN_TOP_LEVEL_TAGS``.
197
- """
198
- if not text or not tag_name:
199
- return ""
200
-
201
- if stop_tags is None:
202
- stop_tags = KNOWN_TOP_LEVEL_TAGS
203
-
204
- tag_esc = re.escape(tag_name)
205
-
206
- # Strategy 1: Properly closed <tag>content</tag>
207
- m = re.search(
208
- rf"<{tag_esc}[^>]*>(.*?)</{tag_esc}\s*>",
209
- text,
210
- re.DOTALL | re.IGNORECASE,
211
- )
212
- if m:
213
- return html.unescape(m.group(1))
214
-
215
- # Conservative mode: only extract properly closed tags, skip all fallbacks
216
- if conservative:
217
- return ""
218
-
219
- # Strategy 2: Unclosed — content runs until the next opening/closing
220
- # sibling tag or </output>.
221
- sibling_names = [t for t in stop_tags if t.lower() != tag_name.lower()]
222
- if sibling_names:
223
- sibling_pat = "|".join(re.escape(t) for t in sibling_names)
224
- # CRITICAL: Wrap sibling_pat in (?:...) so that | doesn't split the
225
- # leading < or </ from the alternation. Without this, e.g.
226
- # "<a|b|c" is parsed as "<a" OR "b" OR "c" — NOT "<a" OR "<b" OR "<c".
227
- boundary = rf"(?:</output\s*>|<(?:{sibling_pat})\b|</(?:{sibling_pat})\s*>)"
228
- else:
229
- boundary = r"</output\s*>"
230
-
231
- m = re.search(
232
- rf"<{tag_esc}[^>]*>(.*?)({boundary})",
233
- text,
234
- re.DOTALL | re.IGNORECASE,
235
- )
236
- if m:
237
- return html.unescape(m.group(1))
238
-
239
- # Strategy 3: Self-closing <tag/> or <tag />
240
- m = re.search(rf"<{tag_esc}[^>]*/\s*>", text, re.IGNORECASE)
241
- if m:
242
- return ""
243
-
244
- # Strategy 4: Opening tag at end of text with no closing
245
- m = re.search(
246
- rf"<{tag_esc}[^>]*>(.*?)$",
247
- text,
248
- re.DOTALL | re.IGNORECASE,
249
- )
250
- if m:
251
- content = m.group(1).strip()
252
- # Only return if there's actual content (not just whitespace)
253
- if content:
254
- return html.unescape(content)
255
-
256
- return ""
257
-
258
-
259
- def _extract_all_tag_blocks(
260
- text: str,
261
- tag_name: str,
262
- parent_close_tag: str | None = None,
263
- *,
264
- conservative: bool = False,
265
- ) -> List[str]:
266
- """Extract all ``<tag_name>…`` blocks from *text*.
267
-
268
- Used for extracting multiple ``<tool>`` blocks from ``<toolstocal>``
269
- content. Handles both properly closed and unclosed blocks.
270
-
271
- Returns a list of content strings, one per block.
272
- """
273
- if not text:
274
- return []
275
-
276
- tag_esc = re.escape(tag_name)
277
- blocks: List[str] = []
278
-
279
- # Strategy 1: Find all properly closed <tag>content</tag> blocks
280
- properly_closed = re.findall(
281
- rf"<{tag_esc}[^>]*>(.*?)</{tag_esc}\s*>",
282
- text,
283
- re.DOTALL | re.IGNORECASE,
284
- )
285
- if properly_closed:
286
- return [html.unescape(b) for b in properly_closed]
287
-
288
- # Conservative mode: only extract properly closed blocks
289
- if conservative:
290
- return []
291
-
292
- # Strategy 2: Split by <tag> openings — each segment is a block
293
- positions = [
294
- m.end() for m in re.finditer(rf"<{tag_esc}[^>]*>", text, re.IGNORECASE)
295
- ]
296
-
297
- for i, content_start in enumerate(positions):
298
- if i + 1 < len(positions):
299
- # Block ends at next <tag> opening
300
- content_end = positions[i + 1]
301
- elif parent_close_tag:
302
- # Last block — ends at parent close tag
303
- close_m = re.search(
304
- re.escape(parent_close_tag),
305
- text[content_start:],
306
- re.IGNORECASE,
307
- )
308
- content_end = content_start + close_m.start() if close_m else len(text)
309
- else:
310
- content_end = len(text)
311
-
312
- blocks.append(html.unescape(text[content_start:content_end]))
313
-
314
- return blocks
315
-
316
-
317
- def _extract_output_body(raw_text: str) -> str | None:
318
- """Extract the content between ``<output>`` and ``</output>``.
319
-
320
- If ``</output>`` is missing (unclosed), returns everything after the
321
- opening ``<output>`` tag.
322
-
323
- Returns ``None`` if no ``<output>`` opening tag is found at all.
324
- """
325
- open_match = re.search(r"<output[^>]*>", raw_text, re.IGNORECASE)
326
- if open_match is None:
327
- return None
328
-
329
- content_start = open_match.end()
330
-
331
- close_match = re.search(
332
- r"</output\s*>",
333
- raw_text[content_start:],
334
- re.IGNORECASE,
335
- )
336
- if close_match:
337
- return raw_text[content_start : content_start + close_match.start()]
338
-
339
- # Unclosed <output> — take everything after it
340
- return raw_text[content_start:]
341
-
342
-
343
def _strip_outer_noise(text: str) -> str:
    """Remove text that is outside the ``<output>`` block.

    This handles the case where the LLM outputs plain text before or
    after the ``<output>`` block, e.g.::

        "我来使用 Python 脚本下载... <output>...</output>"

    Returns:
        The ``<output>…</output>`` body as produced by
        ``_extract_output_body``, or *text* unchanged when it is empty or
        contains no ``<output>`` opening tag. In the latter case the caller
        decides whether the text still holds usable tags.
    """
    if not text:
        return text

    body = _extract_output_body(text)
    # Without an <output> wrapper the text is returned as-is. The previous
    # scan for known tags was dropped because both of its outcomes returned
    # the same unchanged text.
    return text if body is None else body
374
-
375
-
376
def is_output_block_complete(raw_text: str) -> bool:
    """Tell whether *raw_text* holds an ``<output>`` tag followed by ``</output>``.

    Returns:
        True when an opening ``<output>`` tag exists and a closing
        ``</output>`` appears somewhere after it; False for empty input,
        a missing opening tag, or an opening tag that is never closed.
    """
    if not raw_text:
        return False

    opening = re.search(r"<output[^>]*>", raw_text, re.IGNORECASE)
    if opening is None:
        return False

    # Only text after the opening tag counts towards the closing tag
    tail = raw_text[opening.end():]
    return re.search(r"</output\s*>", tail, re.IGNORECASE) is not None
390
-
391
-
392
- # ---------------------------------------------------------------------------
393
- # Core custom parser — NO xml.etree.ElementTree
394
- # ---------------------------------------------------------------------------
395
-
396
-
397
- def _custom_parse(raw_text: str) -> ParsedOutput:
398
- """Fully custom, regex-based XML parser with maximum fault tolerance.
399
-
400
- This function does NOT use ``xml.etree.ElementTree`` at all. Every
401
- extraction is done via regex patterns that handle malformed XML
402
- gracefully.
403
-
404
- Returns a :class:`ParsedOutput` with ``parse_success=True`` if at least
405
- one meaningful field was extracted, or ``needs_correction=True`` if
406
- nothing could be parsed.
407
- """
408
- parsed = ParsedOutput(raw_text=raw_text)
409
-
410
- if not raw_text or not raw_text.strip():
411
- parsed.needs_correction = True
412
- return parsed
413
-
414
- # ── Step 0: inspect the <output> block; handle a missing opening/closing tag ──
415
- _has_open = bool(re.search(r"<output[^>]*>", raw_text, re.IGNORECASE))
416
- _has_close = bool(re.search(r"</output\s*>", raw_text, re.IGNORECASE))
417
-
418
- if not _has_open and not _has_close:
419
- # No <output> tag at all — check whether any known child tag is present
420
- _has_known_tags = any(
421
- re.search(rf"<{re.escape(t)}[\s>]", raw_text, re.IGNORECASE)
422
- for t in KNOWN_TOP_LEVEL_TAGS
423
- )
424
- if _has_known_tags:
425
- # Child tags present but no <output> wrapper — add the wrapper, then parse normally
426
- logger.info(
427
- "LLM 输出缺少 <output> 标签但包含已知子标签,"
428
- "自动补全 <output> 包装后解析"
429
- )
430
- raw_text = "<output>\n" + raw_text.strip() + "\n</output>"
431
- parsed.output_block_complete = True
432
- else:
433
- parsed.output_block_complete = False
434
- elif _has_open and not _has_close:
435
- parsed.output_block_complete = False
436
- elif not _has_open and _has_close:
437
- # Closing tag without an opening tag — prepend the opening tag
438
- logger.info("LLM 输出缺少 <output> 开始标签但有 </output>,自动补全")
439
- raw_text = "<output>\n" + raw_text.strip()
440
- parsed.output_block_complete = True
441
- else:
442
- parsed.output_block_complete = True
443
-
444
- conservative = not parsed.output_block_complete
445
-
446
- if conservative:
447
- logger.warning(
448
- "XML <output> 块不完整(缺少 </output> 闭合标签),"
449
- "启用保守解析模式(仅提取完整闭合的标签)\n"
450
- "====== LLM 完整输出开始 ======\n"
451
- f"{raw_text}\n"
452
- "====== LLM 完整输出结束 ======"
453
- )
454
-
455
- # ── Step 1: Strip non-XML noise (text before/after <output>) ──
456
- body = _strip_outer_noise(raw_text)
457
-
458
- # ── Step 2: Extract each known top-level tag ──
459
-
460
- # usersays_correct
461
- raw_val = _extract_tag_content(body, "usersays_correct", conservative=conservative)
462
- parsed.usersays_correct = _safe_strip(raw_val)
463
-
464
- # task_plan [v1.34.5] task plan (Markdown format)
465
- raw_val = _extract_tag_content(body, "task_plan", conservative=conservative)
466
- parsed.task_plan = _safe_strip(raw_val)
467
-
468
- # [v1.37] <response> is no longer extracted — <reply> is used throughout; <response> tags are stripped and not kept
469
- # reply — user-visible text (the only source of the reply)
470
- # [v1.38] In conservative mode <reply> still gets a lenient retry — when the LLM output is truncated, <reply> is often incomplete yet holds important content
471
- raw_val = _extract_tag_content(body, "reply", conservative=conservative)
472
- if not raw_val.strip() and conservative:
473
- # Conservative mode found no closed <reply>; retry in lenient mode (unclosed tags allowed)
474
- raw_val = _extract_tag_content(body, "reply", conservative=False)
475
- if raw_val.strip():
476
- logger.info("保守模式下 <reply> 未闭合但通过宽松提取恢复内容")
477
- parsed.reply = _safe_strip(raw_val)
478
-
479
- # recall
480
- raw_val = _extract_tag_content(body, "recall", conservative=conservative)
481
- parsed.recall = _safe_strip(raw_val)
482
-
483
- # knowledge
484
- raw_val = _extract_tag_content(body, "knowledge", conservative=conservative)
485
- parsed.knowledge = _safe_strip(raw_val)
486
-
487
- # get_knowledge
488
- raw_val = _extract_tag_content(body, "get_knowledge", conservative=conservative)
489
- parsed.get_knowledge = _safe_strip(raw_val)
490
-
491
-
492
-
493
- # mainsubject [v1.15.8] automatic session-title naming
494
- raw_val = _extract_tag_content(body, "mainsubject", conservative=conservative)
495
- parsed.mainsubject = _safe_strip(raw_val)
496
-
497
- # ── Step 3: Parse <remember> (may contain <type> and <content>) ──
498
- remember_raw = _extract_tag_content(body, "remember", conservative=conservative)
499
- if remember_raw.strip():
500
- # Try structured format: <type>global</type><content>...</content>
501
- type_val = _extract_tag_content(remember_raw, "type", REMEMBER_INNER_TAGS, conservative=conservative)
502
- content_val = _extract_tag_content(remember_raw, "content", REMEMBER_INNER_TAGS, conservative=conservative)
503
-
504
- if content_val.strip():
505
- mem_type = _safe_strip(type_val) or "session"
506
- if mem_type not in ("global", "session"):
507
- mem_type = "session"
508
- parsed.remember = _safe_strip(content_val)
509
- parsed.remember_type = mem_type
510
- else:
511
- # Legacy plain-text format
512
- parsed.remember = _safe_strip(remember_raw)
513
- parsed.remember_type = "session"
514
-
515
- # ── Step 4: Parse <toolstocal> → list of tool dicts ──
516
- toolstocal_raw = _extract_tag_content(body, "toolstocal", conservative=conservative)
517
- if toolstocal_raw.strip():
518
- parsed.tools_to_call = _parse_toolstocal(toolstocal_raw, conservative=conservative)
519
-
520
- # ── Step 4.5: fallback — leniently extract tool calls so execution is not interrupted by parse errors ──
521
- # Strategy priority:
522
- # 1. _parse_toolstocal already extracted tools → do nothing
523
- # 2. Search the whole output directly for <tool>...</tool> blocks (ignoring the toolstocal wrapper)
524
- # 3. Search for scattered <toolname>...</toolname> + <parms>...</parms> pairs
525
- if not parsed.tools_to_call:
526
- # Fallback level 1: search the entire raw text directly for <tool> blocks
527
- _raw_tool_blocks = _extract_all_tag_blocks(
528
- raw_text, "tool", parent_close_tag=None, conservative=False,
529
- )
530
- for block in _raw_tool_blocks:
531
- tn = _safe_strip(_extract_tag_content(block, "toolname", TOOL_INNER_TAGS))
532
- if tn:
533
- parsed.tools_to_call.append({
534
- "toolname": tn,
535
- "parms": _safe_strip(_extract_tag_content(block, "parms", TOOL_INNER_TAGS)),
536
- "timeout": _parse_int(_extract_tag_content(block, "timeout", TOOL_INNER_TAGS), _DEFAULT_TIMEOUT),
537
- })
538
- logger.info(f"[兜底L1] 从非<toolstocal>区域提取到工具调用: {tn}")
539
-
540
- if not parsed.tools_to_call:
541
- # Fallback level 2: find scattered <toolname>...</toolname>, then look for the nearest <parms> in the same region
542
- _toolname_positions = []
543
- for m in re.finditer(r"<toolname[^>]*>(.*?)</toolname\s*>", raw_text, re.DOTALL | re.IGNORECASE):
544
- tn = html.unescape(m.group(1)).strip()
545
- if tn:
546
- _toolname_positions.append((m.start(), m.end(), tn))
547
-
548
- if _toolname_positions:
549
- logger.info(f"[兜底L2] 找到 {len(_toolname_positions)} 个散落的 <toolname> 标签")
550
- for _i, (_start, _end, _tn) in enumerate(_toolname_positions):
551
- # Search the 500 characters after the toolname for the nearest <parms>
552
- _search_region = raw_text[_end:_end + 500]
553
- _parms_match = re.search(
554
- r"<parms[^>]*>(.*?)</parms\s*>",
555
- _search_region, re.DOTALL | re.IGNORECASE,
556
- )
557
- _parms = html.unescape(_parms_match.group(1)).strip() if _parms_match else ""
558
-
559
- # Also try the 200 characters before the toolname (parms may precede toolname)
560
- if not _parms:
561
- _pre_region = raw_text[max(0, _start - 200):_start]
562
- _parms_match = re.search(
563
- r"<parms[^>]*>(.*?)</parms\s*>",
564
- _pre_region, re.DOTALL | re.IGNORECASE,
565
- )
566
- _parms = html.unescape(_parms_match.group(1)).strip() if _parms_match else ""
567
-
568
- parsed.tools_to_call.append({
569
- "toolname": _tn,
570
- "parms": _parms,
571
- "timeout": _DEFAULT_TIMEOUT,
572
- })
573
- logger.info(f"[兜底L2] 散落提取工具: {_tn}, parms={'有' if _parms else '无'}")
574
-
575
- # ── Step 5: Determine parse success ──
- # NOTE: task_plan and mainsubject are not part of this check; a result holding only those fields takes the else branch below.
576
- has_content = bool(
577
- parsed.reply
578
- or parsed.usersays_correct
579
- or parsed.tools_to_call
580
- or parsed.remember
581
- or parsed.recall
582
- or parsed.knowledge
583
- or parsed.get_knowledge
584
- )
585
-
586
- if has_content:
587
- parsed.parse_success = True
588
- else:
589
- # Nothing was extracted — check if there's any raw text that could
590
- # be a response (the LLM might have skipped XML entirely)
591
- cleaned = raw_text.strip()
592
- # Remove any residual XML tags
593
- cleaned_no_tags = re.sub(r"<[^>]+>", "", cleaned).strip()
594
- if cleaned_no_tags:
595
- # The LLM output something but not in XML format
596
- # Treat the entire output as a response
597
- parsed.reply = cleaned_no_tags
598
- parsed.parse_success = True
599
- logger.info(
600
- f"XML解析未提取到结构化字段,将原始文本(去除标签后)作为reply: "
601
- f"{cleaned_no_tags[:100]}..."
602
- )
603
- else:
604
- # Complete parse failure
605
- parsed.needs_correction = True
606
- logger.warning(
607
- f"XML解析完全失败,需要LLM修正。原始输出前200字符: {raw_text[:200]}"
608
- )
609
-
610
- return parsed
611
-
612
-
613
def _parse_toolstocal(toolstocal_content: str, *, conservative: bool = False) -> List[Dict[str, Any]]:
    """Turn the body of ``<toolstocal>`` into a list of tool descriptors.

    Each ``<tool>`` block contributes a dict with the keys ``toolname``,
    ``parms`` and ``timeout``. Blocks whose ``toolname`` is empty are
    dropped. *conservative* is forwarded to every extraction helper.
    """

    def _field(block: str, name: str) -> str:
        # Read one inner tag of a <tool> block
        return _extract_tag_content(block, name, TOOL_INNER_TAGS, conservative=conservative)

    tool_blocks = _extract_all_tag_blocks(
        toolstocal_content,
        "tool",
        parent_close_tag="</toolstocal>",
        conservative=conservative,
    )

    descriptors: List[Dict[str, Any]] = []
    for block in tool_blocks:
        name = _safe_strip(_field(block, "toolname"))
        params = _safe_strip(_field(block, "parms"))
        timeout = _parse_int(_field(block, "timeout"), _DEFAULT_TIMEOUT)
        if not name:
            # A descriptor without a tool name is unusable
            continue
        descriptors.append({"toolname": name, "parms": params, "timeout": timeout})

    return descriptors
640
-
641
-
642
- # ---------------------------------------------------------------------------
643
- # Public API
644
- # ---------------------------------------------------------------------------
645
-
646
-
647
def parse_output(raw_text: str) -> ParsedOutput:
    """Turn the LLM's raw response into a :class:`ParsedOutput`.

    Non-empty input is handed to ``_custom_parse``, the regex-based parser
    (no ``xml.etree.ElementTree``). Empty input short-circuits to a result
    flagged ``needs_correction=True`` so the caller can ask the LLM to
    re-format its output.

    Parameters:
        raw_text: The complete text returned by the LLM.

    Returns:
        A :class:`ParsedOutput` instance.
    """
    if raw_text:
        return _custom_parse(raw_text)
    return ParsedOutput(raw_text=raw_text, needs_correction=True)
668
-
669
-
670
def extract_surrounding_text(full_text: str) -> tuple[str, str]:
    """Return the text found before and after the ``<output>`` block.

    Returns:
        A ``(text_before_xml, text_after_xml)`` pair, both stripped.
        Without an ``<output>`` opening tag the whole text is returned as
        *text_before_xml* and *text_after_xml* is ``""``. When the block is
        never closed, everything after the opening tag is returned as
        *text_after_xml*.
    """
    opening = re.search(r"<output[^>]*>", full_text, re.IGNORECASE)
    if opening is None:
        return full_text.strip(), ""

    before = full_text[: opening.start()]
    remainder = full_text[opening.end():]

    closing = re.search(r"</output\s*>", remainder, re.IGNORECASE)
    after = remainder if closing is None else remainder[closing.end():]

    return before.strip(), after.strip()
692
-
693
-
694
- # ---------------------------------------------------------------------------
695
- # Validation
696
- # ---------------------------------------------------------------------------
697
-
698
-
699
def validate_output(parsed: ParsedOutput) -> list[str]:
    """Check a :class:`ParsedOutput` and collect non-fatal warnings.

    Only the entries of ``parsed.tools_to_call`` are inspected: a missing
    ``toolname``, a non-positive integer ``timeout``, and a named tool with
    empty ``parms`` each produce one message. An empty list means nothing
    was flagged.
    """
    issues: list[str] = []

    # --- Tool-level checks ---
    for position, entry in enumerate(parsed.tools_to_call):
        label = f"tool[{position}]"
        name = entry.get("toolname")

        if not name:
            issues.append(f"{label}: missing 'toolname'")

        limit = entry.get("timeout", _DEFAULT_TIMEOUT)
        if isinstance(limit, int) and limit <= 0:
            issues.append(
                f"{label}: timeout={limit} is not positive; "
                f"defaulting to {_DEFAULT_TIMEOUT}s"
            )

        if name and not entry.get("parms"):
            issues.append(
                f"{label} ('{name}'): 'parms' is empty — "
                "verify the tool requires no parameters"
            )

    # --- Semantic checks ---
    # [v1.36] askuser/finish/finish_reason are deprecated; their checks were removed

    return issues