myagent-ai 1.26.4 → 1.26.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/main_agent.py +101 -82
- package/config.py +2 -2
- package/core/llm.py +23 -2
- package/core/output_parser.py +87 -2
- package/main.py +0 -11
- package/package.json +1 -1
- package/scripts/cli.py +1 -124
- package/docs/配置使用说明.md +0 -0
- package/skills/file_skill.py +0 -573
package/agents/main_agent.py
CHANGED
|
@@ -45,7 +45,7 @@ class MainAgent(BaseAgent):
|
|
|
45
45
|
|
|
46
46
|
严格以XML格式化输出以下内容,否则解析器无法解析:"""
|
|
47
47
|
|
|
48
|
-
# XML
|
|
48
|
+
# XML 输出格式规范(嵌入 SYSTEM_PROMPT,只出现一次)
|
|
49
49
|
xml_prompt = """
|
|
50
50
|
|
|
51
51
|
<output>
|
|
@@ -756,7 +756,6 @@ GUI桌面 (仅Windows/macOS):
|
|
|
756
756
|
+ self.SYSTEM_PROMPT.split("\n", 1)[1]
|
|
757
757
|
)
|
|
758
758
|
system_content = _prompt_with_placeholder.replace(_CONTEXT_PLACEHOLDER, context_xml)
|
|
759
|
-
system_content = system_content + "\n最后,再检查输出格式,确保满足以下要求:" + self.xml_prompt
|
|
760
759
|
|
|
761
760
|
# Step 3: 构建 LLM 消息(必须包含 role=user,否则 OpenAI 兼容 API 返回 400)
|
|
762
761
|
messages = [Message(role="system", content=system_content)]
|
|
@@ -875,7 +874,13 @@ GUI桌面 (仅Windows/macOS):
|
|
|
875
874
|
break
|
|
876
875
|
|
|
877
876
|
llm_raw = response.content
|
|
877
|
+
_finish_reason = getattr(response, 'finish_reason', '') or ''
|
|
878
878
|
logger.debug(f"[{task_id}] LLM 输出 (前500字): {llm_raw[:500]}")
|
|
879
|
+
if _finish_reason == "length":
|
|
880
|
+
logger.warning(
|
|
881
|
+
f"[{task_id}] LLM 输出被 max_tokens 截断 (finish_reason=length),"
|
|
882
|
+
f"输出长度={len(llm_raw)} 字符"
|
|
883
|
+
)
|
|
879
884
|
|
|
880
885
|
# 保存 LLM 原始输出到会话记忆(用于回溯和审计,key=llm_output 不出现在对话历史中)
|
|
881
886
|
if self.memory:
|
|
@@ -918,137 +923,151 @@ GUI桌面 (仅Windows/macOS):
|
|
|
918
923
|
stream_callback,
|
|
919
924
|
)
|
|
920
925
|
|
|
921
|
-
# Step 4.2: <output> 块完整性检查
|
|
922
|
-
#
|
|
923
|
-
#
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
926
|
+
# Step 4.2: <output> 块完整性检查 + finish_reason 截断检测
|
|
927
|
+
# 统一处理三种异常:
|
|
928
|
+
# A) finish_reason=length → max_tokens 截断
|
|
929
|
+
# B) 缺少 <output> 开始标签
|
|
930
|
+
# C) 缺少 </output> 闭合标签
|
|
931
|
+
_has_output_open = bool(re.search(r"<output[^>]*>", llm_raw, re.IGNORECASE))
|
|
932
|
+
_has_output_close = bool(re.search(r"</output\s*>", llm_raw, re.IGNORECASE))
|
|
933
|
+
_is_truncated = (_finish_reason == "length")
|
|
934
|
+
_block_incomplete = (not _has_output_open or not _has_output_close)
|
|
935
|
+
|
|
936
|
+
if _is_truncated or (_block_incomplete and not parsed.parse_success):
|
|
937
|
+
# 构建诊断信息
|
|
938
|
+
_diag_parts = []
|
|
939
|
+
if _is_truncated:
|
|
940
|
+
_diag_parts.append("finish_reason=length(max_tokens截断)")
|
|
941
|
+
if not _has_output_open:
|
|
942
|
+
_diag_parts.append("缺少<output>开始标签")
|
|
943
|
+
if not _has_output_close and _has_output_open:
|
|
944
|
+
_diag_parts.append("缺少</output>闭合标签")
|
|
945
|
+
_diag_msg = ", ".join(_diag_parts)
|
|
946
|
+
|
|
935
947
|
logger.warning(
|
|
936
|
-
f"[{task_id}]
|
|
937
|
-
f"
|
|
948
|
+
f"[{task_id}] XML 输出异常: {_diag_msg},"
|
|
949
|
+
f"parse_success={parsed.parse_success}, "
|
|
950
|
+
f"输出长度={len(llm_raw)} 字符"
|
|
938
951
|
)
|
|
939
|
-
|
|
952
|
+
|
|
953
|
+
if _xml_correction_retries < 3:
|
|
940
954
|
_xml_correction_retries += 1
|
|
941
|
-
#
|
|
942
|
-
# 防止 reasoning 模型因 max_tokens 截断导致内容完全丢失
|
|
955
|
+
# 尝试从不完整输出中提取已有内容,防止内容完全丢失
|
|
943
956
|
_partial_text = self._try_extract_partial_response(llm_raw)
|
|
944
957
|
if _partial_text:
|
|
945
958
|
logger.info(f"[{task_id}] 从不完整XML中提取到部分回复: {_partial_text[:100]}")
|
|
946
959
|
_v2_reasoning_collected.append(_partial_text)
|
|
947
960
|
_emitted_reasoning_this_iter = True
|
|
948
961
|
await self._emit_v2_event("v2_reasoning", {"content": _partial_text}, stream_callback)
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
962
|
+
|
|
963
|
+
# 根据诊断信息构建精准修正提示词
|
|
964
|
+
if _is_truncated:
|
|
965
|
+
correction_prompt = (
|
|
966
|
+
"你的上一次输出因长度限制被截断,XML块不完整。\n"
|
|
967
|
+
"请在回复中只包含精简的核心内容,避免冗长描述。\n"
|
|
968
|
+
"如果需要执行工具调用,只保留最关键的工具。\n"
|
|
969
|
+
"严格按照 <output>...</output> 格式输出,确保标签完整闭合。\n\n"
|
|
970
|
+
f"你上一次被截断的输出如下(供参考,不要照搬):\n{llm_raw[-2000:]}"
|
|
971
|
+
)
|
|
972
|
+
elif not _has_output_open:
|
|
973
|
+
correction_prompt = (
|
|
974
|
+
"你的输出缺少 <output> 开始标签,解析器无法识别。\n"
|
|
975
|
+
"请严格按照 <output>...</output> 格式重新输出,"
|
|
976
|
+
"确保以 <output> 开头、以 </output> 结尾。\n"
|
|
977
|
+
"不要在 <output> 标签前后输出任何其他文字。\n\n"
|
|
978
|
+
f"你上一次的原始输出如下:\n{llm_raw[-2000:]}"
|
|
979
|
+
)
|
|
980
|
+
else:
|
|
981
|
+
correction_prompt = (
|
|
982
|
+
"你的输出缺少 </output> 闭合标签,XML块不完整,"
|
|
983
|
+
"解析器不会处理不完整的块。\n"
|
|
984
|
+
"请严格按照 <output>...</output> 格式重新输出,"
|
|
985
|
+
"确保所有标签正确闭合。\n\n"
|
|
986
|
+
f"你上一次的原始输出如下:\n{llm_raw[-2000:]}"
|
|
987
|
+
)
|
|
988
|
+
|
|
989
|
+
# 追加到 history 时使用截断版本,防止 history 膨胀
|
|
956
990
|
conversation_history.append(
|
|
957
|
-
Message(role="assistant", content=llm_raw)
|
|
991
|
+
Message(role="assistant", content=llm_raw[-3000:] if len(llm_raw) > 3000 else llm_raw)
|
|
958
992
|
)
|
|
959
993
|
conversation_history.append(
|
|
960
994
|
Message(role="user", content=correction_prompt)
|
|
961
995
|
)
|
|
962
996
|
await self._emit_v2_event(
|
|
963
997
|
"v2_reasoning",
|
|
964
|
-
{"content": "⚠️
|
|
998
|
+
{"content": f"⚠️ 模型输出异常({_diag_msg}),正在自动修正({_xml_correction_retries}/3)..."},
|
|
965
999
|
stream_callback,
|
|
966
1000
|
)
|
|
967
1001
|
continue # 重新进入循环,让 LLM 重新生成
|
|
968
1002
|
else:
|
|
969
|
-
#
|
|
970
|
-
logger.warning(f"[{task_id}] XML
|
|
971
|
-
|
|
1003
|
+
# 已重试 3 次 → 不终止,提取已有内容作为 response 继续执行
|
|
1004
|
+
logger.warning(f"[{task_id}] XML输出异常已重试3次,提取已有内容继续执行: {_diag_msg}")
|
|
1005
|
+
_fallback_save = ""
|
|
972
1006
|
if _v2_reasoning_collected:
|
|
973
1007
|
_fallback_save = "\n".join(_v2_reasoning_collected)
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
"v2_reasoning",
|
|
984
|
-
{"content": _fallback_save},
|
|
985
|
-
stream_callback,
|
|
986
|
-
)
|
|
987
|
-
else:
|
|
988
|
-
context.working_memory["final_response"] = "模型输出格式异常,请重新尝试。"
|
|
989
|
-
await self._emit_v2_event(
|
|
990
|
-
"v2_reasoning",
|
|
991
|
-
{"content": "模型输出格式异常,已自动终止。"},
|
|
992
|
-
stream_callback,
|
|
993
|
-
)
|
|
994
|
-
break
|
|
995
|
-
elif not parsed.output_block_complete and parsed.parse_success:
|
|
1008
|
+
if not _fallback_save.strip():
|
|
1009
|
+
before, after = extract_surrounding_text(llm_raw)
|
|
1010
|
+
_fallback_save = ((before + "\n" + after) if (before.strip() or after.strip()) else re.sub(r"<[^>]+>", "", llm_raw).strip())
|
|
1011
|
+
_fallback_save = _fallback_save.strip() if _fallback_save.strip() else "处理完毕。"
|
|
1012
|
+
parsed.parse_success = True
|
|
1013
|
+
parsed.response = _fallback_save
|
|
1014
|
+
parsed.finish = True
|
|
1015
|
+
parsed.finish_reason = f"输出异常({_diag_msg}),已提取内容继续执行"
|
|
1016
|
+
elif _block_incomplete and parsed.parse_success:
|
|
996
1017
|
# 块不完整但已提取到有效内容,记录日志但正常继续
|
|
997
1018
|
logger.info(
|
|
998
1019
|
f"[{task_id}] <output> 块不完整但已提取到有效内容,跳过修正直接处理"
|
|
999
1020
|
)
|
|
1000
1021
|
|
|
1001
|
-
# Step 4.5: 解析失败处理 —
|
|
1022
|
+
# Step 4.5: 解析失败处理 — 三层兜底,确保执行不会因解析错误而中断
|
|
1023
|
+
# 兜底策略(由 output_parser Step 4.5 已完成 L1/L2,此处处理 L3):
|
|
1024
|
+
# L1 (parser): 直接搜索 <tool> 块(跳过 toolstocal 包装)
|
|
1025
|
+
# L2 (parser): 搜索散落的 <toolname>+<parms> 配对
|
|
1026
|
+
# L3 (此处): 将原始文本回退给 LLM 修正格式,或提取为纯文本继续
|
|
1002
1027
|
if not parsed.parse_success:
|
|
1003
|
-
# 即使解析失败,如果 regex fallback 提取到了工具调用,仍然继续执行
|
|
1004
1028
|
if parsed.tools_to_call:
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1029
|
+
# L1/L2 兜底成功提取到了工具调用 → 标记为成功,继续执行工具
|
|
1030
|
+
parsed.parse_success = True
|
|
1031
|
+
logger.warning(f"[{task_id}] 解析兜底成功: 提取到 {len(parsed.tools_to_call)} 个工具调用,继续执行")
|
|
1032
|
+
elif parsed.needs_correction and _xml_correction_retries < 3:
|
|
1033
|
+
# L3 兜底: 将原始文本回退给 LLM 让它调好格式再输出
|
|
1008
1034
|
_xml_correction_retries += 1
|
|
1009
1035
|
logger.warning(
|
|
1010
|
-
f"[{task_id}]
|
|
1011
|
-
f"(重试 {_xml_correction_retries}/
|
|
1036
|
+
f"[{task_id}] 解析兜底L3: 回退给LLM修正格式 "
|
|
1037
|
+
f"(重试 {_xml_correction_retries}/3)"
|
|
1012
1038
|
)
|
|
1013
1039
|
correction_prompt = (
|
|
1014
|
-
"你上一次的输出格式有误,XML
|
|
1015
|
-
"请严格按照 <output>...</output>
|
|
1016
|
-
"
|
|
1017
|
-
|
|
1040
|
+
"你上一次的输出格式有误,XML解析器无法识别。\n"
|
|
1041
|
+
"请严格按照 <output>...</output> 格式重新输出你的回答。\n"
|
|
1042
|
+
"注意:以 <output> 开头,以 </output> 结尾,"
|
|
1043
|
+
"不要在 <output> 标签前后输出任何其他文字。\n\n"
|
|
1044
|
+
f"你上一次的原始输出如下:\n{llm_raw[-2000:]}"
|
|
1018
1045
|
)
|
|
1019
1046
|
conversation_history.append(
|
|
1020
|
-
Message(role="assistant", content=llm_raw)
|
|
1047
|
+
Message(role="assistant", content=llm_raw[-3000:] if len(llm_raw) > 3000 else llm_raw)
|
|
1021
1048
|
)
|
|
1022
1049
|
conversation_history.append(
|
|
1023
1050
|
Message(role="user", content=correction_prompt)
|
|
1024
1051
|
)
|
|
1025
1052
|
await self._emit_v2_event(
|
|
1026
1053
|
"v2_reasoning",
|
|
1027
|
-
{"content": "⚠️
|
|
1054
|
+
{"content": f"⚠️ 输出格式异常,正在自动修正({_xml_correction_retries}/3)..."},
|
|
1028
1055
|
stream_callback,
|
|
1029
1056
|
)
|
|
1030
1057
|
continue # 重新进入循环,让 LLM 重新生成
|
|
1031
1058
|
else:
|
|
1032
|
-
#
|
|
1033
|
-
logger.warning(f"[{task_id}]
|
|
1059
|
+
# 所有兜底都失败 → 把原始文本去除标签后作为 response,不中断执行
|
|
1060
|
+
logger.warning(f"[{task_id}] 所有解析兜底失败,提取原始文本作为response继续执行")
|
|
1034
1061
|
before, after = extract_surrounding_text(llm_raw)
|
|
1035
1062
|
if before.strip() or after.strip():
|
|
1036
1063
|
final_text = (before + "\n" + after).strip()
|
|
1037
1064
|
else:
|
|
1038
|
-
# 清除残余 XML 标签后作为纯文本
|
|
1039
1065
|
final_text = re.sub(r"<[^>]+>", "", llm_raw).strip()
|
|
1040
1066
|
final_text = final_text if final_text else "处理完毕。"
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
if self.memory:
|
|
1046
|
-
self.memory.add_session(
|
|
1047
|
-
session_id=context.session_id,
|
|
1048
|
-
role="assistant",
|
|
1049
|
-
content=final_text,
|
|
1050
|
-
)
|
|
1051
|
-
break
|
|
1067
|
+
parsed.parse_success = True
|
|
1068
|
+
parsed.response = final_text
|
|
1069
|
+
parsed.finish = True
|
|
1070
|
+
parsed.finish_reason = "输出格式异常,已提取文本内容作为最终回复"
|
|
1052
1071
|
|
|
1053
1072
|
warnings = validate_output(parsed)
|
|
1054
1073
|
for w in warnings:
|
package/config.py
CHANGED
|
@@ -28,7 +28,7 @@ class LLMConfig:
|
|
|
28
28
|
base_url: str = "https://api.openai.com/v1"
|
|
29
29
|
model: str = "gpt-4"
|
|
30
30
|
temperature: float = 0.1
|
|
31
|
-
max_tokens: int =
|
|
31
|
+
max_tokens: int = 30000
|
|
32
32
|
context_window: int = 128000 # 上下文窗口大小
|
|
33
33
|
input_modes: List[str] = field(default_factory=lambda: ["text"]) # 支持的输入模式
|
|
34
34
|
reasoning: bool = False # 是否支持推理
|
|
@@ -94,7 +94,7 @@ class ModelEntry:
|
|
|
94
94
|
model: str = "" # API 调用使用的实际模型字符串
|
|
95
95
|
base_url: str = "" # 自定义 Base URL(空=使用 provider 默认值)
|
|
96
96
|
api_key: str = "" # 专用 API Key(空=使用全局默认值)
|
|
97
|
-
max_tokens: int =
|
|
97
|
+
max_tokens: int = 30000
|
|
98
98
|
temperature: float = 0.1
|
|
99
99
|
context_window: int = 128000 # 上下文窗口大小(token)
|
|
100
100
|
input_modes: List[str] = field(default_factory=lambda: ["text"]) # 支持的输入模式: text, image, video, audio
|
package/core/llm.py
CHANGED
|
@@ -133,7 +133,7 @@ class LLMClient:
|
|
|
133
133
|
base_url: str = "",
|
|
134
134
|
model: str = "gpt-4",
|
|
135
135
|
temperature: float = 0.1,
|
|
136
|
-
max_tokens: int =
|
|
136
|
+
max_tokens: int = 30000,
|
|
137
137
|
timeout: int = 120,
|
|
138
138
|
max_retries: int = 3,
|
|
139
139
|
reasoning: bool = False,
|
|
@@ -567,13 +567,26 @@ class LLMClient:
|
|
|
567
567
|
"arguments": args,
|
|
568
568
|
})
|
|
569
569
|
|
|
570
|
+
# 提取推理模型的思考过程 (DeepSeek-R1, o1, o3, QwQ 等)
|
|
571
|
+
_reasoning = ""
|
|
572
|
+
_content = choice.message.content or ""
|
|
573
|
+
# 尝试多种属性名(不同 API 提供商的命名差异)
|
|
574
|
+
for _attr in ("reasoning_content", "reasoning", "thinking"):
|
|
575
|
+
_reasoning = getattr(choice.message, _attr, None) or ""
|
|
576
|
+
if _reasoning:
|
|
577
|
+
break
|
|
578
|
+
# 如果 content 为空但 reasoning 有内容,用 reasoning 作为 content
|
|
579
|
+
if not _content.strip() and _reasoning.strip():
|
|
580
|
+
_content = _reasoning
|
|
581
|
+
|
|
570
582
|
return LLMResponse(
|
|
571
|
-
content=
|
|
583
|
+
content=_content,
|
|
572
584
|
tool_calls=tool_calls,
|
|
573
585
|
usage=usage,
|
|
574
586
|
model=response.model,
|
|
575
587
|
finish_reason=choice.finish_reason or "",
|
|
576
588
|
raw_response=response,
|
|
589
|
+
reasoning=_reasoning,
|
|
577
590
|
)
|
|
578
591
|
|
|
579
592
|
async def _chat_anthropic(self, messages: List[Message], kwargs: dict) -> LLMResponse:
|
|
@@ -604,9 +617,16 @@ class LLMClient:
|
|
|
604
617
|
)
|
|
605
618
|
|
|
606
619
|
content = ""
|
|
620
|
+
_reasoning = ""
|
|
607
621
|
for block in response.content:
|
|
608
622
|
if block.type == "text":
|
|
609
623
|
content += block.text
|
|
624
|
+
elif block.type == "thinking":
|
|
625
|
+
_reasoning += getattr(block, "thinking", "") or ""
|
|
626
|
+
|
|
627
|
+
# 如果 content 为空但 reasoning 有内容,用 reasoning 作为 content
|
|
628
|
+
if not content.strip() and _reasoning.strip():
|
|
629
|
+
content = _reasoning
|
|
610
630
|
|
|
611
631
|
return LLMResponse(
|
|
612
632
|
content=content,
|
|
@@ -616,6 +636,7 @@ class LLMClient:
|
|
|
616
636
|
},
|
|
617
637
|
model=response.model,
|
|
618
638
|
finish_reason=response.stop_reason or "",
|
|
639
|
+
reasoning=_reasoning,
|
|
619
640
|
)
|
|
620
641
|
|
|
621
642
|
async def _chat_ollama(self, kwargs: dict) -> LLMResponse:
|
package/core/output_parser.py
CHANGED
|
@@ -429,8 +429,36 @@ def _custom_parse(raw_text: str) -> ParsedOutput:
|
|
|
429
429
|
parsed.needs_correction = True
|
|
430
430
|
return parsed
|
|
431
431
|
|
|
432
|
-
# ── Step 0:
|
|
433
|
-
|
|
432
|
+
# ── Step 0: 检查 <output> 块,处理缺少开始/闭合标签的情况 ──
|
|
433
|
+
_has_open = bool(re.search(r"<output[^>]*>", raw_text, re.IGNORECASE))
|
|
434
|
+
_has_close = bool(re.search(r"</output\s*>", raw_text, re.IGNORECASE))
|
|
435
|
+
|
|
436
|
+
if not _has_open and not _has_close:
|
|
437
|
+
# 完全没有 <output> 标签 — 检查是否包含已知子标签
|
|
438
|
+
_has_known_tags = any(
|
|
439
|
+
re.search(rf"<{re.escape(t)}[\s>]", raw_text, re.IGNORECASE)
|
|
440
|
+
for t in KNOWN_TOP_LEVEL_TAGS
|
|
441
|
+
)
|
|
442
|
+
if _has_known_tags:
|
|
443
|
+
# 有子标签但缺少 <output> 包装 — 自动补全后正常解析
|
|
444
|
+
logger.info(
|
|
445
|
+
"LLM 输出缺少 <output> 标签但包含已知子标签,"
|
|
446
|
+
"自动补全 <output> 包装后解析"
|
|
447
|
+
)
|
|
448
|
+
raw_text = "<output>\n" + raw_text.strip() + "\n</output>"
|
|
449
|
+
parsed.output_block_complete = True
|
|
450
|
+
else:
|
|
451
|
+
parsed.output_block_complete = False
|
|
452
|
+
elif _has_open and not _has_close:
|
|
453
|
+
parsed.output_block_complete = False
|
|
454
|
+
elif not _has_open and _has_close:
|
|
455
|
+
# 有闭合标签但没开始标签 — 补全开始标签
|
|
456
|
+
logger.info("LLM 输出缺少 <output> 开始标签但有 </output>,自动补全")
|
|
457
|
+
raw_text = "<output>\n" + raw_text.strip()
|
|
458
|
+
parsed.output_block_complete = True
|
|
459
|
+
else:
|
|
460
|
+
parsed.output_block_complete = True
|
|
461
|
+
|
|
434
462
|
conservative = not parsed.output_block_complete
|
|
435
463
|
|
|
436
464
|
if conservative:
|
|
@@ -520,6 +548,63 @@ def _custom_parse(raw_text: str) -> ParsedOutput:
|
|
|
520
548
|
if toolstocal_raw.strip():
|
|
521
549
|
parsed.tools_to_call = _parse_toolstocal(toolstocal_raw, conservative=conservative)
|
|
522
550
|
|
|
551
|
+
# ── Step 4.5: 兜底机制 — 宽松提取工具调用,确保执行不会因解析错误而中断 ──
|
|
552
|
+
# 策略优先级:
|
|
553
|
+
# 1. _parse_toolstocal 已成功提取 → 不做任何事
|
|
554
|
+
# 2. 直接在整个输出中搜索 <tool>...</tool> 块(跳过 toolstocal 包装)
|
|
555
|
+
# 3. 搜索散落的 <toolname>...</toolname> + <parms>...</parms> 配对
|
|
556
|
+
if not parsed.tools_to_call:
|
|
557
|
+
# 兜底 Level 1: 在整个原始文本中直接搜索 <tool> 块
|
|
558
|
+
_raw_tool_blocks = _extract_all_tag_blocks(
|
|
559
|
+
raw_text, "tool", parent_close_tag=None, conservative=False,
|
|
560
|
+
)
|
|
561
|
+
for block in _raw_tool_blocks:
|
|
562
|
+
tn = _safe_strip(_extract_tag_content(block, "toolname", TOOL_INNER_TAGS))
|
|
563
|
+
if tn:
|
|
564
|
+
parsed.tools_to_call.append({
|
|
565
|
+
"beforecalltext": _safe_strip(_extract_tag_content(block, "beforecalltext", TOOL_INNER_TAGS)),
|
|
566
|
+
"toolname": tn,
|
|
567
|
+
"parms": _safe_strip(_extract_tag_content(block, "parms", TOOL_INNER_TAGS)),
|
|
568
|
+
"timeout": _parse_int(_extract_tag_content(block, "timeout", TOOL_INNER_TAGS), _DEFAULT_TIMEOUT),
|
|
569
|
+
})
|
|
570
|
+
logger.info(f"[兜底L1] 从非<toolstocal>区域提取到工具调用: {tn}")
|
|
571
|
+
|
|
572
|
+
if not parsed.tools_to_call:
|
|
573
|
+
# 兜底 Level 2: 搜索散落的 <toolname>...</toolname>,然后在同一段中找最近的 <parms>
|
|
574
|
+
_toolname_positions = []
|
|
575
|
+
for m in re.finditer(r"<toolname[^>]*>(.*?)</toolname\s*>", raw_text, re.DOTALL | re.IGNORECASE):
|
|
576
|
+
tn = html.unescape(m.group(1)).strip()
|
|
577
|
+
if tn:
|
|
578
|
+
_toolname_positions.append((m.start(), m.end(), tn))
|
|
579
|
+
|
|
580
|
+
if _toolname_positions:
|
|
581
|
+
logger.info(f"[兜底L2] 找到 {len(_toolname_positions)} 个散落的 <toolname> 标签")
|
|
582
|
+
for _i, (_start, _end, _tn) in enumerate(_toolname_positions):
|
|
583
|
+
# 在 toolname 之后的 500 字符内搜索最近的 <parms>
|
|
584
|
+
_search_region = raw_text[_end:_end + 500]
|
|
585
|
+
_parms_match = re.search(
|
|
586
|
+
r"<parms[^>]*>(.*?)</parms\s*>",
|
|
587
|
+
_search_region, re.DOTALL | re.IGNORECASE,
|
|
588
|
+
)
|
|
589
|
+
_parms = html.unescape(_parms_match.group(1)).strip() if _parms_match else ""
|
|
590
|
+
|
|
591
|
+
# 也尝试在 toolname 之前的 200 字符内搜索(parms 可能在 toolname 前面)
|
|
592
|
+
if not _parms:
|
|
593
|
+
_pre_region = raw_text[max(0, _start - 200):_start]
|
|
594
|
+
_parms_match = re.search(
|
|
595
|
+
r"<parms[^>]*>(.*?)</parms\s*>",
|
|
596
|
+
_pre_region, re.DOTALL | re.IGNORECASE,
|
|
597
|
+
)
|
|
598
|
+
_parms = html.unescape(_parms_match.group(1)).strip() if _parms_match else ""
|
|
599
|
+
|
|
600
|
+
parsed.tools_to_call.append({
|
|
601
|
+
"beforecalltext": "",
|
|
602
|
+
"toolname": _tn,
|
|
603
|
+
"parms": _parms,
|
|
604
|
+
"timeout": _DEFAULT_TIMEOUT,
|
|
605
|
+
})
|
|
606
|
+
logger.info(f"[兜底L2] 散落提取工具: {_tn}, parms={'有' if _parms else '无'}")
|
|
607
|
+
|
|
523
608
|
# ── Step 5: Determine parse success ──
|
|
524
609
|
has_content = bool(
|
|
525
610
|
parsed.response
|
package/main.py
CHANGED
|
@@ -374,10 +374,6 @@ class MyAgentApp:
|
|
|
374
374
|
|
|
375
375
|
def _register_builtin_skills(self):
|
|
376
376
|
"""注册内置技能"""
|
|
377
|
-
from skills.file_skill import (
|
|
378
|
-
FileReadSkill, FileWriteSkill, FileListSkill,
|
|
379
|
-
FileDeleteSkill, FileSearchSkill, FileMoveSkill,
|
|
380
|
-
)
|
|
381
377
|
from skills.search_skill import WebSearchSkill, WebReadSkill, URLReadSkill
|
|
382
378
|
from skills.system_skill import (
|
|
383
379
|
SystemInfoSkill, ProcessListSkill,
|
|
@@ -392,13 +388,6 @@ class MyAgentApp:
|
|
|
392
388
|
TypeTextSkill, HotkeySkill, WindowListSkill, WindowFocusSkill, ScreenElementSkill,
|
|
393
389
|
)
|
|
394
390
|
|
|
395
|
-
# 文件技能
|
|
396
|
-
for skill_cls in [
|
|
397
|
-
FileReadSkill, FileWriteSkill, FileListSkill,
|
|
398
|
-
FileDeleteSkill, FileSearchSkill, FileMoveSkill,
|
|
399
|
-
]:
|
|
400
|
-
self.skill_registry.register(skill_cls())
|
|
401
|
-
|
|
402
391
|
# 搜索技能
|
|
403
392
|
for skill_cls in [WebSearchSkill, WebReadSkill, URLReadSkill]:
|
|
404
393
|
self.skill_registry.register(skill_cls())
|
package/package.json
CHANGED
package/scripts/cli.py
CHANGED
|
@@ -9,7 +9,7 @@ scripts/cli.py - MyAgent CLI 工具集
|
|
|
9
9
|
命令分类:
|
|
10
10
|
感知: ocr, analyze-image, transcribe
|
|
11
11
|
搜索: search, read-url, fetch-url
|
|
12
|
-
文件:
|
|
12
|
+
文件: send-file
|
|
13
13
|
文档: docx-create, docx-read, xlsx-create, xlsx-read, xlsx-edit,
|
|
14
14
|
ppt-create, ppt-read, pdf-create, pdf-read
|
|
15
15
|
系统: sysinfo, ps, env, pathinfo
|
|
@@ -90,12 +90,6 @@ async def _run():
|
|
|
90
90
|
"read-url": cmd_read_url,
|
|
91
91
|
"fetch-url": cmd_fetch_url,
|
|
92
92
|
# 文件操作
|
|
93
|
-
"read": cmd_read,
|
|
94
|
-
"write": cmd_write,
|
|
95
|
-
"ls": cmd_ls,
|
|
96
|
-
"rm": cmd_rm,
|
|
97
|
-
"grep": cmd_grep,
|
|
98
|
-
"mv": cmd_mv,
|
|
99
93
|
"send-file": cmd_send_file,
|
|
100
94
|
# 文档
|
|
101
95
|
"docx-create": cmd_docx_create,
|
|
@@ -332,123 +326,6 @@ async def cmd_fetch_url(args):
|
|
|
332
326
|
# 文件操作命令
|
|
333
327
|
# =============================================================================
|
|
334
328
|
|
|
335
|
-
async def cmd_read(args):
|
|
336
|
-
"""读取文件内容"""
|
|
337
|
-
import argparse
|
|
338
|
-
p = argparse.ArgumentParser(prog="myagent-ai read", description="读取文件内容")
|
|
339
|
-
p.add_argument("path", help="文件路径")
|
|
340
|
-
p.add_argument("-e", "--encoding", default="utf-8", help="文件编码 (默认 utf-8)")
|
|
341
|
-
p.add_argument("--offset", type=int, default=0, help="起始行号 (从0开始)")
|
|
342
|
-
p.add_argument("--limit", type=int, default=500, help="读取行数 (默认500)")
|
|
343
|
-
a = p.parse_args(args)
|
|
344
|
-
|
|
345
|
-
from skills.file_skill import FileReadSkill
|
|
346
|
-
skill = FileReadSkill()
|
|
347
|
-
result = await skill.execute(path=a.path, encoding=a.encoding, offset=a.offset, limit=a.limit)
|
|
348
|
-
_print_result({"success": result.success, "message": result.message,
|
|
349
|
-
"data": result.data, "error": result.error})
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
async def cmd_write(args):
|
|
353
|
-
"""写入文件"""
|
|
354
|
-
import argparse
|
|
355
|
-
p = argparse.ArgumentParser(prog="myagent-ai write", description="写入文件")
|
|
356
|
-
p.add_argument("path", help="文件路径")
|
|
357
|
-
p.add_argument("-c", "--content", default="", help="写入内容 (或通过 stdin)")
|
|
358
|
-
p.add_argument("-e", "--encoding", default="utf-8", help="文件编码")
|
|
359
|
-
p.add_argument("--append", action="store_true", help="追加模式")
|
|
360
|
-
a = p.parse_args(args)
|
|
361
|
-
|
|
362
|
-
content = a.content
|
|
363
|
-
if not content:
|
|
364
|
-
content = _read_stdin()
|
|
365
|
-
if content is None:
|
|
366
|
-
content = ""
|
|
367
|
-
|
|
368
|
-
from skills.file_skill import FileWriteSkill
|
|
369
|
-
skill = FileWriteSkill()
|
|
370
|
-
result = await skill.execute(path=a.path, content=content, encoding=a.encoding, append=a.append)
|
|
371
|
-
_print_result({"success": result.success, "message": result.message,
|
|
372
|
-
"data": result.data, "error": result.error})
|
|
373
|
-
|
|
374
|
-
# CLI send-file 标记
|
|
375
|
-
if result.success and result.files:
|
|
376
|
-
fpath = result.files[0]
|
|
377
|
-
print(f"\n__SEND_FILE__{fpath}|文件已写入: {Path(fpath).name}__END__")
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
async def cmd_ls(args):
|
|
381
|
-
"""列出目录内容"""
|
|
382
|
-
import argparse
|
|
383
|
-
# [v1.23.46] 过滤系统 ls 参数(-l, -a, -la 等)
|
|
384
|
-
# [v1.23.48] 过滤纯数字参数(LLM 误传如 myagent-ai ls 2)
|
|
385
|
-
args = [a for a in args if not a.isdigit()]
|
|
386
|
-
args = _filter_unknown_args(args, allowed_prefixes={"-p", "--pattern", "-r", "--recursive", "--max"})
|
|
387
|
-
p = argparse.ArgumentParser(prog="myagent-ai ls", description="列出目录内容")
|
|
388
|
-
p.add_argument("path", nargs="?", default=".", help="目录路径 (默认当前目录)")
|
|
389
|
-
p.add_argument("-p", "--pattern", default="*", help="文件匹配模式 (如 *.py)")
|
|
390
|
-
p.add_argument("-r", "--recursive", action="store_true", help="递归列出")
|
|
391
|
-
p.add_argument("--max", type=int, default=500, help="最大返回条目数 (默认500)")
|
|
392
|
-
a = p.parse_args(args)
|
|
393
|
-
|
|
394
|
-
from skills.file_skill import FileListSkill
|
|
395
|
-
skill = FileListSkill()
|
|
396
|
-
result = await skill.execute(path=a.path, pattern=a.pattern, recursive=a.recursive, max_items=a.max)
|
|
397
|
-
_print_result({"success": result.success, "message": result.message,
|
|
398
|
-
"data": result.data, "error": result.error})
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
async def cmd_rm(args):
|
|
402
|
-
"""删除文件或目录"""
|
|
403
|
-
import argparse
|
|
404
|
-
p = argparse.ArgumentParser(prog="myagent-ai rm", description="删除文件或目录")
|
|
405
|
-
p.add_argument("path", help="文件/目录路径")
|
|
406
|
-
p.add_argument("-r", "--recursive", action="store_true", help="递归删除目录")
|
|
407
|
-
a = p.parse_args(args)
|
|
408
|
-
|
|
409
|
-
from skills.file_skill import FileDeleteSkill
|
|
410
|
-
skill = FileDeleteSkill()
|
|
411
|
-
result = await skill.execute(path=a.path, recursive=a.recursive)
|
|
412
|
-
_print_result({"success": result.success, "message": result.message,
|
|
413
|
-
"data": result.data, "error": result.error})
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
async def cmd_grep(args):
|
|
417
|
-
"""搜索文件内容"""
|
|
418
|
-
import argparse
|
|
419
|
-
# [v1.23.46] 使用通用过滤器,过滤系统 grep 参数(-n, -i, -r 等)
|
|
420
|
-
args = _filter_unknown_args(args, allowed_prefixes={"-p", "--pattern", "--max", "--depth"})
|
|
421
|
-
p = argparse.ArgumentParser(prog="myagent-ai grep", description="在目录中搜索文件内容")
|
|
422
|
-
p.add_argument("query", help="搜索关键词")
|
|
423
|
-
p.add_argument("path", help="搜索目录路径")
|
|
424
|
-
p.add_argument("-p", "--pattern", default="*", help="文件匹配模式 (如 *.py)")
|
|
425
|
-
p.add_argument("--max", type=int, default=50, help="最大结果数 (默认50)")
|
|
426
|
-
p.add_argument("--depth", type=int, default=10, help="最大递归深度 (默认10)")
|
|
427
|
-
a = p.parse_args(args)
|
|
428
|
-
|
|
429
|
-
from skills.file_skill import FileSearchSkill
|
|
430
|
-
skill = FileSearchSkill()
|
|
431
|
-
result = await skill.execute(path=a.path, query=a.query, pattern=a.pattern,
|
|
432
|
-
max_results=a.max, max_depth=a.depth)
|
|
433
|
-
_print_result({"success": result.success, "message": result.message,
|
|
434
|
-
"data": result.data, "error": result.error})
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
async def cmd_mv(args):
|
|
438
|
-
"""移动/重命名文件"""
|
|
439
|
-
import argparse
|
|
440
|
-
p = argparse.ArgumentParser(prog="myagent-ai mv", description="移动或重命名文件/目录")
|
|
441
|
-
p.add_argument("source", help="源路径")
|
|
442
|
-
p.add_argument("dest", help="目标路径")
|
|
443
|
-
a = p.parse_args(args)
|
|
444
|
-
|
|
445
|
-
from skills.file_skill import FileMoveSkill
|
|
446
|
-
skill = FileMoveSkill()
|
|
447
|
-
result = await skill.execute(source=a.source, destination=a.dest)
|
|
448
|
-
_print_result({"success": result.success, "message": result.message,
|
|
449
|
-
"data": result.data, "error": result.error})
|
|
450
|
-
|
|
451
|
-
|
|
452
329
|
async def cmd_send_file(args):
|
|
453
330
|
"""发送文件给用户"""
|
|
454
331
|
import argparse
|
|
File without changes
|
package/skills/file_skill.py
DELETED
|
@@ -1,573 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
skills/file_skill.py - 文件操作技能
|
|
3
|
-
=====================================
|
|
4
|
-
提供文件读写、目录操作、文件搜索等功能。
|
|
5
|
-
|
|
6
|
-
[v1.16.18] FileReadSkill 增强:
|
|
7
|
-
- 自动检测 PDF/Excel/Word/PPT 等二进制格式,提取文本内容
|
|
8
|
-
- PDF: pdftotext (poppler-utils) → PyPDF2 双重 fallback
|
|
9
|
-
- Excel: openpyxl 提取表格文本
|
|
10
|
-
- Word/PPT: python-docx/python-pptx 提取文本
|
|
11
|
-
- CSV/JSON: 智能检测编码
|
|
12
|
-
"""
|
|
13
|
-
from __future__ import annotations
|
|
14
|
-
|
|
15
|
-
import fnmatch
|
|
16
|
-
import io
|
|
17
|
-
import os
|
|
18
|
-
import shutil
|
|
19
|
-
import glob as glob_module
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import List, Optional
|
|
22
|
-
|
|
23
|
-
from core.logger import get_logger
|
|
24
|
-
from skills.base import Skill, SkillResult, SkillParameter
|
|
25
|
-
|
|
26
|
-
logger = get_logger("myagent.skills.file")
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _extract_text_from_binary(file_path: Path, max_chars: int = 50000) -> Optional[str]:
|
|
30
|
-
"""[v1.16.18] 从二进制文件中提取文本内容。
|
|
31
|
-
|
|
32
|
-
支持: PDF, Excel (.xlsx/.xls), Word (.docx), PPT (.pptx), CSV
|
|
33
|
-
返回提取的文本,不支持时返回 None。
|
|
34
|
-
"""
|
|
35
|
-
fname = file_path.name.lower()
|
|
36
|
-
suffix = file_path.suffix.lower()
|
|
37
|
-
|
|
38
|
-
# ── PDF ──
|
|
39
|
-
if suffix == ".pdf" or fname.endswith(".pdf"):
|
|
40
|
-
try:
|
|
41
|
-
data = file_path.read_bytes()
|
|
42
|
-
# 方法1: pdftotext (系统工具,更快更准)
|
|
43
|
-
import subprocess
|
|
44
|
-
try:
|
|
45
|
-
result = subprocess.run(
|
|
46
|
-
["pdftotext", str(file_path), "-", "-"],
|
|
47
|
-
capture_output=True, text=True, timeout=30,
|
|
48
|
-
)
|
|
49
|
-
if result.returncode == 0 and result.stdout.strip():
|
|
50
|
-
logger.debug(f"PDF 提取成功 (pdftotext): {file_path.name}")
|
|
51
|
-
return result.stdout.strip()[:max_chars]
|
|
52
|
-
except FileNotFoundError:
|
|
53
|
-
pass
|
|
54
|
-
# 方法2: PyPDF2 (纯 Python fallback)
|
|
55
|
-
try:
|
|
56
|
-
from PyPDF2 import PdfReader
|
|
57
|
-
reader = PdfReader(io.BytesIO(data))
|
|
58
|
-
texts = []
|
|
59
|
-
for page in reader.pages:
|
|
60
|
-
text = page.extract_text()
|
|
61
|
-
if text:
|
|
62
|
-
texts.append(text)
|
|
63
|
-
if texts:
|
|
64
|
-
logger.debug(f"PDF 提取成功 (PyPDF2): {file_path.name}")
|
|
65
|
-
return "\n".join(texts)[:max_chars]
|
|
66
|
-
except ImportError:
|
|
67
|
-
logger.warning(f"PyPDF2 未安装,无法读取 PDF: {file_path.name}")
|
|
68
|
-
except Exception as e:
|
|
69
|
-
logger.warning(f"PyPDF2 读取 PDF 失败: {file_path.name}: {e}")
|
|
70
|
-
return None
|
|
71
|
-
except Exception as e:
|
|
72
|
-
logger.warning(f"PDF 读取异常: {file_path.name}: {e}")
|
|
73
|
-
return None
|
|
74
|
-
|
|
75
|
-
# ── Excel ──
|
|
76
|
-
if suffix in (".xlsx", ".xls"):
|
|
77
|
-
try:
|
|
78
|
-
from openpyxl import load_workbook
|
|
79
|
-
wb = load_workbook(str(file_path), read_only=True, data_only=True)
|
|
80
|
-
texts = []
|
|
81
|
-
for ws in wb.worksheets:
|
|
82
|
-
rows = []
|
|
83
|
-
for row in ws.iter_rows(values_only=True):
|
|
84
|
-
cells = [str(c) if c is not None else "" for c in row]
|
|
85
|
-
rows.append("\t".join(cells))
|
|
86
|
-
if rows:
|
|
87
|
-
texts.append(f"[工作表: {ws.title}]\n" + "\n".join(rows))
|
|
88
|
-
wb.close()
|
|
89
|
-
if texts:
|
|
90
|
-
return "\n\n".join(texts)[:max_chars]
|
|
91
|
-
except ImportError:
|
|
92
|
-
logger.warning(f"openpyxl 未安装,无法读取 Excel: {file_path.name}")
|
|
93
|
-
return None
|
|
94
|
-
except Exception as e:
|
|
95
|
-
logger.warning(f"Excel 读取失败: {file_path.name}: {e}")
|
|
96
|
-
return None
|
|
97
|
-
|
|
98
|
-
# ── Word (.docx) ──
|
|
99
|
-
if suffix == ".docx":
|
|
100
|
-
try:
|
|
101
|
-
from docx import Document
|
|
102
|
-
doc = Document(str(file_path))
|
|
103
|
-
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
|
|
104
|
-
if paragraphs:
|
|
105
|
-
return "\n".join(paragraphs)[:max_chars]
|
|
106
|
-
except ImportError:
|
|
107
|
-
logger.warning(f"python-docx 未安装,无法读取 Word: {file_path.name}")
|
|
108
|
-
return None
|
|
109
|
-
except Exception as e:
|
|
110
|
-
logger.warning(f"Word 读取失败: {file_path.name}: {e}")
|
|
111
|
-
return None
|
|
112
|
-
|
|
113
|
-
# ── PowerPoint (.pptx) ──
|
|
114
|
-
if suffix == ".pptx":
|
|
115
|
-
try:
|
|
116
|
-
from pptx import Presentation
|
|
117
|
-
prs = Presentation(str(file_path))
|
|
118
|
-
texts = []
|
|
119
|
-
for i, slide in enumerate(prs.slides):
|
|
120
|
-
slide_texts = []
|
|
121
|
-
for shape in slide.shapes:
|
|
122
|
-
if shape.has_text_frame:
|
|
123
|
-
for para in shape.text_frame.paragraphs:
|
|
124
|
-
if para.text.strip():
|
|
125
|
-
slide_texts.append(para.text)
|
|
126
|
-
if slide_texts:
|
|
127
|
-
texts.append(f"[幻灯片 {i+1}]\n" + "\n".join(slide_texts))
|
|
128
|
-
if texts:
|
|
129
|
-
return "\n\n".join(texts)[:max_chars]
|
|
130
|
-
except ImportError:
|
|
131
|
-
logger.warning(f"python-pptx 未安装,无法读取 PPT: {file_path.name}")
|
|
132
|
-
return None
|
|
133
|
-
except Exception as e:
|
|
134
|
-
logger.warning(f"PPT 读取失败: {file_path.name}: {e}")
|
|
135
|
-
return None
|
|
136
|
-
|
|
137
|
-
return None
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
# 常见二进制/不可读格式(不尝试文本提取)
|
|
141
|
-
_BINARY_EXTENSIONS = {
|
|
142
|
-
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff",
|
|
143
|
-
".mp3", ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".wav", ".ogg",
|
|
144
|
-
".zip", ".rar", ".7z", ".tar", ".gz", ".bz2",
|
|
145
|
-
".exe", ".dll", ".so", ".dylib",
|
|
146
|
-
".pyc", ".pyo", ".whl", ".egg",
|
|
147
|
-
".doc", # 旧版 Word (需要特殊处理)
|
|
148
|
-
".xls", # 旧版 Excel (BIFF 格式,openpyxl 不支持)
|
|
149
|
-
".ppt", # 旧版 PowerPoint
|
|
150
|
-
".sqlite", ".db", ".mdb",
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class FileReadSkill(Skill):
    """Read the content of a file.

    [v1.16.18] Enhancements:
    - PDF/Excel/Word/PPT files are passed to ``_extract_text_from_binary``
      and the extracted text is returned.
    - Plain-text files keep the line-based behaviour (``offset``/``limit``).
    """
    name = "file_read"
    description = "读取指定文件的内容。支持文本文件和二进制格式(PDF、Excel、Word、PPT、CSV、JSON等),自动检测编码和格式。"
    category = "file"
    parameters = [
        SkillParameter("path", "string", "文件路径(绝对路径或相对路径)", required=True),
        SkillParameter("encoding", "string", "文件编码(仅文本文件)", required=False, default="utf-8"),
        SkillParameter("offset", "integer", "起始行号(从0开始,仅文本文件)", required=False, default=0),
        SkillParameter("limit", "integer", "读取行数限制(仅文本文件)", required=False, default=500),
    ]

    # Suffixes handled by _extract_text_from_binary, mapped to the install
    # hint shown when extraction yields nothing.
    _DEPENDENCY_HINTS = {
        ".pdf": "pip install PyPDF2 或 apt install poppler-utils",
        ".xlsx": "pip install openpyxl",
        ".docx": "pip install python-docx",
        ".pptx": "pip install python-pptx",
    }

    # Encodings tried, in order, after the caller-supplied one.
    _FALLBACK_ENCODINGS = ("utf-8", "gbk", "gb2312", "latin-1")

    async def execute(self, path: str = "", encoding: str = "utf-8",
                      offset: int = 0, limit: int = 500, **kwargs) -> SkillResult:
        """Read ``path`` and return its content in a ``SkillResult``.

        Args:
            path: File path; ``~`` is expanded and the path is resolved.
            encoding: Preferred text encoding (plain-text files only).
            offset: Zero-based first line to return; negatives are treated as 0.
            limit: Maximum number of lines to return; negatives are treated as 0.

        Returns:
            A successful result whose ``data`` holds the content, or a failed
            result with an error message. Exceptions are converted to a
            failed result rather than raised.
        """
        try:
            file_path = Path(path).expanduser().resolve()
            if not file_path.exists():
                return SkillResult(success=False, error=f"文件不存在: {path}")
            if not file_path.is_file():
                return SkillResult(success=False, error=f"不是文件: {path}")

            suffix = file_path.suffix.lower()

            # [v1.16.18] Unreadable binary formats are rejected up front.
            if suffix in _BINARY_EXTENSIONS:
                return SkillResult(
                    success=False,
                    error=f"不支持的文件格式 ({suffix})。支持的格式: 文本文件、PDF、Excel(.xlsx)、Word(.docx)、PPT(.pptx)、CSV、JSON、XML 等"
                )

            # [v1.16.18] PDF/Excel/Word/PPT: return extracted text.
            if suffix in self._DEPENDENCY_HINTS:
                return self._read_extracted(file_path, suffix)

            # ── Plain-text files ──
            content = None
            # dict.fromkeys de-duplicates while keeping order, so the
            # requested encoding is not tried twice.
            for enc in dict.fromkeys([encoding, *self._FALLBACK_ENCODINGS]):
                try:
                    with open(file_path, "r", encoding=enc) as f:
                        content = f.read()
                    break
                except (UnicodeError, LookupError):
                    # Wrong or unknown codec: move on to the next candidate.
                    # Any other error (e.g. permission denied) reaches the
                    # outer handler so the real cause is reported.
                    continue

            if content is None:
                return SkillResult(
                    success=False,
                    error=f"无法解码文件 {file_path.name}(尝试了 {encoding}/utf-8/gbk/gb2312/latin-1 均失败)"
                )

            lines = content.splitlines()
            total_lines = len(lines)
            # Clamp so a negative offset/limit cannot slice from the end.
            start = max(offset, 0)
            count = max(limit, 0)
            selected = lines[start:start + count]
            text = "\n".join(selected)

            return SkillResult(
                success=True,
                data={
                    "path": str(file_path),
                    "content": text,
                    "total_lines": total_lines,
                    "showed_lines": len(selected),
                    "offset": start,
                },
                message=f"已读取 {file_path.name} ({len(selected)}/{total_lines} 行)",
            )
        except Exception as e:
            return SkillResult(success=False, error=str(e))

    def _read_extracted(self, file_path, suffix: str) -> SkillResult:
        """Return the text extracted from a PDF/Excel/Word/PPT file."""
        extracted = _extract_text_from_binary(file_path)
        if extracted is None:
            # The extractor returns None for a missing dependency, a parse
            # failure and a document without text, so the message must not
            # claim that a library is definitely missing.
            hint = self._DEPENDENCY_HINTS.get(suffix, "请安装对应的 Python 库")
            return SkillResult(
                success=False,
                error=f"无法读取 {suffix} 文件 {file_path.name}:可能缺少必要的库、文件已损坏或不含可提取的文本。如缺少依赖请执行: {hint}"
            )
        if not extracted.strip():
            return SkillResult(
                success=False,
                error=f"文件 {file_path.name} 内容为空(可能是扫描件 PDF 或空文件)"
            )
        total_chars = len(extracted)
        return SkillResult(
            success=True,
            data={
                "path": str(file_path),
                "content": extracted,
                "format": suffix,
                "total_chars": total_chars,
            },
            message=f"已读取 {file_path.name} ({suffix}),提取 {total_chars} 字符",
        )
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
class FileWriteSkill(Skill):
    """Write text content to a file."""
    name = "file_write"
    description = "将内容写入指定文件,支持创建目录和追加模式"
    category = "file"
    parameters = [
        SkillParameter("path", "string", "文件路径", required=True),
        SkillParameter("content", "string", "要写入的内容", required=True),
        SkillParameter("encoding", "string", "文件编码", required=False, default="utf-8"),
        SkillParameter("append", "boolean", "是否追加模式", required=False, default=False),
    ]
    dangerous = True

    async def execute(self, path: str = "", content: str = "",
                      encoding: str = "utf-8", append: bool = False, **kwargs) -> SkillResult:
        """Write ``content`` to ``path``, creating parent directories.

        ``append=True`` opens the file in append mode, otherwise it is
        overwritten. Any exception is returned as a failed ``SkillResult``.
        """
        try:
            target = Path(path).expanduser().resolve()
            # Missing parent directories are created on demand.
            target.parent.mkdir(parents=True, exist_ok=True)

            open_mode = "w"
            if append:
                open_mode = "a"
            with open(target, open_mode, encoding=encoding) as handle:
                handle.write(content)

            char_count = len(content)
            resolved = str(target)
            return SkillResult(
                success=True,
                data={"path": resolved, "size": char_count},
                message=f"已写入 {target.name} ({char_count} 字符)",
                files=[resolved],
            )
        except Exception as exc:
            return SkillResult(success=False, error=str(exc))
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
class FileListSkill(Skill):
    """List the contents of a directory.

    [v1.16.19] Adds the ``max_items`` cap so that a recursive listing cannot
    return an unbounded number of entries.
    """
    name = "file_list"
    description = "列出指定目录下的文件和子目录。递归列出时最多返回 500 条。"
    category = "file"
    parameters = [
        SkillParameter("path", "string", "目录路径", required=True),
        SkillParameter("pattern", "string", "文件匹配模式(如 *.py)", required=False, default="*"),
        SkillParameter("recursive", "boolean", "是否递归", required=False, default=False),
        SkillParameter("max_items", "integer", "最大返回条目数", required=False, default=500),
    ]

    async def execute(self, path: str = "", pattern: str = "*",
                      recursive: bool = False, max_items: int = 500, **kwargs) -> SkillResult:
        """Return up to ``max_items`` entries of ``path`` matching ``pattern``.

        Entries are sorted before the cap is applied. ``data["truncated"]``
        is True when the cap cut the listing short. Any exception is
        returned as a failed ``SkillResult``.
        """
        try:
            dir_path = Path(path).expanduser().resolve()
            if not dir_path.exists():
                return SkillResult(success=False, error=f"目录不存在: {path}")
            if not dir_path.is_dir():
                return SkillResult(success=False, error=f"不是目录: {path}")

            finder = dir_path.rglob if recursive else dir_path.glob

            entries = []
            truncated = False
            for entry in sorted(finder(pattern)):
                if len(entries) >= max_items:
                    truncated = True
                    break
                # An entry whose stat() fails is still listed, with size 0.
                try:
                    size = entry.stat().st_size
                except OSError:
                    size = 0
                entries.append({
                    "name": entry.name,
                    "path": str(entry),
                    "is_dir": entry.is_dir(),
                    "size": size,
                })

            summary = f"共 {len(entries)} 个项目"
            if truncated:
                summary = summary + f"(已达上限 {max_items},部分条目未显示)"
            return SkillResult(
                success=True,
                data={"path": str(dir_path), "items": entries, "count": len(entries), "truncated": truncated},
                message=summary,
            )
        except Exception as exc:
            return SkillResult(success=False, error=str(exc))
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
class FileDeleteSkill(Skill):
    """Delete a file or a directory."""
    name = "file_delete"
    description = "删除指定文件或目录"
    category = "file"
    parameters = [
        SkillParameter("path", "string", "文件/目录路径", required=True),
        SkillParameter("recursive", "boolean", "递归删除目录", required=False, default=False),
    ]
    dangerous = True

    async def execute(self, path: str = "", recursive: bool = False, **kwargs) -> SkillResult:
        """Delete ``path``.

        Args:
            path: File or directory to delete; must not be empty.
            recursive: When True a directory is removed with its contents;
                otherwise only an empty directory can be removed.

        Returns:
            A successful result with ``deleted`` and ``was_dir`` in ``data``,
            or a failed result carrying the error message. Exceptions are
            converted to a failed result rather than raised.
        """
        try:
            # An empty path would resolve to the current working directory,
            # which must never be deleted implicitly.
            if not path:
                return SkillResult(success=False, error="路径不能为空")

            target = Path(path).expanduser().resolve()
            if not target.exists():
                return SkillResult(success=False, error=f"不存在: {path}")

            # Capture the type before deleting; afterwards is_dir() is False.
            was_dir = target.is_dir()
            if was_dir:
                if recursive:
                    shutil.rmtree(target)
                else:
                    # shutil has no rmdir(); Path.rmdir() removes an empty
                    # directory and raises OSError when it is not empty.
                    target.rmdir()
            else:
                target.unlink()

            return SkillResult(
                success=True,
                data={"path": str(target), "deleted": True, "was_dir": was_dir},
                message=f"已删除: {path}",
            )
        except Exception as e:
            return SkillResult(success=False, error=str(e))
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
class FileSearchSkill(Skill):
    """Search file contents for a text fragment.

    [v1.16.19] Safety measures:
    - Depth limit (default 10 levels) so the whole file system is not walked.
    - System directories (/proc, /sys, /dev, ...) are refused and skipped.
    - Files larger than 10 MB are skipped.
    - The search stops after 30 seconds.
    - Searching the root directory ``/`` is refused.

    NOTE(review): the parameter description mentions regular expressions,
    but matching is a case-insensitive substring test.
    """
    name = "file_search"
    description = "在指定目录的文件中搜索包含指定文本的行。注意:不要搜索 / 根目录,应在具体的子目录中搜索。"
    category = "file"
    parameters = [
        SkillParameter("path", "string", "搜索目录路径", required=True),
        SkillParameter("query", "string", "搜索关键词或正则表达式", required=True),
        SkillParameter("pattern", "string", "文件匹配模式(如 *.py, *.txt)", required=False, default="*"),
        SkillParameter("max_results", "integer", "最大结果数", required=False, default=50),
        SkillParameter("max_depth", "integer", "最大递归深度(目录层级)", required=False, default=10),
    ]

    # System directories that are never searched (virtual file systems such
    # as /proc and /sys can make a walk hang).
    _EXCLUDED_DIRS = {
        "/proc", "/sys", "/dev", "/run", "/snap", "/boot",
        "/lib", "/lib64", "/sbin", "/bin", "/usr", "/var/cache",
    }

    # Binary extensions that are skipped instead of being read.
    _SKIP_EXTENSIONS = {
        ".pyc", ".pyo", ".whl", ".egg", ".so", ".a", ".o",
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".ico", ".tiff",
        ".mp3", ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".wav", ".ogg",
        ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
        ".exe", ".dll", ".dylib",
        ".sqlite", ".db",
        ".iso", ".dmg",
    }

    # Directory names that are never descended into.
    _SKIP_DIR_NAMES = ("node_modules", "__pycache__", ".git", "venv", ".venv", "env")

    # Files above this size (bytes) are skipped.
    _MAX_FILE_SIZE = 10 * 1024 * 1024

    @staticmethod
    def _is_under(path_str: str, base: str) -> bool:
        """Return True when ``path_str`` is ``base`` or lies below it.

        The separator is part of the comparison so that ``/library`` is not
        mistaken for a child of ``/lib``.
        """
        return path_str == base or path_str.startswith(base + "/")

    async def execute(self, path: str = "", query: str = "", pattern: str = "*",
                      max_results: int = 50, max_depth: int = 10, **kwargs) -> SkillResult:
        """Search files under ``path`` for lines containing ``query``.

        Args:
            path: Directory to search.
            query: Text to look for (case-insensitive); must not be empty.
            pattern: Comma-separated ``fnmatch`` patterns for file names.
            max_results: Maximum number of matching lines to return.
            max_depth: Maximum directory depth below ``path``.

        Returns:
            A successful result whose ``data`` holds ``results``, ``count``,
            ``scanned`` and ``timed_out``, or a failed result with an error
            message. Exceptions are converted to a failed result.
        """
        import time as _time

        _start_time = _time.monotonic()
        _timeout = 30  # seconds

        try:
            # An empty query is a substring of every line and would return
            # arbitrary content.
            if not query:
                return SkillResult(success=False, error="搜索关键词不能为空")

            search_dir = Path(path).expanduser().resolve()
            if not search_dir.is_dir():
                return SkillResult(success=False, error=f"不是目录: {path}")

            # [v1.16.19] Refuse the root directory and system directories.
            search_str = str(search_dir)
            if search_str == "/":
                return SkillResult(
                    success=False,
                    error="不允许搜索根目录 /。请在具体的子目录中搜索,例如 /home 或 /tmp。"
                )
            for excl_dir in self._EXCLUDED_DIRS:
                if self._is_under(search_str, excl_dir):
                    return SkillResult(
                        success=False,
                        error=f"不允许搜索系统目录 {excl_dir},请在用户目录中搜索。"
                    )

            # Strip each pattern so "*.py, *.txt" matches both patterns.
            patterns = [p.strip() for p in pattern.split(",") if p.strip()]
            match_all = not patterns or "*" in patterns

            needle = query.lower()  # hoisted out of the per-line loop
            results = []
            _files_scanned = 0
            _files_skipped = 0
            timed_out = False

            # os.walk lets the directory list be pruned in place, which
            # gives precise control over depth.
            for root, dirs, files in os.walk(search_dir):
                # Stop walking as soon as enough matches were collected.
                if len(results) >= max_results:
                    break
                if _time.monotonic() - _start_time > _timeout:
                    timed_out = True
                    break

                root_path = Path(root)
                try:
                    current_depth = len(root_path.relative_to(search_dir).parts)
                except ValueError:
                    current_depth = max_depth + 1

                if current_depth > max_depth:
                    # Emptying dirs keeps os.walk from descending further.
                    dirs.clear()
                    continue

                # Skip system, hidden and dependency/cache directories.
                dirs[:] = [
                    d for d in dirs
                    if not d.startswith(".")
                    and d not in self._SKIP_DIR_NAMES
                    and not any(
                        self._is_under(str(root_path / d), e)
                        for e in self._EXCLUDED_DIRS
                    )
                ]

                for fname in files:
                    if _time.monotonic() - _start_time > _timeout:
                        timed_out = True
                        break
                    if len(results) >= max_results:
                        break

                    if not match_all and not any(
                        fnmatch.fnmatch(fname, p) for p in patterns
                    ):
                        continue

                    file_path = root_path / fname

                    if file_path.suffix.lower() in self._SKIP_EXTENSIONS:
                        _files_skipped += 1
                        continue

                    try:
                        if file_path.stat().st_size > self._MAX_FILE_SIZE:
                            _files_skipped += 1
                            continue
                    except OSError:
                        continue

                    _files_scanned += 1
                    try:
                        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                            for line_no, line in enumerate(f, start=1):
                                if needle in line.lower():
                                    results.append({
                                        "file": str(file_path),
                                        "line": line_no,
                                        "content": line.strip()[:200],
                                    })
                                    if len(results) >= max_results:
                                        break
                    except OSError:
                        # Unreadable file (permissions, I/O error): skip it.
                        continue

                if timed_out:
                    break

            if timed_out:
                logger.warning(f"file_search 超时 ({_timeout}s),已扫描 {_files_scanned} 文件")

            _elapsed = _time.monotonic() - _start_time
            logger.debug(f"file_search 完成: {_files_scanned} 文件扫描, {_files_skipped} 跳过, "
                         f"{len(results)} 匹配, {_elapsed:.1f}s")

            if not results and _files_scanned == 0:
                return SkillResult(
                    success=True,
                    data={"query": query, "results": [], "count": 0, "scanned": 0,
                          "timed_out": timed_out},
                    message="未找到匹配项(目录中无可搜索的文件)",
                )

            return SkillResult(
                success=True,
                data={"query": query, "results": results, "count": len(results),
                      "scanned": _files_scanned, "timed_out": timed_out},
                message=f"找到 {len(results)} 处匹配(扫描 {_files_scanned} 个文件,耗时 {_elapsed:.1f}s)",
            )
        except Exception as e:
            return SkillResult(success=False, error=str(e))
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
class FileMoveSkill(Skill):
    """Move or rename a file or directory."""
    name = "file_move"
    description = "移动或重命名文件/目录"
    category = "file"
    parameters = [
        SkillParameter("source", "string", "源路径", required=True),
        SkillParameter("destination", "string", "目标路径", required=True),
    ]
    dangerous = True

    async def execute(self, source: str = "", destination: str = "", **kwargs) -> SkillResult:
        """Move ``source`` to ``destination``.

        Args:
            source: Existing file or directory; must not be empty.
            destination: Target path; missing parent directories are created.

        Returns:
            A successful result with the resolved ``source`` and
            ``destination`` in ``data``, or a failed result carrying the
            error message. Exceptions are converted to a failed result.
        """
        try:
            # An empty path would resolve to the current working directory.
            if not source or not destination:
                return SkillResult(success=False, error="源路径和目标路径不能为空")

            src = Path(source).expanduser().resolve()
            # Check the source first so a failed move does not leave freshly
            # created destination directories behind.
            if not src.exists():
                return SkillResult(success=False, error=f"不存在: {source}")

            dst = Path(destination).expanduser().resolve()
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(src), str(dst))
            return SkillResult(
                success=True,
                data={"source": str(src), "destination": str(dst)},
                message=f"已移动: {source} → {destination}",
            )
        except Exception as e:
            return SkillResult(success=False, error=str(e))
|