myagent-ai 1.16.13 → 1.16.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/main_agent.py +35 -0
- package/package.json +2 -2
- package/web/api_server.py +34 -0
package/agents/main_agent.py
CHANGED
|
@@ -693,6 +693,41 @@ class MainAgent(BaseAgent):
|
|
|
693
693
|
)
|
|
694
694
|
break
|
|
695
695
|
|
|
696
|
+
# [v1.16.13] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
|
|
697
|
+
_vision_keywords = ["doesn't support image", "does not support image", "model_incompatible", "image input", "not support vision", "unsupported multimodal", "image capability"]
|
|
698
|
+
if any(kw in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
|
|
699
|
+
logger.warning(f"[{task_id}] 模型不支持图片输入,去掉图片用纯文本重试")
|
|
700
|
+
context.metadata["user_images"] = []
|
|
701
|
+
# 用纯文本消息替换最后一条多模态消息
|
|
702
|
+
_text_only_msg = context.user_message or "请处理上述上下文。"
|
|
703
|
+
if len(messages) > 0 and isinstance(messages[-1].content, list):
|
|
704
|
+
messages[-1] = Message(role="user", content=_text_only_msg)
|
|
705
|
+
# 重试 LLM 调用
|
|
706
|
+
if stream_response and self.llm:
|
|
707
|
+
response = await self._call_llm_stream(
|
|
708
|
+
messages, text_delta_callback=text_delta_callback,
|
|
709
|
+
stream_response=stream_response,
|
|
710
|
+
)
|
|
711
|
+
else:
|
|
712
|
+
response = await self._call_llm(messages)
|
|
713
|
+
if response.success:
|
|
714
|
+
# 纯文本重试成功,给回复加上提示前缀
|
|
715
|
+
_vision_prefix = "⚠️ 当前模型不支持图片识别,已自动使用纯文本模式处理(图片未发送给模型)。\n\n"
|
|
716
|
+
llm_raw = _vision_prefix + response.content
|
|
717
|
+
context.working_memory["final_response"] = llm_raw
|
|
718
|
+
await self._emit_v2_event("v2_reasoning", {"content": llm_raw}, stream_callback)
|
|
719
|
+
if self.memory:
|
|
720
|
+
self.memory.add_session(
|
|
721
|
+
session_id=context.session_id,
|
|
722
|
+
role="assistant",
|
|
723
|
+
content=llm_raw,
|
|
724
|
+
)
|
|
725
|
+
break
|
|
726
|
+
else:
|
|
727
|
+
# 纯文本也失败了,走下面的通用错误处理
|
|
728
|
+
_llm_error = response.error or ""
|
|
729
|
+
logger.error(f"[{task_id}] 纯文本重试也失败: {_llm_error}")
|
|
730
|
+
|
|
696
731
|
# 其他 LLM 错误
|
|
697
732
|
error_msg = f"LLM 调用失败: {response.error}"
|
|
698
733
|
context.working_memory["final_response"] = error_msg
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "myagent-ai",
|
|
3
|
-
"version": "1.16.13",
|
|
3
|
+
"version": "1.16.15",
|
|
4
4
|
"description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
|
|
5
5
|
"main": "main.py",
|
|
6
6
|
"bin": {
|
|
@@ -43,4 +43,4 @@
|
|
|
43
43
|
"python": ">=3.10",
|
|
44
44
|
"node": ">=18"
|
|
45
45
|
}
|
|
46
|
-
}
|
|
46
|
+
}
|
package/web/api_server.py
CHANGED
|
@@ -827,6 +827,8 @@ class ApiServer:
|
|
|
827
827
|
try:
|
|
828
828
|
agent_cfg = self._read_agent_config(agent_path)
|
|
829
829
|
model_chain = self._build_model_chain(agent_cfg, agent_path)
|
|
830
|
+
# [v1.16.14] 有图片时,优先使用支持 vision 的模型
|
|
831
|
+
model_chain = self._reorder_model_chain_for_images(model_chain, bool(user_images))
|
|
830
832
|
logger.info(f"[{session_id}] model_chain={'有' if model_chain else '无'}, llm={'有' if self.core.llm else '无'}")
|
|
831
833
|
|
|
832
834
|
task_plan_context = self._build_task_plan_context(agent_path, chat_mode, message, session_id=session_id)
|
|
@@ -3572,6 +3574,38 @@ class ApiServer:
|
|
|
3572
3574
|
|
|
3573
3575
|
return chain
|
|
3574
3576
|
|
|
3577
|
+
def _reorder_model_chain_for_images(self, model_chain: list[dict], has_images: bool) -> list[dict]:
|
|
3578
|
+
"""[v1.16.14] 当消息包含图片时,将支持 vision 的模型优先排列
|
|
3579
|
+
|
|
3580
|
+
检查每个模型的 input_modes 字段,如果包含 "image",则优先使用。
|
|
3581
|
+
如果都没有 vision 能力,则保持原序(会触发 main_agent 的纯文本降级)。
|
|
3582
|
+
"""
|
|
3583
|
+
if not has_images or not model_chain:
|
|
3584
|
+
return model_chain
|
|
3585
|
+
|
|
3586
|
+
# 从 models_library 获取每个模型的 input_modes
|
|
3587
|
+
vision_models = []
|
|
3588
|
+
text_only_models = []
|
|
3589
|
+
for mc in model_chain:
|
|
3590
|
+
mc_id = mc.get("id", "")
|
|
3591
|
+
input_modes = ["text"] # 默认
|
|
3592
|
+
if mc_id:
|
|
3593
|
+
for me in self.core.config.models_library:
|
|
3594
|
+
if me.id == mc_id:
|
|
3595
|
+
input_modes = me.input_modes or ["text"]
|
|
3596
|
+
break
|
|
3597
|
+
if "image" in input_modes:
|
|
3598
|
+
vision_models.append(mc)
|
|
3599
|
+
else:
|
|
3600
|
+
text_only_models.append(mc)
|
|
3601
|
+
|
|
3602
|
+
if vision_models:
|
|
3603
|
+
logger.info(f"消息含图片,优先使用 vision 模型: {[m.get('name', m.get('model')) for m in vision_models]}")
|
|
3604
|
+
return vision_models + text_only_models
|
|
3605
|
+
else:
|
|
3606
|
+
logger.info("消息含图片,但模型链中无 vision 模型,将尝试当前主模型(失败后自动降级纯文本)")
|
|
3607
|
+
return model_chain
|
|
3608
|
+
|
|
3575
3609
|
async def _try_model_chain(self, model_chain: list[dict], message: str, session_id: str,
|
|
3576
3610
|
agent_path: str = None, agent_system_prompt: str = None,
|
|
3577
3611
|
chat_mode: str = "") -> str:
|