npm - myagent-ai - Versions diffs - 1.17.0 → 1.17.2 - Mend

myagent-ai 1.17.0 → 1.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/agents/main_agent.py CHANGED Viewed

@@ -644,6 +644,9 @@ class MainAgent(BaseAgent):
             else:
                 # [v1.16.12] 支持多模态消息（图片+文本）
                 user_images = context.metadata.get("user_images", [])
+                # [v1.17.1] 图片调试
+                if user_images:
+                    logger.info(f"[{task_id}] 检测到 {len(user_images)} 张图片, 构建多模态消息")
                 if user_images:
                     # OpenAI Vision 格式: [{type: "text"}, {type: "image_url"}]
                     multimodal_content = [{"type": "text", "text": context.user_message or "请描述这些图片。"}]
@@ -653,6 +656,8 @@ class MainAgent(BaseAgent):
                                 "type": "image_url",
                                 "image_url": {"url": img["url"]}
                             })
+                    logger.info(f"[{task_id}] 多模态消息构建完成: {len(multimodal_content)} parts, content_types={[p.get('type') for p in multimodal_content]}")
+                    logger.info(f"[{task_id}] LLM model={self.llm.model}, provider={self.llm.provider}")
                     messages.append(Message(role="user", content=multimodal_content))
                 else:
                     messages.append(Message(
@@ -701,9 +706,15 @@ class MainAgent(BaseAgent):
                         )
                     break
-                # [v1.16.13] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
-                _vision_keywords = ["doesn't support image", "does not support image", "model_incompatible", "image input", "not support vision", "unsupported multimodal", "image capability"]
-                if any(kw in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
+                # [v1.16.13→17.0] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
+                # 支持中英文错误关键词匹配（ModelScope 等国产 API 可能返回中文错误）
+                _vision_keywords = [
+                    "doesn't support image", "does not support image", "model_incompatible",
+                    "image input", "not support vision", "unsupported multimodal", "image capability",
+                    "不支持图片", "不支持图像", "图片输入", "图像输入", "不支持多模态",
+                    "视觉", "image_url", "multimodal", "vision",
+                ]
+                if any(kw.lower() in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
                     logger.warning(f"[{task_id}] 模型不支持图片输入，去掉图片用纯文本重试")
                     context.metadata["user_images"] = []
                     # 用纯文本消息替换最后一条多模态消息

package/config.py CHANGED Viewed

@@ -316,8 +316,21 @@ class ConfigManager:
             elif isinstance(value, list) and key == "models_library":
                 models = []
                 for item in value:
+                    # [v1.17.0] 兼容 "input" → "input_modes" 字段映射
+                    # 用户配置中常使用 "input": ["text", "image"]，内部字段名是 "input_modes"
+                    normalized = dict(item)
+                    if "input" in normalized and "input_modes" not in normalized:
+                        normalized["input_modes"] = normalized.pop("input")
+                    if "baseUrl" in normalized and "base_url" not in normalized:
+                        normalized["base_url"] = normalized.pop("baseUrl")
+                    if "apiKey" in normalized and "api_key" not in normalized:
+                        normalized["api_key"] = normalized.pop("apiKey")
+                    if "reasoning" in normalized:
+                        # 确保 reasoning 是 bool 类型
+                        v = normalized["reasoning"]
+                        normalized["reasoning"] = bool(v) if not isinstance(v, bool) else v
                     me = ModelEntry(**{
-                        k: v for k, v in item.items() if k in ModelEntry.__dataclass_fields__
+                        k: v for k, v in normalized.items() if k in ModelEntry.__dataclass_fields__
                     })
                     models.append(me)
                 setattr(target, key, models)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "myagent-ai",
-  "version": "1.17.0",
+  "version": "1.17.2",
   "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
   "main": "main.py",
   "bin": {

package/web/api_server.py CHANGED Viewed

@@ -1091,6 +1091,14 @@ window.toggleFullscreen = function() {{
         # 支持文件附件（文档等，提取文本后作为上下文）
         user_files = data.get("files", [])    # [{"name": "xxx.pdf", "type": "application/pdf", "data": "base64..."}, ...]
+        # [v1.17.1] 图片调试日志
+        if user_images:
+            _img_info = []
+            for _img in user_images:
+                _b64len = len(_img.get("data", "")) if isinstance(_img.get("data"), str) else 0
+                _img_info.append(f"{ _img.get('name','?') }(type={_img.get('type','?')}, b64len={_b64len})")
+            logger.info(f"[{session_id if 'session_id' in dir() else 'req'}] 收到图片: {_img_info}")
         # 如果消息为空但有图片，设置默认提示
         if not message and user_images:
             message = "请查看这些图片"
@@ -1189,7 +1197,12 @@ window.toggleFullscreen = function() {{
                 model_chain = self._build_model_chain(agent_cfg, agent_path)
                 # [v1.16.14] 有图片时，优先使用支持 vision 的模型
                 model_chain = self._reorder_model_chain_for_images(model_chain, bool(user_images))
-                logger.info(f"[{session_id}] model_chain={'有' if model_chain else '无'}, llm={'有' if self.core.llm else '无'}")
+                # [v1.17.1] 详细模型链日志
+                _chain_detail = []
+                for _mc in model_chain:
+                    _chain_detail.append(f"{_mc.get('name','?')}@{_mc.get('model','?')}(modes={_mc.get('input_modes',[])}provider={_mc.get('provider','?')})")
+                logger.info(f"[{session_id}] model_chain ({len(model_chain)} models): {_chain_detail}")
+                logger.info(f"[{session_id}] user_images={len(user_images) if user_images else 0}, has_images={bool(user_images)}")
                 task_plan_context = self._build_task_plan_context(agent_path, chat_mode, message, session_id=session_id)
                 clean_message, agent_system_prompt = self._build_agent_chat_context(agent_path, agent_cfg, message)
@@ -4079,6 +4092,8 @@ window.toggleFullscreen = function() {{
                             break
             logger.debug(f"模型 {model_name} (id={mc_id}) input_modes={input_modes}")
+            # [v1.17.1] 改为 info 级别，方便排查
+            logger.info(f"[reorder] 模型 {model_name} (id={mc_id}) input_modes={input_modes}")
             if "image" in input_modes:
                 vision_models.append(mc)
             else:
@@ -4371,7 +4386,9 @@ window.toggleFullscreen = function() {{
                         await stream_response.write(("data: " + json.dumps({"type": "user_files", "images": _image_file_ids}) + "\n\n").encode())
                     except Exception:
                         pass
-                logger.info(f"[{session_id}] 用户发送了 {len(_processed_images)} 张图片")
+                # [v1.17.1] 调试日志
+                logger.info(f"[{session_id}] 用户发送了 {len(_processed_images)} 张图片, data_uri_len={len(_processed_images[0].get('url',''))}")
+                logger.info(f"[{session_id}] context.metadata['user_images'] 已设置, 将传给 main_agent")
         # [v1.16.12→17] 处理用户文件附件 — 保存到磁盘 + 提取文本附加到消息
         if user_files: