myagent-ai 1.17.0 → 1.17.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -644,6 +644,9 @@ class MainAgent(BaseAgent):
644
644
  else:
645
645
  # [v1.16.12] 支持多模态消息(图片+文本)
646
646
  user_images = context.metadata.get("user_images", [])
647
+ # [v1.17.1] 图片调试
648
+ if user_images:
649
+ logger.info(f"[{task_id}] 检测到 {len(user_images)} 张图片, 构建多模态消息")
647
650
  if user_images:
648
651
  # OpenAI Vision 格式: [{type: "text"}, {type: "image_url"}]
649
652
  multimodal_content = [{"type": "text", "text": context.user_message or "请描述这些图片。"}]
@@ -653,6 +656,8 @@ class MainAgent(BaseAgent):
653
656
  "type": "image_url",
654
657
  "image_url": {"url": img["url"]}
655
658
  })
659
+ logger.info(f"[{task_id}] 多模态消息构建完成: {len(multimodal_content)} parts, content_types={[p.get('type') for p in multimodal_content]}")
660
+ logger.info(f"[{task_id}] LLM model={self.llm.model}, provider={self.llm.provider}")
656
661
  messages.append(Message(role="user", content=multimodal_content))
657
662
  else:
658
663
  messages.append(Message(
@@ -701,9 +706,15 @@ class MainAgent(BaseAgent):
701
706
  )
702
707
  break
703
708
 
704
- # [v1.16.13] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
705
- _vision_keywords = ["doesn't support image", "does not support image", "model_incompatible", "image input", "not support vision", "unsupported multimodal", "image capability"]
706
- if any(kw in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
709
+ # [v1.16.13→17.0] 特殊处理模型不支持图片输入 — 去掉图片用纯文本重试
710
+ # 支持中英文错误关键词匹配(ModelScope 等国产 API 可能返回中文错误)
711
+ _vision_keywords = [
712
+ "doesn't support image", "does not support image", "model_incompatible",
713
+ "image input", "not support vision", "unsupported multimodal", "image capability",
714
+ "不支持图片", "不支持图像", "图片输入", "图像输入", "不支持多模态",
715
+ "视觉", "image_url", "multimodal", "vision",
716
+ ]
717
+ if any(kw.lower() in _llm_error.lower() for kw in _vision_keywords) and context.metadata.get("user_images"):
707
718
  logger.warning(f"[{task_id}] 模型不支持图片输入,去掉图片用纯文本重试")
708
719
  context.metadata["user_images"] = []
709
720
  # 用纯文本消息替换最后一条多模态消息
package/config.py CHANGED
@@ -316,8 +316,21 @@ class ConfigManager:
316
316
  elif isinstance(value, list) and key == "models_library":
317
317
  models = []
318
318
  for item in value:
319
+ # [v1.17.0] 兼容 "input" → "input_modes" 字段映射
320
+ # 用户配置中常使用 "input": ["text", "image"],内部字段名是 "input_modes"
321
+ normalized = dict(item)
322
+ if "input" in normalized and "input_modes" not in normalized:
323
+ normalized["input_modes"] = normalized.pop("input")
324
+ if "baseUrl" in normalized and "base_url" not in normalized:
325
+ normalized["base_url"] = normalized.pop("baseUrl")
326
+ if "apiKey" in normalized and "api_key" not in normalized:
327
+ normalized["api_key"] = normalized.pop("apiKey")
328
+ if "reasoning" in normalized:
329
+ # 确保 reasoning 是 bool 类型
330
+ v = normalized["reasoning"]
331
+ normalized["reasoning"] = bool(v) if not isinstance(v, bool) else v
319
332
  me = ModelEntry(**{
320
- k: v for k, v in item.items() if k in ModelEntry.__dataclass_fields__
333
+ k: v for k, v in normalized.items() if k in ModelEntry.__dataclass_fields__
321
334
  })
322
335
  models.append(me)
323
336
  setattr(target, key, models)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.17.0",
3
+ "version": "1.17.2",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -1091,6 +1091,14 @@ window.toggleFullscreen = function() {{
1091
1091
  # 支持文件附件(文档等,提取文本后作为上下文)
1092
1092
  user_files = data.get("files", []) # [{"name": "xxx.pdf", "type": "application/pdf", "data": "base64..."}, ...]
1093
1093
 
1094
+ # [v1.17.1] 图片调试日志
1095
+ if user_images:
1096
+ _img_info = []
1097
+ for _img in user_images:
1098
+ _b64len = len(_img.get("data", "")) if isinstance(_img.get("data"), str) else 0
1099
+ _img_info.append(f"{ _img.get('name','?') }(type={_img.get('type','?')}, b64len={_b64len})")
1100
+ logger.info(f"[{session_id if 'session_id' in dir() else 'req'}] 收到图片: {_img_info}")
1101
+
1094
1102
  # 如果消息为空但有图片,设置默认提示
1095
1103
  if not message and user_images:
1096
1104
  message = "请查看这些图片"
@@ -1189,7 +1197,12 @@ window.toggleFullscreen = function() {{
1189
1197
  model_chain = self._build_model_chain(agent_cfg, agent_path)
1190
1198
  # [v1.16.14] 有图片时,优先使用支持 vision 的模型
1191
1199
  model_chain = self._reorder_model_chain_for_images(model_chain, bool(user_images))
1192
- logger.info(f"[{session_id}] model_chain={'有' if model_chain else '无'}, llm={'有' if self.core.llm else '无'}")
1200
+ # [v1.17.1] 详细模型链日志
1201
+ _chain_detail = []
1202
+ for _mc in model_chain:
1203
+ _chain_detail.append(f"{_mc.get('name','?')}@{_mc.get('model','?')}(modes={_mc.get('input_modes',[])}provider={_mc.get('provider','?')})")
1204
+ logger.info(f"[{session_id}] model_chain ({len(model_chain)} models): {_chain_detail}")
1205
+ logger.info(f"[{session_id}] user_images={len(user_images) if user_images else 0}, has_images={bool(user_images)}")
1193
1206
 
1194
1207
  task_plan_context = self._build_task_plan_context(agent_path, chat_mode, message, session_id=session_id)
1195
1208
  clean_message, agent_system_prompt = self._build_agent_chat_context(agent_path, agent_cfg, message)
@@ -4079,6 +4092,8 @@ window.toggleFullscreen = function() {{
4079
4092
  break
4080
4093
 
4081
4094
  logger.debug(f"模型 {model_name} (id={mc_id}) input_modes={input_modes}")
4095
+ # [v1.17.1] 改为 info 级别,方便排查
4096
+ logger.info(f"[reorder] 模型 {model_name} (id={mc_id}) input_modes={input_modes}")
4082
4097
  if "image" in input_modes:
4083
4098
  vision_models.append(mc)
4084
4099
  else:
@@ -4371,7 +4386,9 @@ window.toggleFullscreen = function() {{
4371
4386
  await stream_response.write(("data: " + json.dumps({"type": "user_files", "images": _image_file_ids}) + "\n\n").encode())
4372
4387
  except Exception:
4373
4388
  pass
4374
- logger.info(f"[{session_id}] 用户发送了 {len(_processed_images)} 张图片")
4389
+ # [v1.17.1] 调试日志
4390
+ logger.info(f"[{session_id}] 用户发送了 {len(_processed_images)} 张图片, data_uri_len={len(_processed_images[0].get('url',''))}")
4391
+ logger.info(f"[{session_id}] context.metadata['user_images'] 已设置, 将传给 main_agent")
4375
4392
 
4376
4393
  # [v1.16.12→17] 处理用户文件附件 — 保存到磁盘 + 提取文本附加到消息
4377
4394
  if user_files: