myagent-ai 1.18.2 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/web/api_server.py +76 -11
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.18.2",
3
+ "version": "1.18.3",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/web/api_server.py CHANGED
@@ -4061,13 +4061,14 @@ window.toggleFullscreen = function() {{
4061
4061
  return chain
4062
4062
 
4063
4063
  def _reorder_model_chain_for_images(self, model_chain: list[dict], has_images: bool) -> list[dict]:
4064
- """[v1.16.14→18] 当消息包含图片时,将支持 vision 的模型优先排列
4064
+ """[v1.16.14→18.2] 当消息包含图片时,将支持 vision 的模型优先排列
4065
4065
 
4066
4066
  优先从 model_chain 自带的 input_modes 读取(v1.16.18 改进),
4067
4067
  其次从 models_library 二次查找(兼容旧逻辑)。
4068
4068
 
4069
4069
  检查每个模型的 input_modes 字段,如果包含 "image",则优先使用。
4070
- 如果都没有 vision 能力,则保持原序(会触发 main_agent 的纯文本降级)。
4070
+ 如果 chain 中没有 vision 模型,自动从全局 models_library 查找 vision
4071
+ 兜底模型追加到 chain 末尾(v1.18.2 改进)。
4071
4072
  """
4072
4073
  if not has_images or not model_chain:
4073
4074
  return model_chain
@@ -4075,9 +4076,11 @@ window.toggleFullscreen = function() {{
4075
4076
  # 从 model_chain 自带的 input_modes 或 models_library 获取
4076
4077
  vision_models = []
4077
4078
  text_only_models = []
4079
+ chain_ids = set()
4078
4080
  for mc in model_chain:
4079
4081
  mc_id = mc.get("id", "")
4080
4082
  model_name = mc.get("name", mc.get("model", "?"))
4083
+ chain_ids.add(mc_id)
4081
4084
 
4082
4085
  # [v1.16.18] 优先使用 chain 自带的 input_modes
4083
4086
  input_modes = mc.get("input_modes", None)
@@ -4091,8 +4094,6 @@ window.toggleFullscreen = function() {{
4091
4094
  input_modes = me.input_modes or ["text"]
4092
4095
  break
4093
4096
 
4094
- logger.debug(f"模型 {model_name} (id={mc_id}) input_modes={input_modes}")
4095
- # [v1.17.1] 改为 info 级别,方便排查
4096
4097
  logger.info(f"[reorder] 模型 {model_name} (id={mc_id}) input_modes={input_modes}")
4097
4098
  if "image" in input_modes:
4098
4099
  vision_models.append(mc)
@@ -4102,13 +4103,43 @@ window.toggleFullscreen = function() {{
4102
4103
  if vision_models:
4103
4104
  logger.info(f"消息含图片,优先使用 vision 模型: {[m.get('name', m.get('model')) for m in vision_models]}")
4104
4105
  return vision_models + text_only_models
4105
- else:
4106
- # [v1.16.18] 详细日志帮助排查问题
4107
- chain_info = []
4108
- for mc in model_chain:
4109
- chain_info.append(f"{mc.get('name', mc.get('model', '?'))}(id={mc.get('id','')}, input_modes={mc.get('input_modes','?')})")
4110
- logger.warning(f"消息含图片,但模型链中无 vision 模型,链详情: {chain_info},将尝试当前主模型(失败后自动降级纯文本)")
4111
- return model_chain
4106
+
4107
+ # [v1.18.2] chain 中没有 vision 模型 → 从全局 models_library 自动找 vision 兜底
4108
+ llm_defaults = self.core.config.llm
4109
+ auto_vision = None
4110
+ for me in self.core.config.models_library:
4111
+ if me.id in chain_ids:
4112
+ continue # 跳过已在 chain 中的
4113
+ modes = me.input_modes or ["text"]
4114
+ if "image" in modes and me.enabled:
4115
+ auto_vision = me
4116
+ break
4117
+
4118
+ if auto_vision:
4119
+ vision_mc = {
4120
+ "id": auto_vision.id,
4121
+ "name": auto_vision.name,
4122
+ "provider": auto_vision.provider or llm_defaults.provider,
4123
+ "api_type": getattr(auto_vision, 'api_type', '') or llm_defaults.api_type,
4124
+ "model": auto_vision.model or auto_vision.id,
4125
+ "base_url": auto_vision.base_url or llm_defaults.base_url,
4126
+ "api_key": auto_vision.api_key or llm_defaults.api_key,
4127
+ "temperature": auto_vision.temperature if auto_vision.temperature is not None else llm_defaults.temperature,
4128
+ "max_tokens": auto_vision.max_tokens if auto_vision.max_tokens else llm_defaults.max_tokens,
4129
+ "context_window": getattr(auto_vision, 'context_window', None) or llm_defaults.context_window,
4130
+ "input_modes": list(auto_vision.input_modes or ["text"]),
4131
+ "is_backup": True,
4132
+ "_auto_vision": True, # 标记:自动追加的 vision 兜底
4133
+ }
4134
+ logger.warning(f"消息含图片,agent 模型链无 vision 模型,自动追加全局 vision 兜底: {auto_vision.name} (id={auto_vision.id})")
4135
+ return [vision_mc] + text_only_models
4136
+
4137
+ # 既没有 chain 内 vision,也没有全局兜底 → 保持原序,让 main_agent 降级纯文本
4138
+ chain_info = []
4139
+ for mc in model_chain:
4140
+ chain_info.append(f"{mc.get('name', mc.get('model', '?'))}(id={mc.get('id','')}, modes={mc.get('input_modes','?')})")
4141
+ logger.warning(f"消息含图片,但无可用 vision 模型,链详情: {chain_info},将降级纯文本")
4142
+ return model_chain
4112
4143
 
4113
4144
  async def _try_model_chain(self, model_chain: list[dict], message: str, session_id: str,
4114
4145
  agent_path: str = None, agent_system_prompt: str = None,
@@ -4135,6 +4166,7 @@ window.toggleFullscreen = function() {{
4135
4166
 
4136
4167
  for i, mc in enumerate(model_chain):
4137
4168
  is_backup = mc.get("is_backup", False)
4169
+ is_auto_vision = mc.get("_auto_vision", False)
4138
4170
  model_label = f"{'备用' if is_backup else '主'}模型 {mc.get('name', mc.get('model', '?'))}"
4139
4171
  logger.info(f"尝试 {model_label} ({i+1}/{len(model_chain)}): provider={mc.get('provider')}, model={mc.get('model')}")
4140
4172
 
@@ -4208,6 +4240,10 @@ window.toggleFullscreen = function() {{
4208
4240
  used_model_name = model_label
4209
4241
  if is_backup:
4210
4242
  logger.warning(f"🔄 主模型失败,成功切换到 {model_label}")
4243
+ # [v1.18.2] 自动 vision 兜底成功的提醒
4244
+ if is_auto_vision:
4245
+ _hint = f"💡 当前绑定的模型不支持图片,已自动切换到 {mc.get('name', mc.get('model', '?'))} 处理。\n\n"
4246
+ return _hint + response
4211
4247
  return response
4212
4248
 
4213
4249
  last_error = response
@@ -4265,6 +4301,8 @@ window.toggleFullscreen = function() {{
4265
4301
  """_try_model_chain_stream 的实际执行体(已在 _model_chain_lock 保护下)"""
4266
4302
  llm = self.core.llm
4267
4303
  full_text = ""
4304
+ _auto_vision_switched = False # [v1.18.2] 标记是否发生了 vision 自动切换
4305
+ _auto_vision_model_name = ""
4268
4306
 
4269
4307
  for i, mc in enumerate(model_chain):
4270
4308
  orig = {
@@ -4288,6 +4326,11 @@ window.toggleFullscreen = function() {{
4288
4326
  agent.context_builder.context_window = mc["context_window"]
4289
4327
  llm._client = None
4290
4328
 
4329
+ # [v1.18.2] 检测是否是自动追加的 vision 兜底模型
4330
+ is_auto_vision = mc.get("_auto_vision", False)
4331
+ if is_auto_vision:
4332
+ _auto_vision_model_name = mc.get("name", mc.get("model", "?"))
4333
+
4291
4334
  # Pass agent context through AgentContext instead of instance attrs
4292
4335
  result = await self._stream_process_message(
4293
4336
  message, session_id, stream_response,
@@ -4296,6 +4339,11 @@ window.toggleFullscreen = function() {{
4296
4339
  user_images=user_images, user_files=user_files,
4297
4340
  )
4298
4341
  if result and not result.startswith("⚠️") and not result.startswith("❌"):
4342
+ # [v1.18.2] 如果是通过自动 vision 兜底成功的,在响应前追加提醒
4343
+ if is_auto_vision and user_images:
4344
+ _hint = f"💡 当前绑定的模型不支持图片,已自动切换到 {_auto_vision_model_name} 处理。\n\n"
4345
+ logger.info(f"自动 vision 兜底成功: {_auto_vision_model_name}")
4346
+ return _hint + result
4299
4347
  return result
4300
4348
  # 如果返回了错误消息,保存它以便最后返回
4301
4349
  if result:
@@ -4314,6 +4362,23 @@ window.toggleFullscreen = function() {{
4314
4362
  if hasattr(agent, 'context_builder') and agent.context_builder:
4315
4363
  agent.context_builder.context_window = orig["context_window"]
4316
4364
 
4365
+ # [v1.18.2] 所有模型都失败,且包含图片,给出明确的配置提示
4366
+ if user_images and full_text and "不支持图片" in full_text:
4367
+ # 查找可用的 vision 模型名称
4368
+ vision_names = []
4369
+ for me in self.core.config.models_library:
4370
+ modes = me.input_modes or ["text"]
4371
+ if "image" in modes and me.enabled:
4372
+ vision_names.append(me.name or me.id)
4373
+ if vision_names:
4374
+ return (f"⚠️ 当前绑定的模型不支持图片识别,自动切换也未找到可用的图片模型。\n\n"
4375
+ f"📋 模型库中支持图片的模型: {', '.join(vision_names[:5])}\n\n"
4376
+ f"请在 agent 设置中将其中一个绑定为模型或备用模型,以便识别图片。")
4377
+ else:
4378
+ return (f"⚠️ 当前绑定的模型不支持图片识别,且模型库中没有任何支持图片的模型。\n\n"
4379
+ f"请在 models_library 中添加一个支持 vision 的模型(input_modes 包含 \"image\"),"
4380
+ f"然后绑定到当前 agent。")
4381
+
4317
4382
  return full_text
4318
4383
 
4319
4384
  async def _stream_text_chunked(self, text: str, write_sse, chunk_size: int = 4, delay: float = 0.015):