myagent-ai 1.28.1 → 1.28.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -802,7 +802,7 @@ class MainAgent(BaseAgent):
         # Step 1: build the context XML
         # Fetch the user preferences / error patterns preloaded by MemoryAgent (if any)
         _memory_ctx_prompt = context.working_memory.get("memory_context_prompt", "")
-        context_xml = self.context_builder.build_context(
+        context_xml, context_static_xml, context_dynamic_xml = self.context_builder.build_context(
             agent_name=agent_name,
             agent_description=agent_description,
             session_id=context.session_id,
@@ -823,18 +823,26 @@ class MainAgent(BaseAgent):
             stream_callback,
         )

-        # Step 2: build the system message: insert the context XML at the "上下文" placeholder in SYSTEM_PROMPT
+        # Step 2: build the system message, split into a static (cacheable) part and a dynamic part
+        # Static part: SYSTEM_PROMPT body + tool list + skill instructions + agent info + runtime environment
+        # Dynamic part: datetime + memory + knowledge base + conversation history + user input + task plan
         _CONTEXT_PLACEHOLDER = "__CONTEXT_PLACEHOLDER__"
-        _prompt_with_placeholder = (
-            "你是一个智能AI助手,请深入分析以下上下文内容。\n\n"
-            + _CONTEXT_PLACEHOLDER + "\n\n"
-            + self.SYSTEM_PROMPT.split("\n", 1)[1]
-        )
-        system_content = _prompt_with_placeholder.replace(_CONTEXT_PLACEHOLDER, context_xml)
-        system_content = system_content + "\n最后,再检查输出格式,确保满足以下要求:" + self.xml_prompt
+        _prompt_header = "你是一个智能AI助手,请深入分析以下上下文内容。\n\n"
+        _prompt_tail = self.SYSTEM_PROMPT.split("\n", 1)[1] + "\n最后,再检查输出格式,确保满足以下要求:" + self.xml_prompt
+
+        # Assemble the system content as one static, cacheable prefix (header + static context + prompt tail)
+        # followed by the dynamic section, so the API provider can cache the prefix across turns of a session
+        static_head = _prompt_header + context_static_xml + "\n\n" + _prompt_tail
+        dynamic_mid = context_dynamic_xml
+
+        # Build the Message from content_parts (llm.py formats them per provider)
+        content_parts = [
+            {"type": "text", "text": static_head, "cache_control": True},  # cacheable
+            {"type": "text", "text": dynamic_mid},  # dynamic, not cached
+        ]

         # Step 3: build the LLM messages (must include a role=user message, otherwise OpenAI-compatible APIs return 400)
-        messages = [Message(role="system", content=system_content)]
+        messages = [Message(role="system", content=content_parts)]

         if all_tool_outputs:
             messages.append(Message(
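To make the split concrete, here is a minimal sketch of the message shape the two parts produce; the literal strings are placeholders, not the package's real SYSTEM_PROMPT or context fragments:

```python
# Stand-ins for the real prompt pieces assembled above (hypothetical values).
static_head = "You are an AI assistant...\n\n<context>\n<tools>...</tools>\n\nCheck the output format."
dynamic_mid = "<datetime>2024-01-01T00:00:00</datetime>\n</context>"

# Order matters: prefix caching (OpenAI's automatic caching, Anthropic's
# cache_control) only matches tokens counted from the start of the prompt,
# so everything static must precede everything dynamic.
content_parts = [
    {"type": "text", "text": static_head, "cache_control": True},  # cacheable prefix
    {"type": "text", "text": dynamic_mid},                         # changes every turn
]
```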
@@ -162,32 +162,43 @@ class ContextBuilder:
         # Prefer get_knowledge (retrieval keywords chosen by the LLM), otherwise fall back to the user message
         kb_query = get_knowledge.strip() if get_knowledge else query

-        sections: List[str] = [
-            self._build_datetime(),
+        # ── [v1.28.2] Two groups, split by caching behavior: static sections (cacheable) vs dynamic sections (change every call) ──
+        # Static sections: essentially unchanged within a session, suitable for prompt caching
+        static_sections: List[str] = [
             self._build_whomi(agent_name, agent_description, agent_override_prompt, agent_path=agent_path),
+            self._build_tools(self.skill_registry),
+            self._build_skill_prompts(self.skill_registry),
+            self._build_runtime_env(),
+        ]
+        # Dynamic sections: may differ on every LLM call
+        dynamic_sections: List[str] = [
+            self._build_datetime(),
             self._build_memory(query, session_id, recall, memory_context_prompt),
             self._build_knowledge(kb_query),
             # Lightweight recent-conversation fallback: a summary of the last 3 turns to cover blind spots in the automemory search
             self._build_recent_summary(session_id),
             self._build_user_input(user_typed_text, user_voice_text),
             self._build_task_plan(task_plan),
-            self._build_tools(self.skill_registry),
-            self._build_skill_prompts(self.skill_registry),
-            self._build_runtime_env(),
             self._build_exec_warnings(),
         ]

-        context_body = "\n".join(sections)
+        # Merge into the full context XML (kept for backward compatibility)
+        all_sections = static_sections + dynamic_sections
+        context_body = "\n".join(all_sections)
         context_xml = f"<context>\n{context_body}\n</context>"

         # ── Token budget check and automatic trimming ──
         context_xml = self._enforce_token_budget(context_xml)

+        # Build the static/dynamic XML fragments (used for prompt caching)
+        static_xml = "<context>\n" + "\n".join(static_sections)
+        dynamic_xml = "\n".join(dynamic_sections) + "\n</context>"
+
         logger.debug(
             f"上下文已构建 (session={session_id}, 对话条数={len(conversation_history)}, "
-            f"context长度={len(context_xml)})"
+            f"context长度={len(context_xml)}, static={len(static_xml)}, dynamic={len(dynamic_xml)})"
         )
-        return context_xml
+        return context_xml, static_xml, dynamic_xml

     # =========================================================================
     # Section builder methods
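A property worth noting: before `_enforce_token_budget` trims `context_xml`, the two fragments concatenate back to exactly the combined document. A toy check with stand-in section strings:

```python
static_sections = ["<whoami>demo-agent</whoami>", "<tools>shell</tools>"]
dynamic_sections = ["<datetime>2024-01-01T00:00:00</datetime>", "<user_input>hi</user_input>"]

context_body = "\n".join(static_sections + dynamic_sections)
context_xml = f"<context>\n{context_body}\n</context>"

# Same construction as build_context: the opening tag travels with the
# static half, the closing tag with the dynamic half.
static_xml = "<context>\n" + "\n".join(static_sections)
dynamic_xml = "\n".join(dynamic_sections) + "\n</context>"

assert static_xml + "\n" + dynamic_xml == context_xml
```

Note that the token-budget trimming is applied only to the combined `context_xml`, so after a trim the two fragments can no longer be assumed to reassemble into it.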
package/core/llm.py CHANGED
@@ -49,7 +49,25 @@ class Message:
     metadata: Dict[str, Any] = field(default_factory=dict)

     def to_dict(self) -> dict:
-        result = {"role": self.role, "content": self.content}
+        # [v1.28.2] Handle content parts that carry cache_control
+        # content_parts: [{"type": "text", "text": "...", "cache_control": True}, ...]
+        # - OpenAI/compatible: sent directly as a content array (identical prefixes are cached automatically)
+        # - Anthropic: extracted in _chat_anthropic/_stream_anthropic, which add cache_control there
+        if isinstance(self.content, list) and self.content and isinstance(self.content[0], dict):
+            # Multimodal / content-parts format
+            cleaned = []
+            for item in self.content:
+                part = {"type": item.get("type", "text"), "text": item.get("text", "")}
+                # Keep the full structure of image_url parts
+                if item.get("type") == "image_url":
+                    part["image_url"] = item["image_url"]
+                # Keep the cache_control marker (used by the Anthropic path)
+                if item.get("cache_control"):
+                    part["cache_control"] = item["cache_control"]
+                cleaned.append(part)
+            result = {"role": self.role, "content": cleaned}
+        else:
+            result = {"role": self.role, "content": self.content}
         if self.name:
             result["name"] = self.name
         if self.tool_call_id:
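For a system message built from cacheable content parts, the serialization round-trips like this (a sketch assuming the `Message` dataclass diffed above):

```python
msg = Message(role="system", content=[
    {"type": "text", "text": "STATIC", "cache_control": True},
    {"type": "text", "text": "DYNAMIC"},
])
assert msg.to_dict() == {
    "role": "system",
    "content": [
        {"type": "text", "text": "STATIC", "cache_control": True},
        {"type": "text", "text": "DYNAMIC"},
    ],
}
```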
@@ -238,6 +256,22 @@ class LLMClient:
     # All providers that use the OpenAI-compatible interface
     _OPENAI_COMPATIBLE_PROVIDERS = ("openai", "custom", "modelscope", "deepseek", "moonshot", "qwen", "dashscope")

+    @staticmethod
+    def _strip_cache_control(messages: list) -> list:
+        """[v1.28.2] Remove cache_control markers from a message list (OpenAI does not recognize the field)"""
+        cleaned = []
+        for msg in messages:
+            m = dict(msg)
+            content = m.get("content")
+            if isinstance(content, list):
+                new_content = []
+                for part in content:
+                    p = {k: v for k, v in part.items() if k != "cache_control"}
+                    new_content.append(p)
+                m["content"] = new_content
+            cleaned.append(m)
+        return cleaned
+
     @staticmethod
     def _convert_to_anthropic_content(content):
         """Convert OpenAI Vision-format content to the Anthropic format
@@ -323,15 +357,19 @@ class LLMClient:
         logger.info(f"AsyncOpenAI 客户端已初始化 (model={self.model})")

     def _init_anthropic(self):
-        """Initialize the Anthropic client"""
+        """Initialize the Anthropic client (prompt caching enabled)"""
         try:
             import anthropic
             key = self.extra.get("anthropic_api_key") or self.api_key
             if not key:
                 raise ValueError("Anthropic API Key 未设置")
-            self._client = anthropic.Anthropic(api_key=key)
+            # [v1.28.2] Enable the prompt-caching beta feature
+            self._client = anthropic.Anthropic(
+                api_key=key,
+                default_headers={"anthropic-beta": "prompt-caching-2024-07-31"},
+            )
             self.model = self.model or "claude-3-sonnet-20240229"
-            logger.info(f"Anthropic 客户端已初始化 (model={self.model})")
+            logger.info(f"Anthropic 客户端已初始化 (model={self.model}, prompt-caching=on)")
         except ImportError:
             raise ImportError("请安装 anthropic: pip install anthropic")
 
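One note on the beta header: Anthropic has since moved prompt caching out of beta, so on current versions of the `anthropic` SDK the `prompt-caching-2024-07-31` header should no longer be required (sending it is harmless); `cache_control` blocks in the request body are enough. A sketch under that assumption:

```python
import anthropic

# Assumes a recent anthropic SDK where prompt caching is generally
# available and needs no beta header.
client = anthropic.Anthropic(api_key="sk-ant-...")  # placeholder key
```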
@@ -514,7 +552,10 @@
     # ------------------------------------------------------------------

     async def _chat_openai(self, kwargs: dict) -> LLMResponse:
-        """OpenAI / compatible interface call (async)"""
+        """OpenAI / compatible interface call (async); automatic prefix caching needs no extra parameters"""
+        # [v1.28.2] Strip cache_control markers (OpenAI does not recognize the field and relies on automatic prefix caching)
+        _cleaned_msgs = self._strip_cache_control(kwargs.get("messages", []))
+        kwargs = {**kwargs, "messages": _cleaned_msgs}
         try:
             response = await self._client.chat.completions.create(**kwargs)
         except Exception as api_err:
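On the OpenAI side there is no explicit cache marker to send: caching kicks in automatically once a request repeats a sufficiently long prompt prefix (documented as 1024 tokens or more), and hits are reported in the response usage. A hypothetical helper for surfacing them, mirroring the Anthropic logging added further down:

```python
def log_openai_cache_hits(response, logger) -> int:
    """Hypothetical helper: OpenAI-compatible responses report automatic
    cache hits in usage.prompt_tokens_details.cached_tokens (absent or 0
    on a miss, and not populated by every compatible provider)."""
    details = getattr(response.usage, "prompt_tokens_details", None)
    cached = (getattr(details, "cached_tokens", 0) or 0) if details else 0
    if cached:
        logger.info(f"[OpenAI Prompt Cache] read={cached} tokens")
    return cached
```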
@@ -590,11 +631,11 @@
         )

     async def _chat_anthropic(self, messages: List[Message], kwargs: dict) -> LLMResponse:
-        """Anthropic Claude interface call"""
+        """Anthropic Claude interface call (supports prompt caching)"""
         loop = asyncio.get_running_loop()

         # Convert the message format
-        system_msg = ""
+        system_msg = None
         anth_messages = []
         for m in messages:
             if m.role == "system":
@@ -610,7 +651,19 @@
             "max_tokens": self.max_tokens,
         }
         if system_msg:
-            create_kwargs["system"] = system_msg
+            # [v1.28.2] Support the content-block format (with cache_control)
+            # system_msg may be a str or a list[dict]
+            if isinstance(system_msg, list):
+                # content-parts format: convert to Anthropic's cache_control format
+                anth_system = []
+                for part in system_msg:
+                    block = {"type": "text", "text": part.get("text", "")}
+                    if part.get("cache_control"):
+                        block["cache_control"] = {"type": "ephemeral"}
+                    anth_system.append(block)
+                create_kwargs["system"] = anth_system
+            else:
+                create_kwargs["system"] = system_msg

         response = await loop.run_in_executor(
             None, lambda: self._client.messages.create(**create_kwargs)
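The request this path ends up sending matches Anthropic's documented prompt-caching shape: `system` becomes a list of text blocks, and a block marked `cache_control: {"type": "ephemeral"}` ends the cached prefix. With stand-in text:

```python
create_kwargs = {
    "model": "claude-3-sonnet-20240229",
    "max_tokens": 1024,
    "system": [
        # Cached prefix: everything up to and including this block.
        {"type": "text", "text": "STATIC PREFIX", "cache_control": {"type": "ephemeral"}},
        # Dynamic suffix: re-sent and billed normally on every call.
        {"type": "text", "text": "DYNAMIC SUFFIX"},
    ],
    "messages": [{"role": "user", "content": "hi"}],
}
# client.messages.create(**create_kwargs)
```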
@@ -628,12 +681,22 @@
         if not content.strip() and _reasoning.strip():
             content = _reasoning

+        # [v1.28.2] Record cache usage
+        _usage = {
+            "input_tokens": getattr(response.usage, "input_tokens", 0) or 0,
+            "output_tokens": getattr(response.usage, "output_tokens", 0) or 0,
+        }
+        if hasattr(response.usage, "cache_creation_input_tokens"):
+            _cc = response.usage.cache_creation_input_tokens or 0
+            _cr = response.usage.cache_read_input_tokens or 0
+            if _cc or _cr:
+                _usage["cache_creation"] = _cc
+                _usage["cache_read"] = _cr
+                logger.info(f"[Anthropic Prompt Cache] 创建={_cc} 读取={_cr} tokens")
+
         return LLMResponse(
             content=content,
-            usage={
-                "input_tokens": response.usage.input_tokens,
-                "output_tokens": response.usage.output_tokens,
-            },
+            usage=_usage,
             model=response.model,
             finish_reason=response.stop_reason or "",
             reasoning=_reasoning,
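A rough way to read the recorded numbers: per Anthropic's published pricing, cache writes bill at about 1.25x the base input rate and cache reads at about 0.1x, and prefixes below a model-dependent minimum (around 1024 tokens for Sonnet-class models) are not cached at all. A hedged helper over the `_usage` dict; the multipliers are assumptions to verify against current pricing:

```python
def billed_input_equivalent(usage: dict) -> float:
    """Approximate billed input cost in base-rate token units.

    Assumes Anthropic's published multipliers (cache writes ~1.25x,
    cache reads ~0.1x); these ratios are pricing, not API guarantees.
    """
    return (
        usage["input_tokens"]
        + 1.25 * usage.get("cache_creation", 0)
        + 0.10 * usage.get("cache_read", 0)
    )
```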
@@ -730,12 +793,15 @@
             raise

     async def _stream_openai(self, kwargs: dict) -> AsyncGenerator[str, None]:
-        """OpenAI / compatible interface (incl. Zhipu) streaming call
+        """OpenAI / compatible interface (incl. Zhipu) streaming call; automatic prefix caching

         Supports two client types:
         - AsyncOpenAI: iterate asynchronously with async for
         - sync OpenAI: iterate synchronously in an executor
         """
+        # [v1.28.2] Strip cache_control markers
+        _cleaned_msgs = self._strip_cache_control(kwargs.get("messages", []))
+        kwargs = {**kwargs, "messages": _cleaned_msgs}
         # Determine the client type and choose the appropriate streaming iteration
         is_async = hasattr(self._client, '__aenter__')
 
@@ -764,11 +830,11 @@
     async def _stream_anthropic(
         self, messages: List[Message], kwargs: dict
     ) -> AsyncGenerator[str, None]:
-        """Anthropic Claude streaming call"""
+        """Anthropic Claude streaming call (supports prompt caching)"""
        loop = asyncio.get_running_loop()

         # Convert the message format
-        system_msg = ""
+        system_msg = None
         anth_messages = []
         for m in messages:
             if m.role == "system":
@@ -785,7 +851,17 @@
             "stream": True,
         }
         if system_msg:
-            create_kwargs["system"] = system_msg
+            # [v1.28.2] Support the content-block format (with cache_control)
+            if isinstance(system_msg, list):
+                anth_system = []
+                for part in system_msg:
+                    block = {"type": "text", "text": part.get("text", "")}
+                    if part.get("cache_control"):
+                        block["cache_control"] = {"type": "ephemeral"}
+                    anth_system.append(block)
+                create_kwargs["system"] = anth_system
+            else:
+                create_kwargs["system"] = system_msg

         def _create_stream():
             return self._client.messages.create(**create_kwargs)
@@ -806,6 +882,13 @@
             if event.type == "content_block_delta":
                 if hasattr(event.delta, "text"):
                     yield event.delta.text
+            # [v1.28.2] Log cache usage for streaming responses
+            elif event.type == "message_delta":
+                if hasattr(event, "usage"):
+                    _cr = getattr(event.usage, "cache_read_input_tokens", 0) or 0
+                    _cc = getattr(event.usage, "cache_creation_input_tokens", 0) or 0
+                    if _cc or _cr:
+                        logger.info(f"[Anthropic Cache/stream] 创建={_cc} 读取={_cr} tokens")

     async def _stream_ollama(self, kwargs: dict) -> AsyncGenerator[str, None]:
         """Ollama streaming call"""
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "myagent-ai",
-  "version": "1.28.1",
+  "version": "1.28.2",
   "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
   "main": "main.py",
   "bin": {