letta-nightly 0.8.4.dev20250618104304__py3-none-any.whl → 0.8.5.dev20250619180801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. letta/__init__.py +1 -1
  2. letta/agents/letta_agent.py +54 -20
  3. letta/agents/voice_agent.py +47 -31
  4. letta/constants.py +1 -1
  5. letta/data_sources/redis_client.py +11 -6
  6. letta/functions/function_sets/builtin.py +35 -11
  7. letta/functions/prompts.py +26 -0
  8. letta/functions/types.py +6 -0
  9. letta/interfaces/openai_chat_completions_streaming_interface.py +0 -1
  10. letta/llm_api/anthropic.py +9 -1
  11. letta/llm_api/anthropic_client.py +22 -3
  12. letta/llm_api/aws_bedrock.py +10 -6
  13. letta/llm_api/llm_api_tools.py +3 -0
  14. letta/llm_api/openai_client.py +1 -1
  15. letta/orm/agent.py +14 -1
  16. letta/orm/job.py +3 -0
  17. letta/orm/provider.py +3 -1
  18. letta/schemas/agent.py +7 -0
  19. letta/schemas/embedding_config.py +8 -0
  20. letta/schemas/enums.py +0 -1
  21. letta/schemas/job.py +1 -0
  22. letta/schemas/providers.py +13 -5
  23. letta/server/rest_api/routers/v1/agents.py +76 -35
  24. letta/server/rest_api/routers/v1/providers.py +7 -7
  25. letta/server/rest_api/routers/v1/sources.py +39 -19
  26. letta/server/rest_api/routers/v1/tools.py +96 -31
  27. letta/services/agent_manager.py +8 -2
  28. letta/services/file_processor/chunker/llama_index_chunker.py +89 -1
  29. letta/services/file_processor/embedder/openai_embedder.py +6 -1
  30. letta/services/file_processor/parser/mistral_parser.py +2 -2
  31. letta/services/helpers/agent_manager_helper.py +44 -16
  32. letta/services/job_manager.py +35 -17
  33. letta/services/mcp/base_client.py +26 -1
  34. letta/services/mcp_manager.py +33 -18
  35. letta/services/provider_manager.py +30 -0
  36. letta/services/tool_executor/builtin_tool_executor.py +335 -43
  37. letta/services/tool_manager.py +25 -1
  38. letta/services/user_manager.py +1 -1
  39. letta/settings.py +3 -0
  40. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/METADATA +4 -3
  41. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/RECORD +44 -42
  42. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/LICENSE +0 -0
  43. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/WHEEL +0 -0
  44. {letta_nightly-0.8.4.dev20250618104304.dist-info → letta_nightly-0.8.5.dev20250619180801.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,6 +1,6 @@
  import os
 
- __version__ = "0.8.4"
+ __version__ = "0.8.5"
 
  if os.environ.get("LETTA_VERSION"):
      __version__ = os.environ["LETTA_VERSION"]
letta/agents/letta_agent.py CHANGED
@@ -1,6 +1,7 @@
  import asyncio
  import json
  import uuid
+ from datetime import datetime
  from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
 
  from openai import AsyncStream
@@ -13,7 +14,7 @@ from letta.agents.helpers import _create_letta_response, _prepare_in_context_mes
  from letta.constants import DEFAULT_MAX_STEPS
  from letta.errors import ContextWindowExceededError
  from letta.helpers import ToolRulesSolver
- from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
+ from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
  from letta.helpers.tool_execution_helper import enable_strict_mode
  from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
  from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -25,7 +26,7 @@ from letta.orm.enums import ToolType
  from letta.otel.context import get_ctx_attributes
  from letta.otel.metric_registry import MetricRegistry
  from letta.otel.tracing import log_event, trace_method, tracer
- from letta.schemas.agent import AgentState
+ from letta.schemas.agent import AgentState, UpdateAgent
  from letta.schemas.enums import MessageRole
  from letta.schemas.letta_message import MessageType
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -283,8 +284,13 @@ class LettaAgent(BaseAgent):
          # log request time
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             now_datetime = get_utc_time()
+             await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
          request_span.end()
 
          # Return back usage
@@ -410,8 +416,13 @@ class LettaAgent(BaseAgent):
          # log request time
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             now_datetime = get_utc_time()
+             await self._update_agent_last_run_metrics(now_datetime, duration_ms)
+
          request_span.end()
 
          # Extend the in context message ids
@@ -426,6 +437,16 @@ class LettaAgent(BaseAgent):
 
          return current_in_context_messages, new_in_context_messages, usage, stop_reason
 
+     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+         try:
+             await self.agent_manager.update_agent_async(
+                 agent_id=self.agent_id,
+                 agent_update=UpdateAgent(last_run_completion=completion_time, last_run_duration_ms=duration_ms),
+                 actor=self.actor,
+             )
+         except Exception as e:
+             logger.error(f"Failed to update agent's last run metrics: {e}")
+
      @trace_method
      async def step_stream(
          self,
@@ -631,8 +652,13 @@ class LettaAgent(BaseAgent):
          # log time of entire request
          if request_start_timestamp_ns:
              now = get_utc_timestamp_ns()
-             request_ns = now - request_start_timestamp_ns
-             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(request_ns)})
+             duration_ms = ns_to_ms(now - request_start_timestamp_ns)
+             request_span.add_event(name="letta_request_ms", attributes={"duration_ms": duration_ms})
+
+             # update agent's last run metrics
+             completion_time = get_utc_time()
+             await self._update_agent_last_run_metrics(completion_time, duration_ms)
+
          request_span.end()
 
          for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -913,13 +939,13 @@ class LettaAgent(BaseAgent):
          except AssertionError:
              tool_args = json.loads(tool_args)
 
+         # Get request heartbeats and coerce to bool
+         request_heartbeat = tool_args.pop("request_heartbeat", False)
          if is_final_step:
              stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
              logger.info("Agent has reached max steps.")
              request_heartbeat = False
          else:
-             # Get request heartbeats and coerce to bool
-             request_heartbeat = tool_args.pop("request_heartbeat", False)
              # Pre-emptively pop out inner_thoughts
              tool_args.pop(INNER_THOUGHTS_KWARG, "")
@@ -940,7 +966,10 @@
              tool_call_id=tool_call_id,
              request_heartbeat=request_heartbeat,
          )
-         if tool_call_name not in valid_tool_names:
+         # Check if tool rule is violated - if so, we'll force continuation
+         tool_rule_violated = tool_call_name not in valid_tool_names
+
+         if tool_rule_violated:
              base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
              violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
              if violated_rule_messages:
@@ -969,7 +998,7 @@
 
          # get the function response limit
          target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
-         return_char_limit = target_tool.return_char_limit
+         return_char_limit = target_tool.return_char_limit if target_tool else None
          function_response_string = validate_function_response(
              tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
          )
@@ -981,15 +1010,20 @@
          # 4. Register tool call with tool rule solver
          # Resolve whether or not to continue stepping
          continue_stepping = request_heartbeat
-         tool_rules_solver.register_tool_call(tool_name=tool_call_name)
-         if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
-             if continue_stepping:
-                 stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
-             continue_stepping = False
-         elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
-             continue_stepping = True
-         elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+
+         # Force continuation if tool rule was violated to give the model another chance
+         if tool_rule_violated:
              continue_stepping = True
+         else:
+             tool_rules_solver.register_tool_call(tool_name=tool_call_name)
+             if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+                 if continue_stepping:
+                     stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                 continue_stepping = False
+             elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
+                 continue_stepping = True
+             elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+                 continue_stepping = True
 
          # 5a. Persist Steps to DB
          # Following agent loop to persist this before messages
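Note: the timing hunks above all follow one pattern: measure in nanoseconds, convert once to milliseconds for the span event, and persist the new last-run metrics best-effort so a storage failure can never fail the request. A minimal standalone sketch of that flow (update_last_run is a hypothetical stand-in for agent_manager.update_agent_async with an UpdateAgent payload):

import time
from datetime import datetime, timezone

def ns_to_ms(ns: int) -> float:
    # Nanoseconds -> milliseconds, as used for the span event attribute.
    return ns / 1_000_000

def update_last_run(completion_time: datetime, duration_ms: float) -> None:
    # Hypothetical stand-in for persisting last_run_completion / last_run_duration_ms.
    print(f"last_run_completion={completion_time.isoformat()} last_run_duration_ms={duration_ms:.1f}")

request_start_ns = time.time_ns()  # analogous to get_utc_timestamp_ns()
time.sleep(0.05)                   # ... the agent step would happen here ...
duration_ms = ns_to_ms(time.time_ns() - request_start_ns)

# Best-effort: failures are logged, never raised into the request path.
try:
    update_last_run(datetime.now(timezone.utc), duration_ms)
except Exception as e:
    print(f"Failed to update agent's last run metrics: {e}")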
letta/agents/voice_agent.py CHANGED
@@ -9,14 +9,9 @@ import openai
  from letta.agents.base_agent import BaseAgent
  from letta.agents.exceptions import IncompatibleAgentType
  from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
- from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
+ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG, REQUEST_HEARTBEAT_PARAM
  from letta.helpers.datetime_helpers import get_utc_time
- from letta.helpers.tool_execution_helper import (
-     add_pre_execution_message,
-     enable_strict_mode,
-     execute_external_tool,
-     remove_request_heartbeat,
- )
+ from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
  from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
  from letta.log import get_logger
  from letta.orm.enums import ToolType
@@ -47,6 +42,7 @@ from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
  from letta.services.summarizer.enums import SummarizationMode
  from letta.services.summarizer.summarizer import Summarizer
+ from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
  from letta.settings import model_settings
 
  logger = get_logger(__name__)
@@ -124,7 +120,11 @@ class VoiceAgent(BaseAgent):
 
          user_query = input_messages[0].content[0].text
 
-         agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)
+         agent_state = await self.agent_manager.get_agent_by_id_async(
+             agent_id=self.agent_id,
+             include_relationships=["tools", "memory", "tool_exec_environment_variables", "multi_agent_group"],
+             actor=self.actor,
+         )
 
          # TODO: Refactor this so it uses our in-house clients
          # TODO: For now, piggyback off of OpenAI client for ease
@@ -332,7 +332,12 @@
 
      def _build_tool_schemas(self, agent_state: AgentState, external_tools_only=True) -> List[Tool]:
          if external_tools_only:
-             tools = [t for t in agent_state.tools if t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
+             tools = [
+                 t
+                 for t in agent_state.tools
+                 if t.tool_type
+                 in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM, ToolType.LETTA_FILES_CORE, ToolType.LETTA_BUILTIN, ToolType.EXTERNAL_MCP}
+             ]
          else:
              tools = agent_state.tools
 
@@ -401,12 +406,10 @@
 
      async def _execute_tool(self, user_query: str, tool_name: str, tool_args: dict, agent_state: AgentState) -> "ToolExecutionResult":
          """
-         Executes a tool and returns (result, success_flag).
+         Executes a tool and returns the ToolExecutionResult.
          """
          from letta.schemas.tool_execution_result import ToolExecutionResult
 
-         print("EXECUTING TOOL")
-
          # Special memory case
          if tool_name == "search_memory":
              tool_result = await self._search_memory(
@@ -420,26 +423,39 @@
                  func_return=tool_result,
                  status="success",
              )
-         else:
-             target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
-             if not target_tool:
-                 return ToolExecutionResult(
-                     func_return=f"Tool not found: {tool_name}",
-                     status="error",
-                 )
 
-             try:
-                 tool_result, _ = execute_external_tool(
-                     agent_state=agent_state,
-                     function_name=tool_name,
-                     function_args=tool_args,
-                     target_letta_tool=target_tool,
-                     actor=self.actor,
-                     allow_agent_state_modifications=False,
-                 )
-                 return ToolExecutionResult(func_return=tool_result, status="success")
-             except Exception as e:
-                 return ToolExecutionResult(func_return=f"Failed to call tool. Error: {e}", status="error")
+         # Find the target tool
+         target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+         if not target_tool:
+             return ToolExecutionResult(
+                 func_return=f"Tool {tool_name} not found",
+                 status="error",
+             )
+
+         # Use ToolExecutionManager for modern tool execution
+         sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+         tool_execution_manager = ToolExecutionManager(
+             agent_state=agent_state,
+             message_manager=self.message_manager,
+             agent_manager=self.agent_manager,
+             block_manager=self.block_manager,
+             passage_manager=self.passage_manager,
+             sandbox_env_vars=sandbox_env_vars,
+             actor=self.actor,
+         )
+
+         # Remove request heartbeat / pre_exec_message
+         tool_args.pop(PRE_EXECUTION_MESSAGE_ARG, None)
+         tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
+
+         tool_execution_result = await tool_execution_manager.execute_tool_async(
+             function_name=tool_name,
+             function_args=tool_args,
+             tool=target_tool,
+             step_id=None,  # VoiceAgent doesn't use step tracking currently
+         )
+
+         return tool_execution_result
 
      async def _search_memory(
          self,
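Note: _execute_tool now strips letta's injected control arguments before dispatching through ToolExecutionManager, so they never reach the tool function itself. A tiny sketch of that scrubbing step (the literal value of PRE_EXECUTION_MESSAGE_ARG is an assumption here; "request_heartbeat" matches the literal popped in letta_agent.py above):

PRE_EXECUTION_MESSAGE_ARG = "pre_exec_message"  # assumed value; see letta.constants
REQUEST_HEARTBEAT_PARAM = "request_heartbeat"

def scrub_control_args(tool_args: dict) -> dict:
    # Control kwargs are protocol-level, not part of the tool's real signature.
    tool_args.pop(PRE_EXECUTION_MESSAGE_ARG, None)
    tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
    return tool_args

print(scrub_control_args({"city": "Berlin", "request_heartbeat": True}))
# -> {'city': 'Berlin'}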
letta/constants.py CHANGED
@@ -65,7 +65,7 @@ DEFAULT_EMBEDDING_CHUNK_SIZE = 300
 
  # tokenizers
  EMBEDDING_TO_TOKENIZER_MAP = {
-     "text-embedding-ada-002": "cl100k_base",
+     "text-embedding-3-small": "cl100k_base",
  }
  EMBEDDING_TO_TOKENIZER_DEFAULT = "cl100k_base"
 
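Note: a map like this is normally consumed with a lookup that falls back to EMBEDDING_TO_TOKENIZER_DEFAULT. A sketch using the tiktoken package (count_tokens is illustrative, not a letta API):

import tiktoken

EMBEDDING_TO_TOKENIZER_MAP = {"text-embedding-3-small": "cl100k_base"}
EMBEDDING_TO_TOKENIZER_DEFAULT = "cl100k_base"

def count_tokens(text: str, embedding_model: str) -> int:
    # Unknown models fall back to the default cl100k_base encoding.
    encoding_name = EMBEDDING_TO_TOKENIZER_MAP.get(embedding_model, EMBEDDING_TO_TOKENIZER_DEFAULT)
    return len(tiktoken.get_encoding(encoding_name).encode(text))

print(count_tokens("hello world", "text-embedding-3-small"))  # -> 2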
letta/data_sources/redis_client.py CHANGED
@@ -290,12 +290,17 @@ async def get_redis_client() -> AsyncRedisClient:
      try:
          from letta.settings import settings
 
-         _client_instance = AsyncRedisClient(
-             host=settings.redis_host or "localhost",
-             port=settings.redis_port or 6379,
-         )
-         await _client_instance.wait_for_ready(timeout=5)
-         logger.info("Redis client initialized")
+         # If Redis settings are not configured, use noop client
+         if settings.redis_host is None or settings.redis_port is None:
+             logger.info("Redis not configured, using noop client")
+             _client_instance = NoopAsyncRedisClient()
+         else:
+             _client_instance = AsyncRedisClient(
+                 host=settings.redis_host,
+                 port=settings.redis_port,
+             )
+             await _client_instance.wait_for_ready(timeout=5)
+             logger.info("Redis client initialized")
      except Exception as e:
          logger.warning(f"Failed to initialize Redis: {e}")
          _client_instance = NoopAsyncRedisClient()
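Note: the fallback relies on the null-object pattern: NoopAsyncRedisClient exposes the same surface as the real client but does nothing, so callers never branch on whether Redis is configured. A simplified, self-contained sketch of the idea (not letta's actual classes):

import asyncio

class NoopCache:
    """Null object: same interface as a real client, but inert."""
    async def get(self, key: str):
        return None
    async def set(self, key: str, value: str) -> None:
        pass

class DictCache(NoopCache):
    """Stand-in for the real Redis-backed client."""
    def __init__(self):
        self._data = {}
    async def get(self, key: str):
        return self._data.get(key)
    async def set(self, key: str, value: str) -> None:
        self._data[key] = value

def make_cache(host, port):
    # Mirrors get_redis_client(): unset settings -> noop client, no error raised.
    return NoopCache() if host is None or port is None else DictCache()

async def main():
    cache = make_cache(None, None)
    await cache.set("k", "v")
    print(await cache.get("k"))  # None: writes silently dropped when unconfigured

asyncio.run(main())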
letta/functions/function_sets/builtin.py CHANGED
@@ -1,27 +1,51 @@
- from typing import Literal
+ from typing import List, Literal
 
+ from letta.functions.types import SearchTask
 
- async def web_search(query: str) -> str:
+
+ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
      """
-     Search the web for information.
+     Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+
      Args:
-         query (str): The query to search the web for.
+         code (str): The code to run.
+         language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
      Returns:
-         str: The search results.
+         str: The output of the code, the stdout, the stderr, and error traces (if any).
      """
 
      raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
 
 
- def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
+ async def web_search(
+     tasks: List[SearchTask],
+     limit: int = 3,
+     return_raw: bool = False,
+ ) -> str:
      """
-     Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+     Search the web with a list of query/question pairs and extract passages that answer the corresponding questions.
+
+     Examples:
+         tasks -> [
+             SearchTask(
+                 query="Tesla Q1 2025 earnings report PDF",
+                 question="What was Tesla's net profit in Q1 2025?"
+             ),
+             SearchTask(
+                 query="Letta API prebuilt tools core_memory_append",
+                 question="What does the core_memory_append tool do in Letta?"
+             )
+         ]
 
      Args:
-         code (str): The code to run.
-         language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
+         tasks (List[SearchTask]): A list of search tasks, each containing a `query` and a corresponding `question`.
+         limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 3.
+         return_raw (bool, optional): If set to True, returns the raw content of the web pages.
+             This should be False unless otherwise specified by the user. Defaults to False.
+
      Returns:
-         str: The output of the code, the stdout, the stderr, and error traces (if any).
+         str: A JSON-encoded string containing a list of search results.
+             Each result includes ranked snippets with their source URLs and relevance scores,
+             corresponding to each search task.
      """
-
      raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
letta/functions/prompts.py ADDED
@@ -0,0 +1,26 @@
+ FIRECRAWL_SEARCH_SYSTEM_PROMPT = """You are an expert at extracting relevant information from web content.
+
+ Given a document with line numbers (format: "LINE_NUM: content"), identify passages that answer the provided question by returning line ranges:
+ - start_line: The starting line number (inclusive)
+ - end_line: The ending line number (inclusive)
+
+ SELECTION PRINCIPLES:
+ 1. Prefer comprehensive passages that include full context
+ 2. Capture complete thoughts, examples, and explanations
+ 3. When relevant content spans multiple paragraphs, include the entire section
+ 4. Favor fewer, substantial passages over many fragments
+
+ Focus on passages that can stand alone as complete, meaningful responses."""
+
+
+ def get_firecrawl_search_user_prompt(query: str, question: str, numbered_content: str) -> str:
+     """Generate the user prompt for line-number based search analysis."""
+     return f"""Search Query: {query}
+ Question to Answer: {question}
+
+ Document Content (with line numbers):
+ {numbered_content}
+
+ Identify line ranges that best answer: "{question}"
+
+ Select comprehensive passages with full context. Include entire sections when relevant."""
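Note: the system prompt expects the document rendered as "LINE_NUM: content", so a caller must number the page before building the user prompt. An illustrative helper (not part of this diff):

def number_lines(document: str) -> str:
    # Render a document in the "LINE_NUM: content" shape the prompt expects.
    return "\n".join(f"{i}: {line}" for i, line in enumerate(document.splitlines(), start=1))

page = "Tesla reported Q1 2025 results.\nNet profit fell from the prior year."
numbered = number_lines(page)
print(numbered)
# 1: Tesla reported Q1 2025 results.
# 2: Net profit fell from the prior year.
# "numbered" would then be passed as numbered_content to get_firecrawl_search_user_prompt.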
letta/functions/types.py ADDED
@@ -0,0 +1,6 @@
+ from pydantic import BaseModel, Field
+
+
+ class SearchTask(BaseModel):
+     query: str = Field(description="Search query for web search")
+     question: str = Field(description="Question to answer from search results, considering full conversation context")
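Note: combined with the new web_search signature in builtin.py, a call would look roughly like the sketch below (SearchTask is redeclared so the snippet stands alone; the real tool raises NotImplementedError outside Letta's hosted agent architecture):

from pydantic import BaseModel, Field

class SearchTask(BaseModel):
    query: str = Field(description="Search query for web search")
    question: str = Field(description="Question to answer from search results, considering full conversation context")

tasks = [
    SearchTask(
        query="Tesla Q1 2025 earnings report PDF",
        question="What was Tesla's net profit in Q1 2025?",
    ),
]
print(tasks[0].model_dump())
# The agent-side call would then be: await web_search(tasks=tasks, limit=3)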
letta/interfaces/openai_chat_completions_streaming_interface.py CHANGED
@@ -16,7 +16,6 @@ class OpenAIChatCompletionsStreamingInterface:
      """
 
      def __init__(self, stream_pre_execution_message: bool = True):
-         print("CHAT COMPLETITION INTERFACE")
          self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
          self.stream_pre_execution_message: bool = stream_pre_execution_message
 
letta/llm_api/anthropic.py CHANGED
@@ -823,12 +823,20 @@
  def anthropic_bedrock_chat_completions_request(
      data: ChatCompletionRequest,
      inner_thoughts_xml_tag: Optional[str] = "thinking",
+     provider_name: Optional[str] = None,
+     provider_category: Optional[ProviderCategory] = None,
+     user_id: Optional[str] = None,
  ) -> ChatCompletionResponse:
      """Make a chat completion request to Anthropic via AWS Bedrock."""
      data = _prepare_anthropic_request(data, inner_thoughts_xml_tag, bedrock=True)
 
      # Get the client
-     client = get_bedrock_client()
+     if provider_category == ProviderCategory.byok:
+         actor = UserManager().get_user_or_default(user_id=user_id)
+         access_key, secret_key, region = ProviderManager().get_bedrock_credentials_async(provider_name, actor=actor)
+         client = get_bedrock_client(access_key, secret_key, region)
+     else:
+         client = get_bedrock_client()
 
      # Make the request
      try:
letta/llm_api/anthropic_client.py CHANGED
@@ -243,7 +243,8 @@ class AnthropicClient(LLMClientBase):
          # Move 'system' to the top level
          if messages[0].role != "system":
              raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
-         data["system"] = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+         system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
+         data["system"] = self._add_cache_control_to_system_message(system_content)
          data["messages"] = [
              m.to_anthropic_dict(
                  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
@@ -315,9 +316,11 @@
 
          if isinstance(e, anthropic.BadRequestError):
              logger.warning(f"[Anthropic] Bad request: {str(e)}")
-             if "prompt is too long" in str(e).lower():
-                 # If the context window is too large, we expect to receive:
+             error_str = str(e).lower()
+             if "prompt is too long" in error_str or "exceed context limit" in error_str:
+                 # If the context window is too large, we expect to receive either:
                  # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 200758 tokens > 200000 maximum'}}
+                 # 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'input length and `max_tokens` exceed context limit: 173298 + 32000 > 200000, decrease input length or `max_tokens` and try again'}}
                  return ContextWindowExceededError(
                      message=f"Bad request to Anthropic (context window exceeded): {str(e)}",
                  )
@@ -490,6 +493,22 @@
 
          return chat_completion_response
 
+     def _add_cache_control_to_system_message(self, system_content):
+         """Add cache control to system message content"""
+         if isinstance(system_content, str):
+             # For string content, convert to list format with cache control
+             return [{"type": "text", "text": system_content, "cache_control": {"type": "ephemeral"}}]
+         elif isinstance(system_content, list):
+             # For list content, add cache control to the last text block
+             cached_content = system_content.copy()
+             for i in range(len(cached_content) - 1, -1, -1):
+                 if cached_content[i].get("type") == "text":
+                     cached_content[i]["cache_control"] = {"type": "ephemeral"}
+                     break
+             return cached_content
+
+         return system_content
+
 
  def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
      """See: https://docs.anthropic.com/claude/docs/tool-use
letta/llm_api/aws_bedrock.py CHANGED
@@ -1,5 +1,5 @@
  import os
- from typing import Any, Dict, List
+ from typing import Any, Dict, List, Optional
 
  from anthropic import AnthropicBedrock
 
@@ -19,7 +19,11 @@ def has_valid_aws_credentials() -> bool:
      return valid_aws_credentials
 
 
- def get_bedrock_client():
+ def get_bedrock_client(
+     access_key: Optional[str] = None,
+     secret_key: Optional[str] = None,
+     region: Optional[str] = None,
+ ):
      """
      Get a Bedrock client
      """
@@ -28,9 +32,9 @@
      logger.debug(f"Getting Bedrock client for {model_settings.aws_region}")
      sts_client = boto3.client(
          "sts",
-         aws_access_key_id=model_settings.aws_access_key,
-         aws_secret_access_key=model_settings.aws_secret_access_key,
-         region_name=model_settings.aws_region,
+         aws_access_key_id=access_key or model_settings.aws_access_key,
+         aws_secret_access_key=secret_key or model_settings.aws_secret_access_key,
+         region_name=region or model_settings.aws_region,
      )
      credentials = sts_client.get_session_token()["Credentials"]
 
@@ -38,7 +42,7 @@
          aws_access_key=credentials["AccessKeyId"],
          aws_secret_key=credentials["SecretAccessKey"],
          aws_session_token=credentials["SessionToken"],
-         aws_region=model_settings.aws_region,
+         aws_region=region or model_settings.aws_region,
      )
      return bedrock
 
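Note: the BYOK plumbing is a per-argument fallback chain: an explicitly passed credential wins, otherwise the model_settings value is used. A sketch with plain values standing in for boto3 and letta's settings object:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeModelSettings:  # stand-in for letta.settings.model_settings
    aws_access_key: str = "env-access-key"
    aws_secret_access_key: str = "env-secret-key"
    aws_region: str = "us-east-1"

model_settings = FakeModelSettings()

def resolve_credentials(access_key: Optional[str] = None,
                        secret_key: Optional[str] = None,
                        region: Optional[str] = None):
    # Same "x or default" fallback used inside get_bedrock_client().
    return (
        access_key or model_settings.aws_access_key,
        secret_key or model_settings.aws_secret_access_key,
        region or model_settings.aws_region,
    )

print(resolve_credentials(region="eu-west-1"))
# ('env-access-key', 'env-secret-key', 'eu-west-1')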
letta/llm_api/llm_api_tools.py CHANGED
@@ -569,6 +569,9 @@ def create(
                  # NOTE: max_tokens is required for Anthropic API
                  max_tokens=llm_config.max_tokens,
              ),
+             provider_name=llm_config.provider_name,
+             provider_category=llm_config.provider_category,
+             user_id=user_id,
          )
 
      elif llm_config.model_endpoint_type == "deepseek":
letta/llm_api/openai_client.py CHANGED
@@ -53,7 +53,7 @@ def accepts_developer_role(model: str) -> bool:
 
      See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
      """
-     if is_openai_reasoning_model(model):
+     if is_openai_reasoning_model(model) and not "o1-mini" in model or "o1-preview" in model:
          return True
      else:
          return False
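Note: Python's precedence (not/in bind tighter than "and", which binds tighter than "or") makes the new condition evaluate as (is_openai_reasoning_model(model) and "o1-mini" not in model) or ("o1-preview" in model). A quick check with a stand-in predicate (illustrative only; the real is_openai_reasoning_model lives in this module):

def is_openai_reasoning_model(model: str) -> bool:
    # Stand-in predicate for illustration only.
    return model.startswith("o1") or model.startswith("o3")

for model in ["o1", "o1-mini", "o1-preview", "gpt-4o"]:
    result = is_openai_reasoning_model(model) and not "o1-mini" in model or "o1-preview" in model
    print(f"{model}: {result}")
# o1: True, o1-mini: False, o1-preview: True, gpt-4o: False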
letta/orm/agent.py CHANGED
@@ -1,8 +1,9 @@
  import asyncio
  import uuid
+ from datetime import datetime
  from typing import TYPE_CHECKING, List, Optional, Set
 
- from sqlalchemy import JSON, Boolean, Index, String
+ from sqlalchemy import JSON, Boolean, DateTime, Index, Integer, String
  from sqlalchemy.ext.asyncio import AsyncAttrs
  from sqlalchemy.orm import Mapped, mapped_column, relationship
 
@@ -80,6 +81,14 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
          Boolean, doc="If set to True, memory management will move to a background agent thread."
      )
 
+     # Run metrics
+     last_run_completion: Mapped[Optional[datetime]] = mapped_column(
+         DateTime(timezone=True), nullable=True, doc="The timestamp when the agent last completed a run."
+     )
+     last_run_duration_ms: Mapped[Optional[int]] = mapped_column(
+         Integer, nullable=True, doc="The duration in milliseconds of the agent's last run."
+     )
+
      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
      tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -176,6 +185,8 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
              "updated_at": self.updated_at,
              "enable_sleeptime": self.enable_sleeptime,
              "response_format": self.response_format,
+             "last_run_completion": self.last_run_completion,
+             "last_run_duration_ms": self.last_run_duration_ms,
              # optional field defaults
              "tags": [],
              "tools": [],
@@ -252,6 +263,8 @@
              "updated_at": self.updated_at,
              "enable_sleeptime": self.enable_sleeptime,
              "response_format": self.response_format,
+             "last_run_completion": self.last_run_completion,
+             "last_run_duration_ms": self.last_run_duration_ms,
          }
          optional_fields = {
              "tags": [],
letta/orm/job.py CHANGED
@@ -43,6 +43,9 @@ class Job(SqlalchemyBase, UserMixin):
      callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
      callback_sent_at: Mapped[Optional[datetime]] = mapped_column(nullable=True, doc="Timestamp when the callback was last attempted.")
      callback_status_code: Mapped[Optional[int]] = mapped_column(nullable=True, doc="HTTP status code returned by the callback endpoint.")
+     callback_error: Mapped[Optional[str]] = mapped_column(
+         nullable=True, doc="Optional error message from attempting to POST the callback endpoint."
+     )
 
      # relationships
      user: Mapped["User"] = relationship("User", back_populates="jobs")
letta/orm/provider.py CHANGED
@@ -27,8 +27,10 @@ class Provider(SqlalchemyBase, OrganizationMixin):
      name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
      provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
      provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
-     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
+     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key or secret key used for requests to the provider.")
      base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
+     access_key: Mapped[str] = mapped_column(nullable=True, doc="Access key used for requests to the provider.")
+     region: Mapped[str] = mapped_column(nullable=True, doc="Region used for requests to the provider.")
 
      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="providers")