letta-nightly 0.8.4.dev20250614104137__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. letta/__init__.py +1 -0
  2. letta/agents/base_agent.py +12 -1
  3. letta/agents/helpers.py +5 -2
  4. letta/agents/letta_agent.py +98 -61
  5. letta/agents/voice_sleeptime_agent.py +2 -1
  6. letta/constants.py +3 -5
  7. letta/data_sources/redis_client.py +30 -10
  8. letta/functions/function_sets/files.py +4 -4
  9. letta/functions/helpers.py +6 -1
  10. letta/functions/mcp_client/types.py +95 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +2 -1
  12. letta/helpers/decorators.py +91 -0
  13. letta/interfaces/anthropic_streaming_interface.py +11 -0
  14. letta/interfaces/openai_streaming_interface.py +244 -225
  15. letta/llm_api/openai_client.py +1 -1
  16. letta/local_llm/utils.py +5 -1
  17. letta/orm/enums.py +1 -0
  18. letta/orm/mcp_server.py +3 -0
  19. letta/orm/tool.py +3 -0
  20. letta/otel/metric_registry.py +12 -0
  21. letta/otel/metrics.py +16 -7
  22. letta/schemas/letta_response.py +6 -1
  23. letta/schemas/letta_stop_reason.py +22 -0
  24. letta/schemas/mcp.py +48 -6
  25. letta/schemas/openai/chat_completion_request.py +1 -1
  26. letta/schemas/openai/chat_completion_response.py +1 -1
  27. letta/schemas/pip_requirement.py +14 -0
  28. letta/schemas/sandbox_config.py +1 -19
  29. letta/schemas/tool.py +5 -0
  30. letta/server/rest_api/json_parser.py +39 -3
  31. letta/server/rest_api/routers/v1/tools.py +3 -1
  32. letta/server/rest_api/routers/v1/voice.py +2 -3
  33. letta/server/rest_api/utils.py +1 -1
  34. letta/server/server.py +11 -2
  35. letta/services/agent_manager.py +37 -29
  36. letta/services/helpers/tool_execution_helper.py +39 -9
  37. letta/services/mcp/base_client.py +13 -2
  38. letta/services/mcp/sse_client.py +8 -1
  39. letta/services/mcp/streamable_http_client.py +56 -0
  40. letta/services/mcp_manager.py +23 -9
  41. letta/services/message_manager.py +30 -3
  42. letta/services/tool_executor/files_tool_executor.py +2 -3
  43. letta/services/tool_sandbox/e2b_sandbox.py +53 -3
  44. letta/services/tool_sandbox/local_sandbox.py +3 -1
  45. letta/services/user_manager.py +22 -0
  46. letta/settings.py +3 -0
  47. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
  48. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
  49. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
@@ -14,6 +14,7 @@ from letta.orm.errors import NoResultFound
14
14
  from letta.schemas.enums import MessageRole
15
15
  from letta.schemas.letta_message import AssistantMessage
16
16
  from letta.schemas.letta_response import LettaResponse
17
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
17
18
  from letta.schemas.message import Message, MessageCreate
18
19
  from letta.schemas.user import User
19
20
  from letta.server.rest_api.utils import get_letta_server
@@ -292,7 +293,11 @@ async def _send_message_to_agent_no_stream(
292
293
  )
293
294
 
294
295
  final_messages = interface.get_captured_send_messages()
295
- return LettaResponse(messages=final_messages, usage=usage_stats)
296
+ return LettaResponse(
297
+ messages=final_messages,
298
+ stop_reason=LettaStopReason(stop_reason=StopReasonType.end_turn.value),
299
+ usage=usage_stats,
300
+ )
296
301
 
297
302
 
298
303
  async def _async_send_message_with_retries(
@@ -4,6 +4,10 @@ from typing import List, Optional
4
4
  from mcp import Tool
5
5
  from pydantic import BaseModel, Field
6
6
 
7
+ # MCP Authentication Constants
8
+ MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
9
+ MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
10
+
7
11
 
8
12
  class MCPTool(Tool):
9
13
  """A simple wrapper around MCP's tool definition (to avoid conflict with our own)"""
@@ -12,6 +16,7 @@ class MCPTool(Tool):
12
16
  class MCPServerType(str, Enum):
13
17
  SSE = "sse"
14
18
  STDIO = "stdio"
19
+ STREAMABLE_HTTP = "streamable_http"
15
20
 
16
21
 
17
22
  class BaseServerConfig(BaseModel):
@@ -20,14 +25,44 @@ class BaseServerConfig(BaseModel):
20
25
 
21
26
 
22
27
  class SSEServerConfig(BaseServerConfig):
28
+ """
29
+ Configuration for an MCP server using SSE
30
+
31
+ Authentication can be provided in multiple ways:
32
+ 1. Using auth_header + auth_token: Will add a specific header with the token
33
+ Example: auth_header="Authorization", auth_token="Bearer abc123"
34
+
35
+ 2. Using the custom_headers dict: For more complex authentication scenarios
36
+ Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
37
+ """
38
+
23
39
  type: MCPServerType = MCPServerType.SSE
24
40
  server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
41
+ auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
42
+ auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
43
+ custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with SSE requests")
44
+
45
+ def resolve_token(self) -> Optional[str]:
46
+ if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
47
+ return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
48
+ return self.auth_token
25
49
 
26
50
  def to_dict(self) -> dict:
27
51
  values = {
28
52
  "transport": "sse",
29
53
  "url": self.server_url,
30
54
  }
55
+
56
+ # TODO: handle custom headers
57
+ if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
58
+ headers = self.custom_headers.copy() if self.custom_headers else {}
59
+
60
+ # Add auth header if specified
61
+ if self.auth_header is not None and self.auth_token is not None:
62
+ headers[self.auth_header] = self.auth_token
63
+
64
+ values["headers"] = headers
65
+
31
66
  return values
32
67
 
33
68
 
@@ -46,3 +81,63 @@ class StdioServerConfig(BaseServerConfig):
46
81
  if self.env is not None:
47
82
  values["env"] = self.env
48
83
  return values
84
+
85
+
86
+ class StreamableHTTPServerConfig(BaseServerConfig):
87
+ """
88
+ Configuration for an MCP server using Streamable HTTP
89
+
90
+ Authentication can be provided in multiple ways:
91
+ 1. Using auth_header + auth_token: Will add a specific header with the token
92
+ Example: auth_header="Authorization", auth_token="Bearer abc123"
93
+
94
+ 2. Using the custom_headers dict: For more complex authentication scenarios
95
+ Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
96
+ """
97
+
98
+ type: MCPServerType = MCPServerType.STREAMABLE_HTTP
99
+ server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
100
+ auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
101
+ auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
102
+ custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with streamable HTTP requests")
103
+
104
+ def resolve_token(self) -> Optional[str]:
105
+ if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
106
+ return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
107
+ return self.auth_token
108
+
109
+ def model_post_init(self, __context) -> None:
110
+ """Validate the server URL format."""
111
+ # Basic validation for streamable HTTP URLs
112
+ if not self.server_url:
113
+ raise ValueError("server_url cannot be empty")
114
+
115
+ # For streamable HTTP, the URL should typically be a path or full URL
116
+ # We'll be lenient and allow both formats
117
+ if self.server_url.startswith("http://") or self.server_url.startswith("https://"):
118
+ # Full URL format - this is what the user is trying
119
+ pass
120
+ elif "/" in self.server_url:
121
+ # Path format like "example/mcp" - this is the typical format
122
+ pass
123
+ else:
124
+ # Single word - might be valid but warn in logs
125
+ pass
126
+
127
+ def to_dict(self) -> dict:
128
+ values = {
129
+ "transport": "streamable_http",
130
+ "url": self.server_url,
131
+ }
132
+
133
+ # Handle custom headers
134
+ if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
135
+ headers = self.custom_headers.copy() if self.custom_headers else {}
136
+
137
+ # Add auth header if specified
138
+ if self.auth_header is not None and self.auth_token is not None:
139
+ headers[self.auth_header] = self.auth_token
140
+
141
+ values["headers"] = headers
142
+
143
+ return values
@@ -144,7 +144,8 @@ class SleeptimeMultiAgentV2(BaseAgent):
144
144
  for message in response.messages:
145
145
  yield f"data: {message.model_dump_json()}\n\n"
146
146
 
147
- yield f"data: {response.usage.model_dump_json()}\n\n"
147
+ for finish_chunk in self.get_finish_chunks_for_stream(response.usage):
148
+ yield f"data: {finish_chunk}\n\n"
148
149
 
149
150
  @trace_method
150
151
  async def step_stream(
@@ -1,7 +1,13 @@
1
1
  import inspect
2
+ import json
3
+ from dataclasses import dataclass
2
4
  from functools import wraps
3
5
  from typing import Callable
4
6
 
7
+ from pydantic import BaseModel
8
+
9
+ from letta.constants import REDIS_DEFAULT_CACHE_PREFIX
10
+ from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
5
11
  from letta.log import get_logger
6
12
  from letta.plugins.plugins import get_experimental_checker
7
13
  from letta.settings import settings
@@ -67,3 +73,88 @@ def deprecated(message: str):
67
73
  return wrapper
68
74
 
69
75
  return decorator
76
+
77
+
78
+ @dataclass
79
+ class CacheStats:
80
+ """Note: this will be approximate to not add overhead of locking on counters.
81
+ For exact measurements, use redis or track in other places.
82
+ """
83
+
84
+ hits: int = 0
85
+ misses: int = 0
86
+ invalidations: int = 0
87
+
88
+
89
+ def async_redis_cache(
90
+ key_func: Callable, prefix: str = REDIS_DEFAULT_CACHE_PREFIX, ttl_s: int = 300, model_class: type[BaseModel] | None = None
91
+ ):
92
+ """
93
+ Decorator for caching async function results in Redis. May be a Noop if redis is not available.
94
+ Will handle pydantic objects and raw values.
95
+
96
+ Attempts to write to and retrieve from cache, but does not fail on those cases
97
+
98
+ Args:
99
+ key_func: function to generate cache key (preferably lowercase strings to follow redis convention)
100
+ prefix: cache key prefix
101
+ ttl_s: time to live (s)
102
+ model_class: custom pydantic model class for serialization/deserialization
103
+
104
+ TODO (cliandy): move to class with generics for type hints
105
+ """
106
+
107
+ def decorator(func):
108
+ stats = CacheStats()
109
+
110
+ @wraps(func)
111
+ async def async_wrapper(*args, **kwargs):
112
+ redis_client = await get_redis_client()
113
+
114
+ # Don't bother going through other operations for no reason.
115
+ if isinstance(redis_client, NoopAsyncRedisClient):
116
+ return await func(*args, **kwargs)
117
+ cache_key = get_cache_key(*args, **kwargs)
118
+ cached_value = await redis_client.get(cache_key)
119
+
120
+ try:
121
+ if cached_value is not None:
122
+ stats.hits += 1
123
+ if model_class:
124
+ return model_class.model_validate_json(cached_value)
125
+ return json.loads(cached_value)
126
+ except Exception as e:
127
+ logger.warning(f"Failed to retrieve value from cache: {e}")
128
+
129
+ stats.misses += 1
130
+ result = await func(*args, **kwargs)
131
+ try:
132
+ if model_class:
133
+ await redis_client.set(cache_key, result.model_dump_json(), ex=ttl_s)
134
+ elif isinstance(result, (dict, list, str, int, float, bool)):
135
+ await redis_client.set(cache_key, json.dumps(result), ex=ttl_s)
136
+ else:
137
+ logger.warning(f"Cannot cache result of type {type(result).__name__} for {func.__name__}")
138
+ except Exception as e:
139
+ logger.warning(f"Redis cache set failed: {e}")
140
+ return result
141
+
142
+ async def invalidate(*args, **kwargs) -> bool:
143
+ stats.invalidations += 1
144
+ try:
145
+ redis_client = await get_redis_client()
146
+ cache_key = get_cache_key(*args, **kwargs)
147
+ return (await redis_client.delete(cache_key)) > 0
148
+ except Exception as e:
149
+ logger.error(f"Failed to invalidate cache: {e}")
150
+ return False
151
+
152
+ def get_cache_key(*args, **kwargs):
153
+ return f"{prefix}:{key_func(*args, **kwargs)}"
154
+
155
+ # async_wrapper.cache_invalidate = invalidate
156
+ async_wrapper.cache_key_func = get_cache_key
157
+ async_wrapper.cache_stats = stats
158
+ return async_wrapper
159
+
160
+ return decorator
@@ -26,6 +26,8 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
26
26
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
27
27
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG
28
28
  from letta.log import get_logger
29
+ from letta.otel.context import get_ctx_attributes
30
+ from letta.otel.metric_registry import MetricRegistry
29
31
  from letta.schemas.letta_message import (
30
32
  AssistantMessage,
31
33
  HiddenReasoningMessage,
@@ -35,6 +37,7 @@ from letta.schemas.letta_message import (
35
37
  ToolCallMessage,
36
38
  )
37
39
  from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
40
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
38
41
  from letta.schemas.message import Message
39
42
  from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
40
43
  from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
@@ -90,6 +93,8 @@ class AnthropicStreamingInterface:
90
93
 
91
94
  def get_tool_call_object(self) -> ToolCall:
92
95
  """Useful for agent loop"""
96
+ if not self.tool_call_name:
97
+ raise ValueError("No tool call returned")
93
98
  # hack for tool rules
94
99
  try:
95
100
  tool_input = json.loads(self.accumulated_tool_call_args)
@@ -140,6 +145,10 @@ class AnthropicStreamingInterface:
140
145
  ttft_span.add_event(
141
146
  name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
142
147
  )
148
+ metric_attributes = get_ctx_attributes()
149
+ if isinstance(event, BetaRawMessageStartEvent):
150
+ metric_attributes["model.name"] = event.message.model
151
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
143
152
  first_chunk = False
144
153
 
145
154
  # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
@@ -377,6 +386,8 @@ class AnthropicStreamingInterface:
377
386
  self.anthropic_mode = None
378
387
  except Exception as e:
379
388
  logger.error("Error processing stream: %s", e)
389
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
390
+ yield stop_reason
380
391
  raise
381
392
  finally:
382
393
  logger.info("AnthropicStreamingInterface: Stream processing complete.")