letta-nightly 0.8.4.dev20250615104252__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -0
- letta/agents/base_agent.py +12 -1
- letta/agents/helpers.py +5 -2
- letta/agents/letta_agent.py +98 -61
- letta/agents/voice_sleeptime_agent.py +2 -1
- letta/constants.py +3 -5
- letta/data_sources/redis_client.py +30 -10
- letta/functions/function_sets/files.py +4 -4
- letta/functions/helpers.py +6 -1
- letta/functions/mcp_client/types.py +95 -0
- letta/groups/sleeptime_multi_agent_v2.py +2 -1
- letta/helpers/decorators.py +91 -0
- letta/interfaces/anthropic_streaming_interface.py +11 -0
- letta/interfaces/openai_streaming_interface.py +244 -225
- letta/llm_api/openai_client.py +1 -1
- letta/local_llm/utils.py +5 -1
- letta/orm/enums.py +1 -0
- letta/orm/mcp_server.py +3 -0
- letta/orm/tool.py +3 -0
- letta/otel/metric_registry.py +12 -0
- letta/otel/metrics.py +16 -7
- letta/schemas/letta_response.py +6 -1
- letta/schemas/letta_stop_reason.py +22 -0
- letta/schemas/mcp.py +48 -6
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/openai/chat_completion_response.py +1 -1
- letta/schemas/pip_requirement.py +14 -0
- letta/schemas/sandbox_config.py +1 -19
- letta/schemas/tool.py +5 -0
- letta/server/rest_api/json_parser.py +39 -3
- letta/server/rest_api/routers/v1/tools.py +3 -1
- letta/server/rest_api/routers/v1/voice.py +2 -3
- letta/server/rest_api/utils.py +1 -1
- letta/server/server.py +11 -2
- letta/services/agent_manager.py +37 -29
- letta/services/helpers/tool_execution_helper.py +39 -9
- letta/services/mcp/base_client.py +13 -2
- letta/services/mcp/sse_client.py +8 -1
- letta/services/mcp/streamable_http_client.py +56 -0
- letta/services/mcp_manager.py +23 -9
- letta/services/message_manager.py +30 -3
- letta/services/tool_executor/files_tool_executor.py +2 -3
- letta/services/tool_sandbox/e2b_sandbox.py +53 -3
- letta/services/tool_sandbox/local_sandbox.py +3 -1
- letta/services/user_manager.py +22 -0
- letta/settings.py +3 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.4.dev20250615104252.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
letta/functions/helpers.py
CHANGED
@@ -14,6 +14,7 @@ from letta.orm.errors import NoResultFound
|
|
14
14
|
from letta.schemas.enums import MessageRole
|
15
15
|
from letta.schemas.letta_message import AssistantMessage
|
16
16
|
from letta.schemas.letta_response import LettaResponse
|
17
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
17
18
|
from letta.schemas.message import Message, MessageCreate
|
18
19
|
from letta.schemas.user import User
|
19
20
|
from letta.server.rest_api.utils import get_letta_server
|
@@ -292,7 +293,11 @@ async def _send_message_to_agent_no_stream(
|
|
292
293
|
)
|
293
294
|
|
294
295
|
final_messages = interface.get_captured_send_messages()
|
295
|
-
return LettaResponse(
|
296
|
+
return LettaResponse(
|
297
|
+
messages=final_messages,
|
298
|
+
stop_reason=LettaStopReason(stop_reason=StopReasonType.end_turn.value),
|
299
|
+
usage=usage_stats,
|
300
|
+
)
|
296
301
|
|
297
302
|
|
298
303
|
async def _async_send_message_with_retries(
|
@@ -4,6 +4,10 @@ from typing import List, Optional
|
|
4
4
|
from mcp import Tool
|
5
5
|
from pydantic import BaseModel, Field
|
6
6
|
|
7
|
+
# MCP Authentication Constants
|
8
|
+
MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
|
9
|
+
MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
|
10
|
+
|
7
11
|
|
8
12
|
class MCPTool(Tool):
|
9
13
|
"""A simple wrapper around MCP's tool definition (to avoid conflict with our own)"""
|
@@ -12,6 +16,7 @@ class MCPTool(Tool):
|
|
12
16
|
class MCPServerType(str, Enum):
|
13
17
|
SSE = "sse"
|
14
18
|
STDIO = "stdio"
|
19
|
+
STREAMABLE_HTTP = "streamable_http"
|
15
20
|
|
16
21
|
|
17
22
|
class BaseServerConfig(BaseModel):
|
@@ -20,14 +25,44 @@ class BaseServerConfig(BaseModel):
|
|
20
25
|
|
21
26
|
|
22
27
|
class SSEServerConfig(BaseServerConfig):
|
28
|
+
"""
|
29
|
+
Configuration for an MCP server using SSE
|
30
|
+
|
31
|
+
Authentication can be provided in multiple ways:
|
32
|
+
1. Using auth_header + auth_token: Will add a specific header with the token
|
33
|
+
Example: auth_header="Authorization", auth_token="Bearer abc123"
|
34
|
+
|
35
|
+
2. Using the custom_headers dict: For more complex authentication scenarios
|
36
|
+
Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
|
37
|
+
"""
|
38
|
+
|
23
39
|
type: MCPServerType = MCPServerType.SSE
|
24
40
|
server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
|
41
|
+
auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
|
42
|
+
auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
|
43
|
+
custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with SSE requests")
|
44
|
+
|
45
|
+
def resolve_token(self) -> Optional[str]:
|
46
|
+
if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
|
47
|
+
return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
|
48
|
+
return self.auth_token
|
25
49
|
|
26
50
|
def to_dict(self) -> dict:
|
27
51
|
values = {
|
28
52
|
"transport": "sse",
|
29
53
|
"url": self.server_url,
|
30
54
|
}
|
55
|
+
|
56
|
+
# TODO: handle custom headers
|
57
|
+
if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
|
58
|
+
headers = self.custom_headers.copy() if self.custom_headers else {}
|
59
|
+
|
60
|
+
# Add auth header if specified
|
61
|
+
if self.auth_header is not None and self.auth_token is not None:
|
62
|
+
headers[self.auth_header] = self.auth_token
|
63
|
+
|
64
|
+
values["headers"] = headers
|
65
|
+
|
31
66
|
return values
|
32
67
|
|
33
68
|
|
@@ -46,3 +81,63 @@ class StdioServerConfig(BaseServerConfig):
|
|
46
81
|
if self.env is not None:
|
47
82
|
values["env"] = self.env
|
48
83
|
return values
|
84
|
+
|
85
|
+
|
86
|
+
class StreamableHTTPServerConfig(BaseServerConfig):
|
87
|
+
"""
|
88
|
+
Configuration for an MCP server using Streamable HTTP
|
89
|
+
|
90
|
+
Authentication can be provided in multiple ways:
|
91
|
+
1. Using auth_header + auth_token: Will add a specific header with the token
|
92
|
+
Example: auth_header="Authorization", auth_token="Bearer abc123"
|
93
|
+
|
94
|
+
2. Using the custom_headers dict: For more complex authentication scenarios
|
95
|
+
Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
|
96
|
+
"""
|
97
|
+
|
98
|
+
type: MCPServerType = MCPServerType.STREAMABLE_HTTP
|
99
|
+
server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
|
100
|
+
auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
|
101
|
+
auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
|
102
|
+
custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with streamable HTTP requests")
|
103
|
+
|
104
|
+
def resolve_token(self) -> Optional[str]:
|
105
|
+
if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
|
106
|
+
return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
|
107
|
+
return self.auth_token
|
108
|
+
|
109
|
+
def model_post_init(self, __context) -> None:
|
110
|
+
"""Validate the server URL format."""
|
111
|
+
# Basic validation for streamable HTTP URLs
|
112
|
+
if not self.server_url:
|
113
|
+
raise ValueError("server_url cannot be empty")
|
114
|
+
|
115
|
+
# For streamable HTTP, the URL should typically be a path or full URL
|
116
|
+
# We'll be lenient and allow both formats
|
117
|
+
if self.server_url.startswith("http://") or self.server_url.startswith("https://"):
|
118
|
+
# Full URL format - this is what the user is trying
|
119
|
+
pass
|
120
|
+
elif "/" in self.server_url:
|
121
|
+
# Path format like "example/mcp" - this is the typical format
|
122
|
+
pass
|
123
|
+
else:
|
124
|
+
# Single word - might be valid but warn in logs
|
125
|
+
pass
|
126
|
+
|
127
|
+
def to_dict(self) -> dict:
|
128
|
+
values = {
|
129
|
+
"transport": "streamable_http",
|
130
|
+
"url": self.server_url,
|
131
|
+
}
|
132
|
+
|
133
|
+
# Handle custom headers
|
134
|
+
if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
|
135
|
+
headers = self.custom_headers.copy() if self.custom_headers else {}
|
136
|
+
|
137
|
+
# Add auth header if specified
|
138
|
+
if self.auth_header is not None and self.auth_token is not None:
|
139
|
+
headers[self.auth_header] = self.auth_token
|
140
|
+
|
141
|
+
values["headers"] = headers
|
142
|
+
|
143
|
+
return values
|
@@ -144,7 +144,8 @@ class SleeptimeMultiAgentV2(BaseAgent):
|
|
144
144
|
for message in response.messages:
|
145
145
|
yield f"data: {message.model_dump_json()}\n\n"
|
146
146
|
|
147
|
-
|
147
|
+
for finish_chunk in self.get_finish_chunks_for_stream(response.usage):
|
148
|
+
yield f"data: {finish_chunk}\n\n"
|
148
149
|
|
149
150
|
@trace_method
|
150
151
|
async def step_stream(
|
letta/helpers/decorators.py
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
import inspect
|
2
|
+
import json
|
3
|
+
from dataclasses import dataclass
|
2
4
|
from functools import wraps
|
3
5
|
from typing import Callable
|
4
6
|
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
from letta.constants import REDIS_DEFAULT_CACHE_PREFIX
|
10
|
+
from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
|
5
11
|
from letta.log import get_logger
|
6
12
|
from letta.plugins.plugins import get_experimental_checker
|
7
13
|
from letta.settings import settings
|
@@ -67,3 +73,88 @@ def deprecated(message: str):
|
|
67
73
|
return wrapper
|
68
74
|
|
69
75
|
return decorator
|
76
|
+
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class CacheStats:
|
80
|
+
"""Note: this will be approximate to not add overhead of locking on counters.
|
81
|
+
For exact measurements, use redis or track in other places.
|
82
|
+
"""
|
83
|
+
|
84
|
+
hits: int = 0
|
85
|
+
misses: int = 0
|
86
|
+
invalidations: int = 0
|
87
|
+
|
88
|
+
|
89
|
+
def async_redis_cache(
|
90
|
+
key_func: Callable, prefix: str = REDIS_DEFAULT_CACHE_PREFIX, ttl_s: int = 300, model_class: type[BaseModel] | None = None
|
91
|
+
):
|
92
|
+
"""
|
93
|
+
Decorator for caching async function results in Redis. May be a Noop if redis is not available.
|
94
|
+
Will handle pydantic objects and raw values.
|
95
|
+
|
96
|
+
Attempts to write to and retrieve from cache, but does not fail on those cases
|
97
|
+
|
98
|
+
Args:
|
99
|
+
key_func: function to generate cache key (preferably lowercase strings to follow redis convention)
|
100
|
+
prefix: cache key prefix
|
101
|
+
ttl_s: time to live (s)
|
102
|
+
model_class: custom pydantic model class for serialization/deserialization
|
103
|
+
|
104
|
+
TODO (cliandy): move to class with generics for type hints
|
105
|
+
"""
|
106
|
+
|
107
|
+
def decorator(func):
|
108
|
+
stats = CacheStats()
|
109
|
+
|
110
|
+
@wraps(func)
|
111
|
+
async def async_wrapper(*args, **kwargs):
|
112
|
+
redis_client = await get_redis_client()
|
113
|
+
|
114
|
+
# Don't bother going through other operations for no reason.
|
115
|
+
if isinstance(redis_client, NoopAsyncRedisClient):
|
116
|
+
return await func(*args, **kwargs)
|
117
|
+
cache_key = get_cache_key(*args, **kwargs)
|
118
|
+
cached_value = await redis_client.get(cache_key)
|
119
|
+
|
120
|
+
try:
|
121
|
+
if cached_value is not None:
|
122
|
+
stats.hits += 1
|
123
|
+
if model_class:
|
124
|
+
return model_class.model_validate_json(cached_value)
|
125
|
+
return json.loads(cached_value)
|
126
|
+
except Exception as e:
|
127
|
+
logger.warning(f"Failed to retrieve value from cache: {e}")
|
128
|
+
|
129
|
+
stats.misses += 1
|
130
|
+
result = await func(*args, **kwargs)
|
131
|
+
try:
|
132
|
+
if model_class:
|
133
|
+
await redis_client.set(cache_key, result.model_dump_json(), ex=ttl_s)
|
134
|
+
elif isinstance(result, (dict, list, str, int, float, bool)):
|
135
|
+
await redis_client.set(cache_key, json.dumps(result), ex=ttl_s)
|
136
|
+
else:
|
137
|
+
logger.warning(f"Cannot cache result of type {type(result).__name__} for {func.__name__}")
|
138
|
+
except Exception as e:
|
139
|
+
logger.warning(f"Redis cache set failed: {e}")
|
140
|
+
return result
|
141
|
+
|
142
|
+
async def invalidate(*args, **kwargs) -> bool:
|
143
|
+
stats.invalidations += 1
|
144
|
+
try:
|
145
|
+
redis_client = await get_redis_client()
|
146
|
+
cache_key = get_cache_key(*args, **kwargs)
|
147
|
+
return (await redis_client.delete(cache_key)) > 0
|
148
|
+
except Exception as e:
|
149
|
+
logger.error(f"Failed to invalidate cache: {e}")
|
150
|
+
return False
|
151
|
+
|
152
|
+
def get_cache_key(*args, **kwargs):
|
153
|
+
return f"{prefix}:{key_func(*args, **kwargs)}"
|
154
|
+
|
155
|
+
# async_wrapper.cache_invalidate = invalidate
|
156
|
+
async_wrapper.cache_key_func = get_cache_key
|
157
|
+
async_wrapper.cache_stats = stats
|
158
|
+
return async_wrapper
|
159
|
+
|
160
|
+
return decorator
|
@@ -26,6 +26,8 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
|
26
26
|
from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
|
27
27
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
28
28
|
from letta.log import get_logger
|
29
|
+
from letta.otel.context import get_ctx_attributes
|
30
|
+
from letta.otel.metric_registry import MetricRegistry
|
29
31
|
from letta.schemas.letta_message import (
|
30
32
|
AssistantMessage,
|
31
33
|
HiddenReasoningMessage,
|
@@ -35,6 +37,7 @@ from letta.schemas.letta_message import (
|
|
35
37
|
ToolCallMessage,
|
36
38
|
)
|
37
39
|
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
40
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
38
41
|
from letta.schemas.message import Message
|
39
42
|
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
40
43
|
from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
|
@@ -90,6 +93,8 @@ class AnthropicStreamingInterface:
|
|
90
93
|
|
91
94
|
def get_tool_call_object(self) -> ToolCall:
|
92
95
|
"""Useful for agent loop"""
|
96
|
+
if not self.tool_call_name:
|
97
|
+
raise ValueError("No tool call returned")
|
93
98
|
# hack for tool rules
|
94
99
|
try:
|
95
100
|
tool_input = json.loads(self.accumulated_tool_call_args)
|
@@ -140,6 +145,10 @@ class AnthropicStreamingInterface:
|
|
140
145
|
ttft_span.add_event(
|
141
146
|
name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
|
142
147
|
)
|
148
|
+
metric_attributes = get_ctx_attributes()
|
149
|
+
if isinstance(event, BetaRawMessageStartEvent):
|
150
|
+
metric_attributes["model.name"] = event.message.model
|
151
|
+
MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
|
143
152
|
first_chunk = False
|
144
153
|
|
145
154
|
# TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
|
@@ -377,6 +386,8 @@ class AnthropicStreamingInterface:
|
|
377
386
|
self.anthropic_mode = None
|
378
387
|
except Exception as e:
|
379
388
|
logger.error("Error processing stream: %s", e)
|
389
|
+
stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
|
390
|
+
yield stop_reason
|
380
391
|
raise
|
381
392
|
finally:
|
382
393
|
logger.info("AnthropicStreamingInterface: Stream processing complete.")
|