letta-nightly 0.8.4.dev20250614104137__py3-none-any.whl → 0.8.4.dev20250615221417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. letta/__init__.py +1 -0
  2. letta/agents/base_agent.py +12 -1
  3. letta/agents/helpers.py +5 -2
  4. letta/agents/letta_agent.py +98 -61
  5. letta/agents/voice_sleeptime_agent.py +2 -1
  6. letta/constants.py +3 -5
  7. letta/data_sources/redis_client.py +30 -10
  8. letta/functions/function_sets/files.py +4 -4
  9. letta/functions/helpers.py +6 -1
  10. letta/functions/mcp_client/types.py +95 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +2 -1
  12. letta/helpers/decorators.py +91 -0
  13. letta/interfaces/anthropic_streaming_interface.py +11 -0
  14. letta/interfaces/openai_streaming_interface.py +244 -225
  15. letta/llm_api/openai_client.py +1 -1
  16. letta/local_llm/utils.py +5 -1
  17. letta/orm/enums.py +1 -0
  18. letta/orm/mcp_server.py +3 -0
  19. letta/orm/tool.py +3 -0
  20. letta/otel/metric_registry.py +12 -0
  21. letta/otel/metrics.py +16 -7
  22. letta/schemas/letta_response.py +6 -1
  23. letta/schemas/letta_stop_reason.py +22 -0
  24. letta/schemas/mcp.py +48 -6
  25. letta/schemas/openai/chat_completion_request.py +1 -1
  26. letta/schemas/openai/chat_completion_response.py +1 -1
  27. letta/schemas/pip_requirement.py +14 -0
  28. letta/schemas/sandbox_config.py +1 -19
  29. letta/schemas/tool.py +5 -0
  30. letta/server/rest_api/json_parser.py +39 -3
  31. letta/server/rest_api/routers/v1/tools.py +3 -1
  32. letta/server/rest_api/routers/v1/voice.py +2 -3
  33. letta/server/rest_api/utils.py +1 -1
  34. letta/server/server.py +11 -2
  35. letta/services/agent_manager.py +37 -29
  36. letta/services/helpers/tool_execution_helper.py +39 -9
  37. letta/services/mcp/base_client.py +13 -2
  38. letta/services/mcp/sse_client.py +8 -1
  39. letta/services/mcp/streamable_http_client.py +56 -0
  40. letta/services/mcp_manager.py +23 -9
  41. letta/services/message_manager.py +30 -3
  42. letta/services/tool_executor/files_tool_executor.py +2 -3
  43. letta/services/tool_sandbox/e2b_sandbox.py +53 -3
  44. letta/services/tool_sandbox/local_sandbox.py +3 -1
  45. letta/services/user_manager.py +22 -0
  46. letta/settings.py +3 -0
  47. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/METADATA +5 -6
  48. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/RECORD +51 -48
  49. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/LICENSE +0 -0
  50. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/WHEEL +0 -0
  51. {letta_nightly-0.8.4.dev20250614104137.dist-info → letta_nightly-0.8.4.dev20250615221417.dist-info}/entry_points.txt +0 -0
@@ -14,6 +14,7 @@ from letta.orm.errors import NoResultFound
14
14
  from letta.schemas.enums import MessageRole
15
15
  from letta.schemas.letta_message import AssistantMessage
16
16
  from letta.schemas.letta_response import LettaResponse
17
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
17
18
  from letta.schemas.message import Message, MessageCreate
18
19
  from letta.schemas.user import User
19
20
  from letta.server.rest_api.utils import get_letta_server
@@ -292,7 +293,11 @@ async def _send_message_to_agent_no_stream(
292
293
  )
293
294
 
294
295
  final_messages = interface.get_captured_send_messages()
295
- return LettaResponse(messages=final_messages, usage=usage_stats)
296
+ return LettaResponse(
297
+ messages=final_messages,
298
+ stop_reason=LettaStopReason(stop_reason=StopReasonType.end_turn.value),
299
+ usage=usage_stats,
300
+ )
296
301
 
297
302
 
298
303
  async def _async_send_message_with_retries(
@@ -4,6 +4,10 @@ from typing import List, Optional
4
4
  from mcp import Tool
5
5
  from pydantic import BaseModel, Field
6
6
 
7
+ # MCP Authentication Constants
8
+ MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
9
+ MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
10
+
7
11
 
8
12
  class MCPTool(Tool):
9
13
  """A simple wrapper around MCP's tool definition (to avoid conflict with our own)"""
@@ -12,6 +16,7 @@ class MCPTool(Tool):
12
16
  class MCPServerType(str, Enum):
13
17
  SSE = "sse"
14
18
  STDIO = "stdio"
19
+ STREAMABLE_HTTP = "streamable_http"
15
20
 
16
21
 
17
22
  class BaseServerConfig(BaseModel):
@@ -20,14 +25,44 @@ class BaseServerConfig(BaseModel):
20
25
 
21
26
 
22
27
  class SSEServerConfig(BaseServerConfig):
28
+ """
29
+ Configuration for an MCP server using SSE
30
+
31
+ Authentication can be provided in multiple ways:
32
+ 1. Using auth_header + auth_token: Will add a specific header with the token
33
+ Example: auth_header="Authorization", auth_token="Bearer abc123"
34
+
35
+ 2. Using the custom_headers dict: For more complex authentication scenarios
36
+ Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
37
+ """
38
+
23
39
  type: MCPServerType = MCPServerType.SSE
24
40
  server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
41
+ auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
42
+ auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
43
+ custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with SSE requests")
44
+
45
+ def resolve_token(self) -> Optional[str]:
46
+ if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
47
+ return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
48
+ return self.auth_token
25
49
 
26
50
  def to_dict(self) -> dict:
27
51
  values = {
28
52
  "transport": "sse",
29
53
  "url": self.server_url,
30
54
  }
55
+
56
+ # TODO: handle custom headers
57
+ if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
58
+ headers = self.custom_headers.copy() if self.custom_headers else {}
59
+
60
+ # Add auth header if specified
61
+ if self.auth_header is not None and self.auth_token is not None:
62
+ headers[self.auth_header] = self.auth_token
63
+
64
+ values["headers"] = headers
65
+
31
66
  return values
32
67
 
33
68
 
@@ -46,3 +81,63 @@ class StdioServerConfig(BaseServerConfig):
46
81
  if self.env is not None:
47
82
  values["env"] = self.env
48
83
  return values
84
+
85
+
86
+ class StreamableHTTPServerConfig(BaseServerConfig):
87
+ """
88
+ Configuration for an MCP server using Streamable HTTP
89
+
90
+ Authentication can be provided in multiple ways:
91
+ 1. Using auth_header + auth_token: Will add a specific header with the token
92
+ Example: auth_header="Authorization", auth_token="Bearer abc123"
93
+
94
+ 2. Using the custom_headers dict: For more complex authentication scenarios
95
+ Example: custom_headers={"X-API-Key": "abc123", "X-Custom-Header": "value"}
96
+ """
97
+
98
+ type: MCPServerType = MCPServerType.STREAMABLE_HTTP
99
+ server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
100
+ auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
101
+ auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
102
+ custom_headers: Optional[dict[str, str]] = Field(None, description="Custom HTTP headers to include with streamable HTTP requests")
103
+
104
+ def resolve_token(self) -> Optional[str]:
105
+ if self.auth_token and self.auth_token.startswith(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} "):
106
+ return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
107
+ return self.auth_token
108
+
109
+ def model_post_init(self, __context) -> None:
110
+ """Validate the server URL format."""
111
+ # Basic validation for streamable HTTP URLs
112
+ if not self.server_url:
113
+ raise ValueError("server_url cannot be empty")
114
+
115
+ # For streamable HTTP, the URL should typically be a path or full URL
116
+ # We'll be lenient and allow both formats
117
+ if self.server_url.startswith("http://") or self.server_url.startswith("https://"):
118
+ # Full URL format - this is what the user is trying
119
+ pass
120
+ elif "/" in self.server_url:
121
+ # Path format like "example/mcp" - this is the typical format
122
+ pass
123
+ else:
124
+ # Single word - might be valid but warn in logs
125
+ pass
126
+
127
+ def to_dict(self) -> dict:
128
+ values = {
129
+ "transport": "streamable_http",
130
+ "url": self.server_url,
131
+ }
132
+
133
+ # Handle custom headers
134
+ if self.custom_headers is not None or (self.auth_header is not None and self.auth_token is not None):
135
+ headers = self.custom_headers.copy() if self.custom_headers else {}
136
+
137
+ # Add auth header if specified
138
+ if self.auth_header is not None and self.auth_token is not None:
139
+ headers[self.auth_header] = self.auth_token
140
+
141
+ values["headers"] = headers
142
+
143
+ return values
@@ -144,7 +144,8 @@ class SleeptimeMultiAgentV2(BaseAgent):
144
144
  for message in response.messages:
145
145
  yield f"data: {message.model_dump_json()}\n\n"
146
146
 
147
- yield f"data: {response.usage.model_dump_json()}\n\n"
147
+ for finish_chunk in self.get_finish_chunks_for_stream(response.usage):
148
+ yield f"data: {finish_chunk}\n\n"
148
149
 
149
150
  @trace_method
150
151
  async def step_stream(
@@ -1,7 +1,13 @@
1
1
  import inspect
2
+ import json
3
+ from dataclasses import dataclass
2
4
  from functools import wraps
3
5
  from typing import Callable
4
6
 
7
+ from pydantic import BaseModel
8
+
9
+ from letta.constants import REDIS_DEFAULT_CACHE_PREFIX
10
+ from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
5
11
  from letta.log import get_logger
6
12
  from letta.plugins.plugins import get_experimental_checker
7
13
  from letta.settings import settings
@@ -67,3 +73,88 @@ def deprecated(message: str):
67
73
  return wrapper
68
74
 
69
75
  return decorator
76
+
77
+
78
+ @dataclass
79
+ class CacheStats:
80
+ """Note: this will be approximate to not add overhead of locking on counters.
81
+ For exact measurements, use redis or track in other places.
82
+ """
83
+
84
+ hits: int = 0
85
+ misses: int = 0
86
+ invalidations: int = 0
87
+
88
+
89
+ def async_redis_cache(
90
+ key_func: Callable, prefix: str = REDIS_DEFAULT_CACHE_PREFIX, ttl_s: int = 300, model_class: type[BaseModel] | None = None
91
+ ):
92
+ """
93
+ Decorator for caching async function results in Redis. May be a Noop if redis is not available.
94
+ Will handle pydantic objects and raw values.
95
+
96
+ Attempts to write to and retrieve from cache, but does not fail on those cases
97
+
98
+ Args:
99
+ key_func: function to generate cache key (preferably lowercase strings to follow redis convention)
100
+ prefix: cache key prefix
101
+ ttl_s: time to live (s)
102
+ model_class: custom pydantic model class for serialization/deserialization
103
+
104
+ TODO (cliandy): move to class with generics for type hints
105
+ """
106
+
107
+ def decorator(func):
108
+ stats = CacheStats()
109
+
110
+ @wraps(func)
111
+ async def async_wrapper(*args, **kwargs):
112
+ redis_client = await get_redis_client()
113
+
114
+ # Don't bother going through other operations for no reason.
115
+ if isinstance(redis_client, NoopAsyncRedisClient):
116
+ return await func(*args, **kwargs)
117
+ cache_key = get_cache_key(*args, **kwargs)
118
+ cached_value = await redis_client.get(cache_key)
119
+
120
+ try:
121
+ if cached_value is not None:
122
+ stats.hits += 1
123
+ if model_class:
124
+ return model_class.model_validate_json(cached_value)
125
+ return json.loads(cached_value)
126
+ except Exception as e:
127
+ logger.warning(f"Failed to retrieve value from cache: {e}")
128
+
129
+ stats.misses += 1
130
+ result = await func(*args, **kwargs)
131
+ try:
132
+ if model_class:
133
+ await redis_client.set(cache_key, result.model_dump_json(), ex=ttl_s)
134
+ elif isinstance(result, (dict, list, str, int, float, bool)):
135
+ await redis_client.set(cache_key, json.dumps(result), ex=ttl_s)
136
+ else:
137
+ logger.warning(f"Cannot cache result of type {type(result).__name__} for {func.__name__}")
138
+ except Exception as e:
139
+ logger.warning(f"Redis cache set failed: {e}")
140
+ return result
141
+
142
+ async def invalidate(*args, **kwargs) -> bool:
143
+ stats.invalidations += 1
144
+ try:
145
+ redis_client = await get_redis_client()
146
+ cache_key = get_cache_key(*args, **kwargs)
147
+ return (await redis_client.delete(cache_key)) > 0
148
+ except Exception as e:
149
+ logger.error(f"Failed to invalidate cache: {e}")
150
+ return False
151
+
152
+ def get_cache_key(*args, **kwargs):
153
+ return f"{prefix}:{key_func(*args, **kwargs)}"
154
+
155
+ # async_wrapper.cache_invalidate = invalidate
156
+ async_wrapper.cache_key_func = get_cache_key
157
+ async_wrapper.cache_stats = stats
158
+ return async_wrapper
159
+
160
+ return decorator
@@ -26,6 +26,8 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
26
26
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
27
27
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG
28
28
  from letta.log import get_logger
29
+ from letta.otel.context import get_ctx_attributes
30
+ from letta.otel.metric_registry import MetricRegistry
29
31
  from letta.schemas.letta_message import (
30
32
  AssistantMessage,
31
33
  HiddenReasoningMessage,
@@ -35,6 +37,7 @@ from letta.schemas.letta_message import (
35
37
  ToolCallMessage,
36
38
  )
37
39
  from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
40
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
38
41
  from letta.schemas.message import Message
39
42
  from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
40
43
  from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
@@ -90,6 +93,8 @@ class AnthropicStreamingInterface:
90
93
 
91
94
  def get_tool_call_object(self) -> ToolCall:
92
95
  """Useful for agent loop"""
96
+ if not self.tool_call_name:
97
+ raise ValueError("No tool call returned")
93
98
  # hack for tool rules
94
99
  try:
95
100
  tool_input = json.loads(self.accumulated_tool_call_args)
@@ -140,6 +145,10 @@ class AnthropicStreamingInterface:
140
145
  ttft_span.add_event(
141
146
  name="anthropic_time_to_first_token_ms", attributes={"anthropic_time_to_first_token_ms": ns_to_ms(ttft_ns)}
142
147
  )
148
+ metric_attributes = get_ctx_attributes()
149
+ if isinstance(event, BetaRawMessageStartEvent):
150
+ metric_attributes["model.name"] = event.message.model
151
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
143
152
  first_chunk = False
144
153
 
145
154
  # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
@@ -377,6 +386,8 @@ class AnthropicStreamingInterface:
377
386
  self.anthropic_mode = None
378
387
  except Exception as e:
379
388
  logger.error("Error processing stream: %s", e)
389
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
390
+ yield stop_reason
380
391
  raise
381
392
  finally:
382
393
  logger.info("AnthropicStreamingInterface: Stream processing complete.")