PyPI - letta-nightly - Versions diffs - 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl - Mend

letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

letta/__init__.py +1 -1
letta/agent.py +8 -12
letta/agents/exceptions.py +6 -0
letta/agents/letta_agent.py +48 -35
letta/agents/letta_agent_batch.py +6 -2
letta/agents/voice_agent.py +10 -7
letta/constants.py +5 -1
letta/functions/composio_helpers.py +100 -0
letta/functions/functions.py +4 -2
letta/functions/helpers.py +19 -99
letta/groups/helpers.py +1 -0
letta/groups/sleeptime_multi_agent.py +5 -1
letta/helpers/message_helper.py +21 -4
letta/helpers/tool_execution_helper.py +1 -1
letta/interfaces/anthropic_streaming_interface.py +165 -158
letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
letta/llm_api/anthropic.py +15 -10
letta/llm_api/anthropic_client.py +5 -1
letta/llm_api/google_vertex_client.py +1 -1
letta/llm_api/llm_api_tools.py +7 -0
letta/llm_api/llm_client.py +12 -2
letta/llm_api/llm_client_base.py +4 -0
letta/llm_api/openai.py +9 -3
letta/llm_api/openai_client.py +18 -4
letta/memory.py +3 -1
letta/orm/group.py +2 -0
letta/orm/provider.py +10 -0
letta/schemas/agent.py +0 -1
letta/schemas/enums.py +11 -0
letta/schemas/group.py +24 -0
letta/schemas/llm_config.py +1 -0
letta/schemas/llm_config_overrides.py +2 -2
letta/schemas/providers.py +75 -20
letta/schemas/tool.py +3 -8
letta/server/rest_api/app.py +12 -0
letta/server/rest_api/chat_completions_interface.py +1 -1
letta/server/rest_api/interface.py +8 -10
letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
letta/server/rest_api/routers/v1/agents.py +1 -1
letta/server/rest_api/routers/v1/llms.py +4 -3
letta/server/rest_api/routers/v1/providers.py +4 -1
letta/server/rest_api/routers/v1/voice.py +0 -2
letta/server/rest_api/utils.py +8 -19
letta/server/server.py +25 -11
letta/services/group_manager.py +58 -0
letta/services/provider_manager.py +25 -14
letta/services/summarizer/summarizer.py +15 -7
letta/services/tool_executor/tool_execution_manager.py +1 -1
letta/services/tool_executor/tool_executor.py +3 -3
{letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
{letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
{letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
{letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
{letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0

letta/interfaces/anthropic_streaming_interface.py CHANGED Viewed

@@ -35,7 +35,7 @@ from letta.schemas.letta_message import (
 from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
 logger = get_logger(__name__)
@@ -56,7 +56,7 @@ class AnthropicStreamingInterface:
     """
     def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
-        self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
+        self.json_parser: JSONParser = PydanticJSONParser()
         self.use_assistant_message = use_assistant_message
         # Premake IDs for database writes
@@ -68,7 +68,7 @@ class AnthropicStreamingInterface:
         self.accumulated_inner_thoughts = []
         self.tool_call_id = None
         self.tool_call_name = None
-        self.accumulated_tool_call_args = []
+        self.accumulated_tool_call_args = ""
         self.previous_parse = {}
         # usage trackers
@@ -85,193 +85,200 @@ class AnthropicStreamingInterface:
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
-        return ToolCall(
-            id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
-        )
+        return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=self.accumulated_tool_call_args, name=self.tool_call_name))
     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
         """
         Check if inner thoughts are complete in the current tool call arguments
         by looking for a closing quote after the inner_thoughts field
         """
-        if not self.put_inner_thoughts_in_kwarg:
-            # None of the things should have inner thoughts in kwargs
-            return True
-        else:
-            parsed = self.optimistic_json_parser.parse(combined_args)
-            # TODO: This will break on tools with 0 input
-            return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
+        try:
+            if not self.put_inner_thoughts_in_kwarg:
+                # None of the things should have inner thoughts in kwargs
+                return True
+            else:
+                parsed = self.json_parser.parse(combined_args)
+                # TODO: This will break on tools with 0 input
+                return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
+        except Exception as e:
+            logger.error("Error checking inner thoughts: %s", e)
+            raise
     async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
-        async with stream:
-            async for event in stream:
-                # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
-                if isinstance(event, BetaRawContentBlockStartEvent):
-                    content = event.content_block
-                    if isinstance(content, BetaTextBlock):
-                        self.anthropic_mode = EventMode.TEXT
-                        # TODO: Can capture citations, etc.
-                    elif isinstance(content, BetaToolUseBlock):
-                        self.anthropic_mode = EventMode.TOOL_USE
-                        self.tool_call_id = content.id
-                        self.tool_call_name = content.name
-                        self.inner_thoughts_complete = False
-                        if not self.use_assistant_message:
-                            # Buffer the initial tool call message instead of yielding immediately
-                            tool_call_msg = ToolCallMessage(
-                                id=self.letta_tool_message_id,
-                                tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+        try:
+            async with stream:
+                async for event in stream:
+                    # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
+                    if isinstance(event, BetaRawContentBlockStartEvent):
+                        content = event.content_block
+                        if isinstance(content, BetaTextBlock):
+                            self.anthropic_mode = EventMode.TEXT
+                            # TODO: Can capture citations, etc.
+                        elif isinstance(content, BetaToolUseBlock):
+                            self.anthropic_mode = EventMode.TOOL_USE
+                            self.tool_call_id = content.id
+                            self.tool_call_name = content.name
+                            self.inner_thoughts_complete = False
+                            if not self.use_assistant_message:
+                                # Buffer the initial tool call message instead of yielding immediately
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_tool_message_id,
+                                    tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                                    date=datetime.now(timezone.utc).isoformat(),
+                                )
+                                self.tool_call_buffer.append(tool_call_msg)
+                        elif isinstance(content, BetaThinkingBlock):
+                            self.anthropic_mode = EventMode.THINKING
+                            # TODO: Can capture signature, etc.
+                        elif isinstance(content, BetaRedactedThinkingBlock):
+                            self.anthropic_mode = EventMode.REDACTED_THINKING
+                            hidden_reasoning_message = HiddenReasoningMessage(
+                                id=self.letta_assistant_message_id,
+                                state="redacted",
+                                hidden_reasoning=content.data,
                                 date=datetime.now(timezone.utc).isoformat(),
                             )
-                            self.tool_call_buffer.append(tool_call_msg)
-                    elif isinstance(content, BetaThinkingBlock):
-                        self.anthropic_mode = EventMode.THINKING
-                        # TODO: Can capture signature, etc.
-                    elif isinstance(content, BetaRedactedThinkingBlock):
-                        self.anthropic_mode = EventMode.REDACTED_THINKING
-                        hidden_reasoning_message = HiddenReasoningMessage(
-                            id=self.letta_assistant_message_id,
-                            state="redacted",
-                            hidden_reasoning=content.data,
-                            date=datetime.now(timezone.utc).isoformat(),
-                        )
-                        self.reasoning_messages.append(hidden_reasoning_message)
-                        yield hidden_reasoning_message
-                elif isinstance(event, BetaRawContentBlockDeltaEvent):
-                    delta = event.delta
-                    if isinstance(delta, BetaTextDelta):
-                        # Safety check
-                        if not self.anthropic_mode == EventMode.TEXT:
-                            raise RuntimeError(
-                                f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
-                            )
+                            self.reasoning_messages.append(hidden_reasoning_message)
+                            yield hidden_reasoning_message
-                        # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
-                        delta.text = delta.text.replace("</thinking>", "")
-                        self.accumulated_inner_thoughts.append(delta.text)
-                        reasoning_message = ReasoningMessage(
-                            id=self.letta_assistant_message_id,
-                            reasoning=self.accumulated_inner_thoughts[-1],
-                            date=datetime.now(timezone.utc).isoformat(),
-                        )
-                        self.reasoning_messages.append(reasoning_message)
-                        yield reasoning_message
-                    elif isinstance(delta, BetaInputJSONDelta):
-                        if not self.anthropic_mode == EventMode.TOOL_USE:
-                            raise RuntimeError(
-                                f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
-                            )
+                    elif isinstance(event, BetaRawContentBlockDeltaEvent):
+                        delta = event.delta
-                        self.accumulated_tool_call_args.append(delta.partial_json)
-                        combined_args = "".join(self.accumulated_tool_call_args)
-                        current_parsed = self.optimistic_json_parser.parse(combined_args)
+                        if isinstance(delta, BetaTextDelta):
+                            # Safety check
+                            if not self.anthropic_mode == EventMode.TEXT:
+                                raise RuntimeError(
+                                    f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
+                                )
-                        # Start detecting a difference in inner thoughts
-                        previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
-                        current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
-                        inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
+                            # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
+                            delta.text = delta.text.replace("</thinking>", "")
+                            self.accumulated_inner_thoughts.append(delta.text)
-                        if inner_thoughts_diff:
                             reasoning_message = ReasoningMessage(
                                 id=self.letta_assistant_message_id,
-                                reasoning=inner_thoughts_diff,
+                                reasoning=self.accumulated_inner_thoughts[-1],
                                 date=datetime.now(timezone.utc).isoformat(),
                             )
                             self.reasoning_messages.append(reasoning_message)
                             yield reasoning_message
-                        # Check if inner thoughts are complete - if so, flush the buffer
-                        if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(combined_args):
-                            self.inner_thoughts_complete = True
-                            # Flush all buffered tool call messages
-                            for buffered_msg in self.tool_call_buffer:
-                                yield buffered_msg
-                            self.tool_call_buffer = []
+                        elif isinstance(delta, BetaInputJSONDelta):
+                            if not self.anthropic_mode == EventMode.TOOL_USE:
+                                raise RuntimeError(
+                                    f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
+                                )
+                            self.accumulated_tool_call_args += delta.partial_json
+                            current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
-                        # Start detecting special case of "send_message"
-                        if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
-                            previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
-                            current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
-                            send_message_diff = current_send_message[len(previous_send_message) :]
+                            # Start detecting a difference in inner thoughts
+                            previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
+                            current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
+                            inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
-                            # Only stream out if it's not an empty string
-                            if send_message_diff:
-                                yield AssistantMessage(
+                            if inner_thoughts_diff:
+                                reasoning_message = ReasoningMessage(
                                     id=self.letta_assistant_message_id,
-                                    content=[TextContent(text=send_message_diff)],
+                                    reasoning=inner_thoughts_diff,
                                     date=datetime.now(timezone.utc).isoformat(),
                                 )
-                        else:
-                            # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
-                            tool_call_msg = ToolCallMessage(
-                                id=self.letta_tool_message_id,
-                                tool_call=ToolCallDelta(arguments=delta.partial_json),
-                                date=datetime.now(timezone.utc).isoformat(),
-                            )
-                            if self.inner_thoughts_complete:
-                                yield tool_call_msg
+                                self.reasoning_messages.append(reasoning_message)
+                                yield reasoning_message
+                            # Check if inner thoughts are complete - if so, flush the buffer
+                            if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
+                                self.inner_thoughts_complete = True
+                                # Flush all buffered tool call messages
+                                for buffered_msg in self.tool_call_buffer:
+                                    yield buffered_msg
+                                self.tool_call_buffer = []
+                            # Start detecting special case of "send_message"
+                            if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
+                                previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
+                                current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
+                                send_message_diff = current_send_message[len(previous_send_message) :]
+                                # Only stream out if it's not an empty string
+                                if send_message_diff:
+                                    yield AssistantMessage(
+                                        id=self.letta_assistant_message_id,
+                                        content=[TextContent(text=send_message_diff)],
+                                        date=datetime.now(timezone.utc).isoformat(),
+                                    )
                             else:
-                                self.tool_call_buffer.append(tool_call_msg)
+                                # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
+                                tool_call_msg = ToolCallMessage(
+                                    id=self.letta_tool_message_id,
+                                    tool_call=ToolCallDelta(arguments=delta.partial_json),
+                                    date=datetime.now(timezone.utc).isoformat(),
+                                )
-                        # Set previous parse
-                        self.previous_parse = current_parsed
-                    elif isinstance(delta, BetaThinkingDelta):
-                        # Safety check
-                        if not self.anthropic_mode == EventMode.THINKING:
-                            raise RuntimeError(
-                                f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
+                                if self.inner_thoughts_complete:
+                                    yield tool_call_msg
+                                else:
+                                    self.tool_call_buffer.append(tool_call_msg)
+                            # Set previous parse
+                            self.previous_parse = current_parsed
+                        elif isinstance(delta, BetaThinkingDelta):
+                            # Safety check
+                            if not self.anthropic_mode == EventMode.THINKING:
+                                raise RuntimeError(
+                                    f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
+                                )
+                            reasoning_message = ReasoningMessage(
+                                id=self.letta_assistant_message_id,
+                                source="reasoner_model",
+                                reasoning=delta.thinking,
+                                date=datetime.now(timezone.utc).isoformat(),
                             )
+                            self.reasoning_messages.append(reasoning_message)
+                            yield reasoning_message
+                        elif isinstance(delta, BetaSignatureDelta):
+                            # Safety check
+                            if not self.anthropic_mode == EventMode.THINKING:
+                                raise RuntimeError(
+                                    f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
+                                )
-                        reasoning_message = ReasoningMessage(
-                            id=self.letta_assistant_message_id,
-                            source="reasoner_model",
-                            reasoning=delta.thinking,
-                            date=datetime.now(timezone.utc).isoformat(),
-                        )
-                        self.reasoning_messages.append(reasoning_message)
-                        yield reasoning_message
-                    elif isinstance(delta, BetaSignatureDelta):
-                        # Safety check
-                        if not self.anthropic_mode == EventMode.THINKING:
-                            raise RuntimeError(
-                                f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
+                            reasoning_message = ReasoningMessage(
+                                id=self.letta_assistant_message_id,
+                                source="reasoner_model",
+                                reasoning="",
+                                date=datetime.now(timezone.utc).isoformat(),
+                                signature=delta.signature,
                             )
+                            self.reasoning_messages.append(reasoning_message)
+                            yield reasoning_message
+                    elif isinstance(event, BetaRawMessageStartEvent):
+                        self.message_id = event.message.id
+                        self.input_tokens += event.message.usage.input_tokens
+                        self.output_tokens += event.message.usage.output_tokens
+                    elif isinstance(event, BetaRawMessageDeltaEvent):
+                        self.output_tokens += event.usage.output_tokens
+                    elif isinstance(event, BetaRawMessageStopEvent):
+                        # Don't do anything here! We don't want to stop the stream.
+                        pass
+                    elif isinstance(event, BetaRawContentBlockStopEvent):
+                        # If we're exiting a tool use block and there are still buffered messages,
+                        # we should flush them now
+                        if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
+                            for buffered_msg in self.tool_call_buffer:
+                                yield buffered_msg
+                            self.tool_call_buffer = []
-                        reasoning_message = ReasoningMessage(
-                            id=self.letta_assistant_message_id,
-                            source="reasoner_model",
-                            reasoning="",
-                            date=datetime.now(timezone.utc).isoformat(),
-                            signature=delta.signature,
-                        )
-                        self.reasoning_messages.append(reasoning_message)
-                        yield reasoning_message
-                elif isinstance(event, BetaRawMessageStartEvent):
-                    self.message_id = event.message.id
-                    self.input_tokens += event.message.usage.input_tokens
-                    self.output_tokens += event.message.usage.output_tokens
-                elif isinstance(event, BetaRawMessageDeltaEvent):
-                    self.output_tokens += event.usage.output_tokens
-                elif isinstance(event, BetaRawMessageStopEvent):
-                    # Don't do anything here! We don't want to stop the stream.
-                    pass
-                elif isinstance(event, BetaRawContentBlockStopEvent):
-                    # If we're exiting a tool use block and there are still buffered messages,
-                    # we should flush them now
-                    if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
-                        for buffered_msg in self.tool_call_buffer:
-                            yield buffered_msg
-                        self.tool_call_buffer = []
-                    self.anthropic_mode = None
+                        self.anthropic_mode = None
+        except Exception as e:
+            logger.error("Error processing stream: %s", e)
+            raise
+        finally:
+            logger.info("AnthropicStreamingInterface: Stream processing complete.")
     def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
         def _process_group(

letta/interfaces/openai_chat_completions_streaming_interface.py CHANGED Viewed

@@ -5,7 +5,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice,
 from letta.constants import PRE_EXECUTION_MESSAGE_ARG
 from letta.interfaces.utils import _format_sse_chunk
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+from letta.server.rest_api.json_parser import OptimisticJSONParser
 class OpenAIChatCompletionsStreamingInterface:

letta/llm_api/anthropic.py CHANGED Viewed

@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
+from letta.schemas.enums import ProviderType
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
@@ -128,11 +129,12 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
     # NOTE: currently there is no GET /models, so we need to hardcode
     # return MODEL_LIST
-    anthropic_override_key = ProviderManager().get_anthropic_override_key()
-    if anthropic_override_key:
-        anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
+    if api_key:
+        anthropic_client = anthropic.Anthropic(api_key=api_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
+    else:
+        raise ValueError("No API key provided")
     models = anthropic_client.models.list()
     models_json = models.model_dump()
@@ -738,13 +740,14 @@ def anthropic_chat_completions_request(
     put_inner_thoughts_in_kwargs: bool = False,
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
+    provider_name: Optional[str] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
     anthropic_client = None
-    anthropic_override_key = ProviderManager().get_anthropic_override_key()
-    if anthropic_override_key:
-        anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
+    if provider_name and provider_name != ProviderType.anthropic.value:
+        api_key = ProviderManager().get_override_key(provider_name)
+        anthropic_client = anthropic.Anthropic(api_key=api_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
     else:
@@ -796,6 +799,7 @@ def anthropic_chat_completions_request_stream(
     put_inner_thoughts_in_kwargs: bool = False,
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
+    provider_name: Optional[str] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
     """Stream chat completions from Anthropic API.
@@ -810,10 +814,9 @@ def anthropic_chat_completions_request_stream(
         extended_thinking=extended_thinking,
         max_reasoning_tokens=max_reasoning_tokens,
     )
-    anthropic_override_key = ProviderManager().get_anthropic_override_key()
-    if anthropic_override_key:
-        anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
+    if provider_name and provider_name != ProviderType.anthropic.value:
+        api_key = ProviderManager().get_override_key(provider_name)
+        anthropic_client = anthropic.Anthropic(api_key=api_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
@@ -860,6 +863,7 @@ def anthropic_chat_completions_process_stream(
     put_inner_thoughts_in_kwargs: bool = False,
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
+    provider_name: Optional[str] = None,
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     betas: List[str] = ["tools-2024-04-04"],
@@ -944,6 +948,7 @@ def anthropic_chat_completions_process_stream(
                 put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
                 extended_thinking=extended_thinking,
                 max_reasoning_tokens=max_reasoning_tokens,
+                provider_name=provider_name,
                 betas=betas,
             )
         ):

letta/llm_api/anthropic_client.py CHANGED Viewed

@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.schemas.enums import ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -112,7 +113,10 @@ class AnthropicClient(LLMClientBase):
     @trace_method
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
-        override_key = ProviderManager().get_anthropic_override_key()
+        override_key = None
+        if self.provider_name and self.provider_name != ProviderType.anthropic.value:
+            override_key = ProviderManager().get_override_key(self.provider_name)
         if async_client:
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
         return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()

letta/llm_api/google_vertex_client.py CHANGED Viewed

@@ -63,7 +63,7 @@ class GoogleVertexClient(GoogleAIClient):
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
-        thinking_budget = 0 if not self.llm_config.enable_reasoner else self.llm_config.max_reasoning_tokens
+        thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens
         thinking_config = ThinkingConfig(
             thinking_budget=thinking_budget,
         )

letta/llm_api/llm_api_tools.py CHANGED Viewed

@@ -24,6 +24,7 @@ from letta.llm_api.openai import (
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.schemas.enums import ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
@@ -171,6 +172,10 @@ def create(
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
+        elif llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
+            from letta.services.provider_manager import ProviderManager
+            api_key = ProviderManager().get_override_key(llm_config.provider_name)
         elif model_settings.openai_api_key is None:
             # the openai python client requires a dummy API key
             api_key = "DUMMY_API_KEY"
@@ -373,6 +378,7 @@ def create(
                 stream_interface=stream_interface,
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
+                provider_name=llm_config.provider_name,
                 name=name,
             )
@@ -383,6 +389,7 @@ def create(
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
+                provider_name=llm_config.provider_name,
             )
         if llm_config.put_inner_thoughts_in_kwargs:

letta/llm_api/llm_client.py CHANGED Viewed

@@ -9,8 +9,10 @@ class LLMClient:
     @staticmethod
     def create(
-        provider: ProviderType,
+        provider_type: ProviderType,
+        provider_name: Optional[str] = None,
         put_inner_thoughts_first: bool = True,
+        actor_id: Optional[str] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
@@ -25,30 +27,38 @@ class LLMClient:
         Raises:
             ValueError: If the model endpoint type is not supported
         """
-        match provider:
+        match provider_type:
             case ProviderType.google_ai:
                 from letta.llm_api.google_ai_client import GoogleAIClient
                 return GoogleAIClient(
+                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor_id=actor_id,
                 )
             case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
                 return GoogleVertexClient(
+                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor_id=actor_id,
                 )
             case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient
                 return AnthropicClient(
+                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor_id=actor_id,
                 )
             case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient
                 return OpenAIClient(
+                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor_id=actor_id,
                 )
             case _:
                 return None

letta/llm_api/llm_client_base.py CHANGED Viewed

@@ -20,9 +20,13 @@ class LLMClientBase:
     def __init__(
         self,
+        provider_name: Optional[str] = None,
         put_inner_thoughts_first: Optional[bool] = True,
         use_tool_naming: bool = True,
+        actor_id: Optional[str] = None,
     ):
+        self.actor_id = actor_id
+        self.provider_name = provider_name
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming

letta/llm_api/openai.py CHANGED Viewed

@@ -157,11 +157,17 @@ def build_openai_chat_completions_request(
         # if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
         # data.response_format = {"type": "json_object"}
+    # always set user id for openai requests
+    if user_id:
+        data.user = str(user_id)
     if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
-        # override user id for inference.memgpt.ai
-        import uuid
+        if not user_id:
+            # override user id for inference.letta.com
+            import uuid
+            data.user = str(uuid.UUID(int=0))
-        data.user = str(uuid.UUID(int=0))
         data.model = "memgpt-openai"
     if use_structured_output and data.tools is not None and len(data.tools) > 0:

letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl