letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/deepseek_client.py CHANGED
@@ -1,21 +1,327 @@
+ import json
  import os
+ import re
+ import warnings
  from typing import List, Optional

  from openai import AsyncOpenAI, AsyncStream, OpenAI
  from openai.types.chat.chat_completion import ChatCompletion
  from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

- from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format
  from letta.llm_api.openai_client import OpenAIClient
  from letta.otel.tracing import trace_method
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_request import (
+     AssistantMessage,
+     ChatCompletionRequest,
+     ChatMessage,
+     FunctionCall as ToolFunctionChoiceFunctionCall,
+     Tool,
+     ToolFunctionChoice,
+     ToolMessage,
+     UserMessage,
+     cast_message_to_subtype,
+ )
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.schemas.openai.openai import Function, ToolCall
  from letta.settings import model_settings
+ from letta.utils import get_tool_call_id


- class DeepseekClient(OpenAIClient):
+ def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+     """
+     Merge `ToolMessage` objects into the previous message.
+     """
+     previous_message.content += (
+         f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+     )
+     return previous_message
+
+
+ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+     """
+     For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+     """
+
+     if "tool_calls" in assistant_message.dict().keys():
+         assistant_message.content = "".join(
+             [
+                 # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                 f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                 for tool_call in assistant_message.tool_calls
+             ]
+         )
+         del assistant_message.tool_calls
+     return assistant_message
+
+
+ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+     """
+     Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+     Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+     This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+     at the end.
+
+     """
+     deepseek_messages = []
+     for idx, message in enumerate(messages):
+         # First message is the system prompt, add it
+         if idx == 0 and message.role == "system":
+             deepseek_messages.append(message)
+             continue
+         if message.role == "user":
+             if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                 # User message, add it
+                 deepseek_messages.append(UserMessage(content=message.content))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "assistant":
+             if deepseek_messages[-1].role == "user":
+                 # Assistant message, remove tool calls and add them to the content
+                 deepseek_messages.append(handle_assistant_message(message))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+             # Tool message, add it to the last assistant message
+             merged_message = merge_tool_message(deepseek_messages[-1], message)
+             deepseek_messages[-1] = merged_message
+         else:
+             print(f"Skipping message: {message}")
+
+     # This needs to end on a user message, add a dummy message if the last was assistant
+     if deepseek_messages[-1].role == "assistant":
+         deepseek_messages.append(UserMessage(content=""))
+     return deepseek_messages
+
+
+ def build_deepseek_chat_completions_request(
+     llm_config: LLMConfig,
+     messages: List[_Message],
+     user_id: Optional[str],
+     functions: Optional[list],
+     function_call: Optional[str],
+     use_tool_naming: bool,
+     max_tokens: Optional[int],
+ ) -> ChatCompletionRequest:
+     # if functions and llm_config.put_inner_thoughts_in_kwargs:
+     #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+     #     # TODO(fix)
+     #     inner_thoughts_desc = (
+     #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+     #     )
+     #     functions = add_inner_thoughts_to_functions(
+     #         functions=functions,
+     #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+     #         inner_thoughts_description=inner_thoughts_desc,
+     #     )
+
+     openai_message_list = [
+         cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+     ]
+
+     if llm_config.model:
+         model = llm_config.model
+     else:
+         warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+         model = None
+     if use_tool_naming:
+         if function_call is None:
+             tool_choice = None
+         elif function_call not in ["none", "auto", "required"]:
+             tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+         else:
+             tool_choice = function_call
+
+         def add_functions_to_system_message(system_message: ChatMessage):
+             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+         if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+             add_functions_to_system_message(
+                 openai_message_list[0]
+             )  # Inject additional instructions to the system prompt with the available functions
+
+             openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+         else:
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                 tool_choice=tool_choice,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+     else:
+         data = ChatCompletionRequest(
+             model=model,
+             messages=openai_message_list,
+             functions=functions,
+             function_call=function_call,
+             user=str(user_id),
+             max_completion_tokens=max_tokens,
+             temperature=llm_config.temperature,
+         )
+
+     return data
+

+ def convert_deepseek_response_to_chatcompletion(
+     response: ChatCompletionResponse,
+ ) -> ChatCompletionResponse:
+     """
+     Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response):
+
+     ChatCompletion(
+         id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+         choices=[
+             Choice(
+                 finish_reason='stop',
+                 index=0,
+                 logprobs=None,
+                 message=ChatCompletionMessage(
+                     content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                     refusal=None,
+                     role='assistant',
+                     audio=None,
+                     function_call=None,
+                     tool_calls=None,
+                     reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                 )
+             )
+         ],
+         created=1738266449,
+         model='deepseek-reasoner',
+         object='chat.completion',
+         service_tier=None,
+         system_fingerprint='fp_7e73fd9a08',
+         usage=CompletionUsage(
+             completion_tokens=111,
+             prompt_tokens=1270,
+             total_tokens=1381,
+             completion_tokens_details=CompletionTokensDetails(
+                 accepted_prediction_tokens=None,
+                 audio_tokens=None,
+                 reasoning_tokens=72,
+                 rejected_prediction_tokens=None
+             ),
+             prompt_tokens_details=PromptTokensDetails(
+                 audio_tokens=None,
+                 cached_tokens=1088
+             ),
+             prompt_cache_hit_tokens=1088,
+             prompt_cache_miss_tokens=182
+         )
+     )
+     """
+
+     def convert_dict_quotes(input_dict: dict):
+         """
+         Convert a dictionary with single-quoted keys to double-quoted keys,
+         properly handling boolean values and nested structures.
+
+         Args:
+             input_dict (dict): Input dictionary with single-quoted keys
+
+         Returns:
+             str: JSON string with double-quoted keys
+         """
+         # First convert the dictionary to a JSON string to handle booleans properly
+         json_str = json.dumps(input_dict)
+
+         # Function to handle complex string replacements
+         def replace_quotes(match):
+             key = match.group(1)
+             # Escape any existing double quotes in the key
+             key = key.replace('"', '\\"')
+             return f'"{key}":'
+
+         # Replace single-quoted keys with double-quoted keys
+         # This regex looks for single-quoted keys followed by a colon
+         def strip_json_block(text):
+             # Check if text starts with ```json or similar
+             if text.strip().startswith("```"):
+                 # Split by \n to remove the first and last lines
+                 lines = text.split("\n")[1:-1]
+                 return "\n".join(lines)
+             return text
+
+         pattern = r"'([^']*)':"
+         converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+         # Parse the string back to ensure valid JSON format
+         try:
+             json.loads(converted_str)
+             return converted_str
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+     def extract_json_block(text):
+         # Find the first {
+         start = text.find("{")
+         if start == -1:
+             return text
+
+         # Track nested braces to find the matching closing brace
+         brace_count = 0
+         end = start
+
+         for i in range(start, len(text)):
+             if text[i] == "{":
+                 brace_count += 1
+             elif text[i] == "}":
+                 brace_count -= 1
+                 if brace_count == 0:
+                     end = i + 1
+                     break
+
+         return text[start:end]
+
+     content = response.choices[0].message.content
+     try:
+         content_dict = json.loads(extract_json_block(content))
+
+         if type(content_dict["arguments"]) == str:
+             content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+         tool_calls = [
+             ToolCall(
+                 id=get_tool_call_id(),
+                 type="function",
+                 function=Function(
+                     name=content_dict["name"],
+                     arguments=convert_dict_quotes(content_dict["arguments"]),
+                 ),
+             )
+         ]
+     except (json.JSONDecodeError, TypeError, KeyError) as e:
+         print(e)
+         tool_calls = response.choices[0].message.tool_calls
+         raise ValueError(f"Failed to create valid JSON {content}")
+
+     # Move the "reasoning_content" into the "content" field
+     response.choices[0].message.content = response.choices[0].message.reasoning_content
+     response.choices[0].message.tool_calls = tool_calls
+
+     # Remove the "reasoning_content" field
+     response.choices[0].message.reasoning_content = None
+
+     return response
+
+
+ class DeepseekClient(OpenAIClient):
      def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
          return False

@@ -36,15 +342,21 @@ class DeepseekClient(OpenAIClient):
          data = super().build_request_data(messages, llm_config, tools, force_tool_call)

          def add_functions_to_system_message(system_message: ChatMessage):
-             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
              system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

+         openai_message_list = [
+             cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+         ]
+
          if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
              add_functions_to_system_message(
-                 data["messages"][0]
+                 openai_message_list[0]
              )  # Inject additional instructions to the system prompt with the available functions

-             data["messages"] = map_messages_to_deepseek_format(data["messages"])
+             openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+             data["messages"] = [m.dict() for m in openai_message_list]

          return data

@@ -94,4 +406,6 @@ class DeepseekClient(OpenAIClient):
          Handles potential extraction of inner thoughts if they were added via kwargs.
          """
          response = ChatCompletionResponse(**response_data)
+         if response.choices[0].message.tool_calls:
+             return super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
          return convert_deepseek_response_to_chatcompletion(response)
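For context, a minimal standalone sketch of the message-flattening idea that `map_messages_to_deepseek_format` above implements: tool results are folded into the preceding assistant turn, consecutive same-role messages are merged, and the list is forced to end on a user turn. It uses plain dicts and an illustrative `flatten_for_deepseek` name rather than letta's ChatMessage types, so treat it as an approximation of the behavior, not the package's code.

# Simplified, self-contained sketch of the DeepSeek message-flattening idea.
# Messages are plain {"role": ..., "content": ...} dicts; the real helper
# operates on letta's ChatMessage subtypes instead.
from typing import Dict, List


def flatten_for_deepseek(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Force a system -> user/assistant alternation that ends on a user turn."""
    out: List[Dict[str, str]] = []
    for msg in messages:
        role, content = msg["role"], msg.get("content", "")
        if not out and role == "system":
            out.append({"role": "system", "content": content})
        elif role == "tool" and out and out[-1]["role"] == "assistant":
            # Fold the tool result into the previous assistant turn
            out[-1]["content"] += f"<ToolMessage>{content}</ToolMessage>"
        elif out and out[-1]["role"] == role:
            # Merge consecutive same-role messages
            out[-1]["content"] += content
        else:
            out.append({"role": role, "content": content})
    if out and out[-1]["role"] == "assistant":
        # DeepSeek expects the conversation to end on a user message
        out.append({"role": "user", "content": ""})
    return out


if __name__ == "__main__":
    history = [
        {"role": "system", "content": "You are a helpful agent."},
        {"role": "user", "content": "hello whales"},
        {"role": "assistant", "content": '<ToolCall>{"name": "send_message"}</ToolCall>'},
        {"role": "tool", "content": '{"status": "OK"}'},
    ]
    print(flatten_for_deepseek(history))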
letta/llm_api/google_vertex_client.py CHANGED
@@ -3,6 +3,7 @@ import uuid
  from typing import List, Optional

  from google import genai
+ from google.genai import errors
  from google.genai.types import (
      FunctionCallingConfig,
      FunctionCallingConfigMode,
@@ -31,6 +32,7 @@ logger = get_logger(__name__)


  class GoogleVertexClient(LLMClientBase):
+     MAX_RETRIES = model_settings.gemini_max_retries

      def _get_client(self):
          timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
@@ -60,12 +62,59 @@ class GoogleVertexClient(LLMClientBase):
          Performs underlying request to llm and returns raw response.
          """
          client = self._get_client()
-         response = await client.aio.models.generate_content(
-             model=llm_config.model,
-             contents=request_data["contents"],
-             config=request_data["config"],
-         )
-         return response.model_dump()
+
+         # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry
+         # https://github.com/googleapis/python-aiplatform/issues/4472
+         retry_count = 1
+         should_retry = True
+         while should_retry and retry_count <= self.MAX_RETRIES:
+             try:
+                 response = await client.aio.models.generate_content(
+                     model=llm_config.model,
+                     contents=request_data["contents"],
+                     config=request_data["config"],
+                 )
+             except errors.APIError as e:
+                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
+                 if e.code == 503 or e.code == 500:
+                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
+                     retry_count += 1
+                     continue
+                 raise e
+             except Exception as e:
+                 raise e
+             response_data = response.model_dump()
+             is_malformed_function_call = self.is_malformed_function_call(response_data)
+             if is_malformed_function_call:
+                 logger.warning(
+                     f"Received FinishReason.MALFORMED_FUNCTION_CALL in response for {llm_config.model}, retrying {retry_count}/{self.MAX_RETRIES}"
+                 )
+                 # Modify the last message if it's a heartbeat to include warning about special characters
+                 if request_data["contents"] and len(request_data["contents"]) > 0:
+                     last_message = request_data["contents"][-1]
+                     if last_message.get("role") == "user" and last_message.get("parts"):
+                         for part in last_message["parts"]:
+                             if "text" in part:
+                                 try:
+                                     # Try to parse as JSON to check if it's a heartbeat
+                                     message_json = json_loads(part["text"])
+                                     if message_json.get("type") == "heartbeat" and "reason" in message_json:
+                                         # Append warning to the reason
+                                         warning = f" RETRY {retry_count}/{self.MAX_RETRIES} ***DO NOT USE SPECIAL CHARACTERS OR QUOTATIONS INSIDE FUNCTION CALL ARGUMENTS. IF YOU MUST, MAKE SURE TO ESCAPE THEM PROPERLY***"
+                                         message_json["reason"] = message_json["reason"] + warning
+                                         # Update the text with modified JSON
+                                         part["text"] = json_dumps(message_json)
+                                         logger.warning(
+                                             f"Modified heartbeat message with special character warning for retry {retry_count}/{self.MAX_RETRIES}"
+                                         )
+                                 except (json.JSONDecodeError, TypeError):
+                                     # Not a JSON message or not a heartbeat, skip modification
+                                     pass
+
+             should_retry = is_malformed_function_call
+             retry_count += 1
+
+         return response_data

      @staticmethod
      def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
@@ -230,10 +279,12 @@ class GoogleVertexClient(LLMClientBase):
              "contents": contents,
              "config": {
                  "temperature": llm_config.temperature,
-                 "max_output_tokens": llm_config.max_tokens,
                  "tools": formatted_tools,
              },
          }
+         # Make tokens is optional
+         if llm_config.max_tokens:
+             request_data["config"]["max_output_tokens"] = llm_config.max_tokens

          if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
              request_data["config"]["response_mime_type"] = "application/json"
@@ -298,7 +349,6 @@ class GoogleVertexClient(LLMClientBase):
              }
          }
          """
-
          response = GenerateContentResponse(**response_data)
          try:
              choices = []
@@ -310,7 +360,7 @@ class GoogleVertexClient(LLMClientBase):
                  # This means the response is malformed like MALFORMED_FUNCTION_CALL
                  # NOTE: must be a ValueError to trigger a retry
                  if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                     raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}...")
+                     raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}")
                  else:
                      raise ValueError(f"Error in response data from LLM: {candidate.model_dump()}")

@@ -344,9 +394,9 @@ class GoogleVertexClient(LLMClientBase):
                      if llm_config.put_inner_thoughts_in_kwargs:
                          from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                         assert (
-                             INNER_THOUGHTS_KWARG_VERTEX in function_args
-                         ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                             f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         )
                          inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                          assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                      else:
@@ -380,9 +430,9 @@ class GoogleVertexClient(LLMClientBase):
                      if llm_config.put_inner_thoughts_in_kwargs:
                          from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                         assert (
-                             INNER_THOUGHTS_KWARG_VERTEX in function_args
-                         ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                             f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         )
                          inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                          assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                      else:
@@ -406,7 +456,7 @@ class GoogleVertexClient(LLMClientBase):

              except json.decoder.JSONDecodeError:
                  if candidate.finish_reason == "MAX_TOKENS":
-                     raise ValueError(f"Could not parse response data from LLM: exceeded max token limit")
+                     raise ValueError("Could not parse response data from LLM: exceeded max token limit")
                  # Inner thoughts are the content by default
                  inner_thoughts = response_message.text

@@ -463,7 +513,7 @@ class GoogleVertexClient(LLMClientBase):
              )
          else:
              # Count it ourselves
-             assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+             assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required"
              prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
              completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
              total_tokens = prompt_tokens + completion_tokens
@@ -516,6 +566,14 @@ class GoogleVertexClient(LLMClientBase):
      def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
          return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")

+     def is_malformed_function_call(self, response_data: dict) -> dict:
+         response = GenerateContentResponse(**response_data)
+         for candidate in response.candidates:
+             content = candidate.content
+             if content is None or content.role is None or content.parts is None:
+                 return candidate.finish_reason == "MALFORMED_FUNCTION_CALL"
+         return False
+
      @trace_method
      def handle_llm_error(self, e: Exception) -> Exception:
          # Fallback to base implementation
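The request_async change above wraps the Gemini call in a bounded retry loop that retries on transient 500/503 errors and on MALFORMED_FUNCTION_CALL responses. A minimal standalone sketch of that general pattern follows; names such as `call_with_retries` and `is_malformed` are illustrative and not letta's API.

# Self-contained sketch of a bounded retry loop around an async LLM call.
# It retries on transient errors and on responses flagged as malformed,
# mirroring the shape of the change above.
import asyncio
import random
from typing import Awaitable, Callable, Dict

MAX_RETRIES = 3


def is_malformed(response: Dict) -> bool:
    # Stand-in check; the real client inspects candidate.finish_reason in the response dump
    return response.get("finish_reason") == "MALFORMED_FUNCTION_CALL"


async def call_with_retries(make_request: Callable[[], Awaitable[Dict]]) -> Dict:
    last_response: Dict = {}
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            last_response = await make_request()
        except RuntimeError as exc:  # stand-in for transient 500/503 API errors
            print(f"Transient error: {exc}, retrying {attempt}/{MAX_RETRIES}")
            continue
        if not is_malformed(last_response):
            return last_response
        print(f"Malformed function call, retrying {attempt}/{MAX_RETRIES}")
    return last_response  # whatever the last attempt produced


async def fake_request() -> Dict:
    # Simulated backend that sometimes returns a malformed function call
    if random.random() < 0.5:
        return {"finish_reason": "MALFORMED_FUNCTION_CALL"}
    return {"finish_reason": "STOP", "text": "ok"}


if __name__ == "__main__":
    print(asyncio.run(call_with_retries(fake_request)))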
letta/llm_api/groq_client.py CHANGED
@@ -14,7 +14,6 @@ from letta.settings import model_settings


  class GroqClient(OpenAIClient):
-
      def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
          return False

letta/llm_api/helpers.py CHANGED
@@ -310,7 +310,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts
              f"Given in_context_messages has different length from given token_counts: {len(in_context_messages)} != {len(token_counts)}"
          )

-     in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+     in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)

      if summarizer_settings.evict_all_messages:
          logger.info("Evicting all messages...")
@@ -351,7 +351,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts


  def get_token_counts_for_messages(in_context_messages: List[Message]) -> List[int]:
-     in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+     in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
      token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai]
      return token_counts

letta/llm_api/llm_api_tools.py CHANGED
@@ -7,7 +7,6 @@ import requests

  from letta.constants import CLI_WARNING_PREFIX
  from letta.errors import LettaConfigurationError, RateLimitExceededError
- from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
  from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
  from letta.llm_api.openai import (
      build_openai_chat_completions_request,
@@ -146,7 +145,7 @@ def create(

      # Count the tokens first, if there's an overflow exit early by throwing an error up the stack
      # NOTE: we want to include a specific substring in the error message to trigger summarization
-     messages_oai_format = [m.to_openai_dict() for m in messages]
+     messages_oai_format = Message.to_openai_dicts_from_list(messages)
      prompt_tokens = num_tokens_from_messages(messages=messages_oai_format, model=llm_config.model)
      function_tokens = num_tokens_from_functions(functions=functions, model=llm_config.model) if functions else 0
      if prompt_tokens + function_tokens > llm_config.context_window:
@@ -245,54 +244,6 @@ def create(

          return response

-     elif llm_config.model_endpoint_type == "deepseek":
-         if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
-             # only is a problem if we are *not* using an openai proxy
-             raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
-
-         data = build_deepseek_chat_completions_request(
-             llm_config,
-             messages,
-             user_id,
-             functions,
-             function_call,
-             use_tool_naming,
-             llm_config.max_tokens,
-         )
-         if stream:  # Client requested token streaming
-             data.stream = True
-             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
-                 stream_interface, AgentRefreshStreamingInterface
-             ), type(stream_interface)
-             response = openai_chat_completions_process_stream(
-                 url=llm_config.model_endpoint,
-                 api_key=model_settings.deepseek_api_key,
-                 chat_completion_request=data,
-                 stream_interface=stream_interface,
-                 name=name,
-                 # TODO should we toggle for R1 vs V3?
-                 expect_reasoning_content=True,
-             )
-         else:  # Client did not request token streaming (expect a blocking backend response)
-             data.stream = False
-             if isinstance(stream_interface, AgentChunkStreamingInterface):
-                 stream_interface.stream_start()
-             try:
-                 response = openai_chat_completions_request(
-                     url=llm_config.model_endpoint,
-                     api_key=model_settings.deepseek_api_key,
-                     chat_completion_request=data,
-                 )
-             finally:
-                 if isinstance(stream_interface, AgentChunkStreamingInterface):
-                     stream_interface.stream_end()
-         """
-         if llm_config.put_inner_thoughts_in_kwargs:
-             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-         """
-         response = convert_deepseek_response_to_chatcompletion(response)
-         return response
-
      # local model
      else:
          if stream:
letta/llm_api/llm_client.py CHANGED
@@ -58,13 +58,6 @@ class LLMClient:
                      put_inner_thoughts_first=put_inner_thoughts_first,
                      actor=actor,
                  )
-             case ProviderType.openai | ProviderType.ollama:
-                 from letta.llm_api.openai_client import OpenAIClient
-
-                 return OpenAIClient(
-                     put_inner_thoughts_first=put_inner_thoughts_first,
-                     actor=actor,
-                 )
              case ProviderType.together:
                  from letta.llm_api.together_client import TogetherClient

@@ -101,4 +94,9 @@ class LLMClient:
                      actor=actor,
                  )
              case _:
-                 return None
+                 from letta.llm_api.openai_client import OpenAIClient
+
+                 return OpenAIClient(
+                     put_inner_thoughts_first=put_inner_thoughts_first,
+                     actor=actor,
+                 )
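With this llm_client.py change, the explicit openai/ollama cases are gone and the catch-all `case _:` now returns the OpenAI client, making OpenAI-compatible handling the default for any provider type without a dedicated client. A minimal standalone sketch of that dispatch-with-fallback pattern (illustrative names only, not letta's classes):

# Self-contained sketch of a provider-dispatch factory that falls back to an
# OpenAI-compatible client for any unrecognized provider type (Python 3.10+).
from enum import Enum


class ProviderType(str, Enum):
    anthropic = "anthropic"
    google_vertex = "google_vertex"
    openai = "openai"
    ollama = "ollama"


class OpenAICompatibleClient:
    def __init__(self, provider: ProviderType) -> None:
        self.provider = provider


class AnthropicClient(OpenAICompatibleClient):
    pass


def create_client(provider: ProviderType) -> OpenAICompatibleClient:
    match provider:
        case ProviderType.anthropic:
            return AnthropicClient(provider)
        case _:
            # openai, ollama, and any future provider use the OpenAI-compatible path
            return OpenAICompatibleClient(provider)


if __name__ == "__main__":
    print(type(create_client(ProviderType.ollama)).__name__)  # -> OpenAICompatibleClient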
letta/llm_api/mistral.py CHANGED
@@ -13,7 +13,7 @@ async def mistral_get_model_list_async(url: str, api_key: str) -> dict:
      if api_key is not None:
          headers["Authorization"] = f"Bearer {api_key}"

-     logger.debug(f"Sending request to %s", url)
+     logger.debug("Sending request to %s", url)

      async with aiohttp.ClientSession() as session:
          # TODO add query param "tool" to be true
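The mistral.py change drops the f-string prefix so that the `%s` placeholder is substituted by the logging module itself rather than left verbatim in the message. A quick self-contained illustration of the difference, using only the standard library (the URL below is just a placeholder):

# With %-style placeholders, the logging module performs the substitution only
# when the record is actually emitted; the old f"... %s ..." form never
# substituted `url` at all, since f-strings do not interpret %s.
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("example")

url = "https://api.mistral.ai/v1/models"
logger.debug("Sending request to %s", url)   # lazy, substituted by logging
logger.debug(f"Sending request to {url}")    # eager f-string alternative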