letta-nightly 0.7.20.dev20250520104253__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +290 -3
- letta/agents/base_agent.py +0 -55
- letta/agents/helpers.py +5 -0
- letta/agents/letta_agent.py +314 -64
- letta/agents/letta_agent_batch.py +102 -55
- letta/agents/voice_agent.py +5 -5
- letta/client/client.py +9 -18
- letta/constants.py +55 -1
- letta/functions/function_sets/builtin.py +27 -0
- letta/functions/mcp_client/stdio_client.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +10 -1
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +21 -2
- letta/llm_api/anthropic_client.py +33 -6
- letta/llm_api/google_ai_client.py +136 -423
- letta/llm_api/google_vertex_client.py +173 -22
- letta/llm_api/llm_api_tools.py +27 -0
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/llm_client_base.py +32 -21
- letta/llm_api/openai.py +57 -0
- letta/llm_api/openai_client.py +7 -11
- letta/memory.py +0 -1
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +1 -0
- letta/orm/provider_trace.py +26 -0
- letta/orm/step.py +1 -0
- letta/schemas/provider_trace.py +43 -0
- letta/schemas/providers.py +210 -65
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +37 -19
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +57 -34
- letta/server/rest_api/routers/v1/blocks.py +3 -3
- letta/server/rest_api/routers/v1/identities.py +24 -26
- letta/server/rest_api/routers/v1/jobs.py +3 -3
- letta/server/rest_api/routers/v1/llms.py +13 -8
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
- letta/server/rest_api/routers/v1/tags.py +3 -3
- letta/server/rest_api/routers/v1/telemetry.py +18 -0
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/rest_api/streaming_response.py +105 -0
- letta/server/rest_api/utils.py +4 -0
- letta/server/server.py +140 -0
- letta/services/agent_manager.py +251 -18
- letta/services/block_manager.py +52 -37
- letta/services/helpers/noop_helper.py +10 -0
- letta/services/identity_manager.py +43 -38
- letta/services/job_manager.py +29 -0
- letta/services/message_manager.py +111 -0
- letta/services/sandbox_config_manager.py +36 -0
- letta/services/step_manager.py +146 -0
- letta/services/telemetry_manager.py +58 -0
- letta/services/tool_executor/tool_execution_manager.py +49 -5
- letta/services/tool_executor/tool_execution_sandbox.py +47 -0
- letta/services/tool_executor/tool_executor.py +236 -7
- letta/services/tool_manager.py +160 -1
- letta/services/tool_sandbox/e2b_sandbox.py +65 -3
- letta/settings.py +10 -2
- letta/tracing.py +5 -5
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +67 -60
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py
CHANGED
@@ -5,14 +5,16 @@ from typing import List, Optional
 from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
+from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
-from letta.llm_api.
+from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings, settings
 from letta.utils import get_tool_call_id
@@ -20,18 +22,21 @@ from letta.utils import get_tool_call_id
 logger = get_logger(__name__)
 
 
-class GoogleVertexClient(
+class GoogleVertexClient(LLMClientBase):
 
-    def
-
-        Performs underlying request to llm and returns raw response.
-        """
-        client = genai.Client(
+    def _get_client(self):
+        return genai.Client(
             vertexai=True,
             project=model_settings.google_cloud_project,
             location=model_settings.google_cloud_location,
             http_options={"api_version": "v1"},
         )
+
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = self._get_client()
         response = client.models.generate_content(
             model=llm_config.model,
             contents=request_data["contents"],
@@ -43,12 +48,7 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Performs underlying request to llm and returns raw response.
         """
-        client =
-            vertexai=True,
-            project=model_settings.google_cloud_project,
-            location=model_settings.google_cloud_location,
-            http_options={"api_version": "v1"},
-        )
+        client = self._get_client()
         response = await client.aio.models.generate_content(
             model=llm_config.model,
             contents=request_data["contents"],
@@ -56,6 +56,139 @@ class GoogleVertexClient(GoogleAIClient):
         )
         return response.model_dump()
 
+    def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+        """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+        In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+        so there is no natural follow-up 'model' role message.
+
+        To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+        with role == 'model' that is placed in-betweeen and function output
+        (role == 'tool') and user message (role == 'user').
+        """
+        dummy_yield_message = {
+            "role": "model",
+            "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+        }
+        messages_with_padding = []
+        for i, message in enumerate(messages):
+            messages_with_padding.append(message)
+            # Check if the current message role is 'tool' and the next message role is 'user'
+            if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+                messages_with_padding.append(dummy_yield_message)
+
+        return messages_with_padding
+
+    def _clean_google_ai_schema_properties(self, schema_part: dict):
+        """Recursively clean schema parts to remove unsupported Google AI keywords."""
+        if not isinstance(schema_part, dict):
+            return
+
+        # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
+        # * Only a subset of the OpenAPI schema is supported.
+        # * Supported parameter types in Python are limited.
+        unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
+        keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
+        for key_to_remove in keys_to_remove_at_this_level:
+            logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
+            del schema_part[key_to_remove]
+
+        if schema_part.get("type") == "string" and "format" in schema_part:
+            allowed_formats = ["enum", "date-time"]
+            if schema_part["format"] not in allowed_formats:
+                logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
+                del schema_part["format"]
+
+        # Check properties within the current level
+        if "properties" in schema_part and isinstance(schema_part["properties"], dict):
+            for prop_name, prop_schema in schema_part["properties"].items():
+                self._clean_google_ai_schema_properties(prop_schema)
+
+        # Check items within arrays
+        if "items" in schema_part and isinstance(schema_part["items"], dict):
+            self._clean_google_ai_schema_properties(schema_part["items"])
+
+        # Check within anyOf, allOf, oneOf lists
+        for key in ["anyOf", "allOf", "oneOf"]:
+            if key in schema_part and isinstance(schema_part[key], list):
+                for item_schema in schema_part[key]:
+                    self._clean_google_ai_schema_properties(item_schema)
+
+    def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
+        """
+        OpenAI style:
+          "tools": [{
+            "type": "function",
+            "function": {
+                "name": "find_movies",
+                "description": "find ....",
+                "parameters": {
+                  "type": "object",
+                  "properties": {
+                     PARAM: {
+                       "type": PARAM_TYPE, # eg "string"
+                       "description": PARAM_DESCRIPTION,
+                     },
+                     ...
+                  },
+                  "required": List[str],
+                }
+            }
+          }
+        ]
+
+        Google AI style:
+          "tools": [{
+            "functionDeclarations": [{
+              "name": "find_movies",
+              "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+              "parameters": {
+                "type": "OBJECT",
+                "properties": {
+                  "location": {
+                    "type": "STRING",
+                    "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+                  },
+                  "description": {
+                    "type": "STRING",
+                    "description": "Any kind of description including category or genre, title words, attributes, etc."
+                  }
+                },
+                "required": ["description"]
+              }
+            }, {
+              "name": "find_theaters",
+              ...
+        """
+        function_list = [
+            dict(
+                name=t.function.name,
+                description=t.function.description,
+                parameters=t.function.parameters, # TODO need to unpack
+            )
+            for t in tools
+        ]
+
+        # Add inner thoughts if needed
+        for func in function_list:
+            # Note: Google AI API used to have weird casing requirements, but not any more
+
+            # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
+            if "parameters" in func and isinstance(func["parameters"], dict):
+                self._clean_google_ai_schema_properties(func["parameters"])
+
+            # Add inner thoughts
+            if llm_config.put_inner_thoughts_in_kwargs:
+                from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
+
+                func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
+                    "type": "string",
+                    "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+                }
+                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
+
+        return [{"functionDeclarations": function_list}]
+
     def build_request_data(
         self,
         messages: List[PydanticMessage],
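The padding helper above can be read as follows: whenever a 'tool'/'function' message is immediately followed by a 'user' message, a dummy 'model' turn is inserted between them. A minimal sketch (not part of the diff), assuming vertex_client is an already-constructed GoogleVertexClient and using simplified placeholder message dicts:

    # Illustrative only; message contents are placeholders, not real Gemini payloads.
    messages = [
        {"role": "user", "parts": [{"text": "What is the weather?"}]},
        {"role": "model", "parts": [{"text": "Calling get_weather..."}]},
        {"role": "tool", "parts": [{"text": "get_weather returned: sunny"}]},
        {"role": "user", "parts": [{"text": "Thanks!"}]},
    ]
    padded = vertex_client.add_dummy_model_messages(messages)
    # A dummy {"role": "model", ...} entry is inserted between the 'tool' message and the
    # trailing 'user' message, satisfying the Google AI ordering restriction.
    assert len(padded) == 5 and padded[3]["role"] == "model"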
@@ -66,11 +199,29 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
-        request_data["config"] = request_data.pop("generation_config")
-        request_data["config"]["tools"] = request_data.pop("tools")
 
-
+        if tools:
+            tool_objs = [Tool(type="function", function=t) for t in tools]
+            tool_names = [t.function.name for t in tool_objs]
+            # Convert to the exact payload style Google expects
+            formatted_tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
+        else:
+            formatted_tools = []
+            tool_names = []
+
+        contents = self.add_dummy_model_messages(
+            [m.to_google_ai_dict() for m in messages],
+        )
+
+        request_data = {
+            "contents": contents,
+            "config": {
+                "temperature": llm_config.temperature,
+                "max_output_tokens": llm_config.max_tokens,
+                "tools": formatted_tools,
+            },
+        }
+
         if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
             request_data["config"]["response_mime_type"] = "application/json"
             request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
@@ -89,11 +240,11 @@ class GoogleVertexClient(GoogleAIClient):
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
-
-
-
-
-
+        if llm_config.enable_reasoner:
+            thinking_config = ThinkingConfig(
+                thinking_budget=llm_config.max_reasoning_tokens,
+            )
+            request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
         return request_data
 
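Taken together with the build_request_data rewrite above, the payload handed to the google-genai SDK now has roughly this shape (a sketch with placeholder values; only the keys visible in the diff are assumed):

    request_data = {
        "contents": [...],  # Gemini-style messages, padded by add_dummy_model_messages
        "config": {
            "temperature": 0.7,
            "max_output_tokens": 4096,
            "tools": [{"functionDeclarations": [...]}],
            # "thinking_config" is attached only when llm_config.enable_reasoner is set
        },
    }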
letta/llm_api/llm_api_tools.py
CHANGED
@@ -20,15 +20,19 @@ from letta.llm_api.openai import (
     build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
+    prepare_openai_payload,
 )
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.orm.user import User
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.services.telemetry_manager import TelemetryManager
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.tracing import log_event, trace_method
@@ -142,6 +146,9 @@ def create(
     model_settings: Optional[dict] = None, # TODO: eventually pass from server
     put_inner_thoughts_first: bool = True,
     name: Optional[str] = None,
+    telemetry_manager: Optional[TelemetryManager] = None,
+    step_id: Optional[str] = None,
+    actor: Optional[User] = None,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
@@ -233,6 +240,16 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()
 
+        telemetry_manager.create_provider_trace(
+            actor=actor,
+            provider_trace_create=ProviderTraceCreate(
+                request_json=prepare_openai_payload(data),
+                response_json=response.model_json_schema(),
+                step_id=step_id,
+                organization_id=actor.organization_id,
+            ),
+        )
+
         if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
@@ -407,6 +424,16 @@ def create(
         if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
+        telemetry_manager.create_provider_trace(
+            actor=actor,
+            provider_trace_create=ProviderTraceCreate(
+                request_json=chat_completion_request.model_json_schema(),
+                response_json=response.model_json_schema(),
+                step_id=step_id,
+                organization_id=actor.organization_id,
+            ),
+        )
+
         return response
 
         # elif llm_config.model_endpoint_type == "cohere":
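A minimal sketch of how a caller might thread the new telemetry arguments through create() (illustrative only; the other required arguments and the surrounding objects are assumed to already exist in the caller's scope):

    response = create(
        llm_config=llm_config,
        messages=messages,
        # ... existing arguments unchanged ...
        telemetry_manager=TelemetryManager(),  # persists request/response JSON for the step
        step_id=step_id,                       # links the trace to a Step row
        actor=actor,                           # supplies organization_id for the trace
    )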
letta/llm_api/llm_client.py
CHANGED
letta/llm_api/llm_client_base.py
CHANGED
@@ -9,7 +9,9 @@ from letta.errors import LLMError
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
-from letta.
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.services.telemetry_manager import TelemetryManager
+from letta.tracing import log_event, trace_method
 
 if TYPE_CHECKING:
     from letta.orm import User
@@ -31,13 +33,15 @@ class LLMClientBase:
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming
 
+    @trace_method
     def send_llm_request(
         self,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None, # TODO: change to Tool object
-        stream: bool = False,
         force_tool_call: Optional[str] = None,
+        telemetry_manager: Optional["TelemetryManager"] = None,
+        step_id: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
         """
         Issues a request to the downstream model endpoint and parses response.
@@ -48,37 +52,51 @@ class LLMClientBase:
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
-
-
-
-
+            response_data = self.request(request_data, llm_config)
+            if step_id and telemetry_manager:
+                telemetry_manager.create_provider_trace(
+                    actor=self.actor,
+                    provider_trace_create=ProviderTraceCreate(
+                        request_json=request_data,
+                        response_json=response_data,
+                        step_id=step_id,
+                        organization_id=self.actor.organization_id,
+                    ),
+                )
             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
             raise self.handle_llm_error(e)
 
         return self.convert_response_to_chat_completion(response_data, messages, llm_config)
 
+    @trace_method
     async def send_llm_request_async(
         self,
+        request_data: dict,
         messages: List[Message],
         llm_config: LLMConfig,
-
-
-        force_tool_call: Optional[str] = None,
+        telemetry_manager: "TelemetryManager | None" = None,
+        step_id: str | None = None,
     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
         """
         Issues a request to the downstream model endpoint.
         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, llm_config, tools, force_tool_call)
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
-
-
-
-
+            response_data = await self.request_async(request_data, llm_config)
+            await telemetry_manager.create_provider_trace_async(
+                actor=self.actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=request_data,
+                    response_json=response_data,
+                    step_id=step_id,
+                    organization_id=self.actor.organization_id,
+                ),
+            )
+
             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
             raise self.handle_llm_error(e)
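With the updated signatures, a concrete client is driven roughly as below (a sketch, not part of the diff, assuming llm_client is an already-constructed LLMClientBase subclass); on the sync path a provider trace is only written when both step_id and telemetry_manager are supplied:

    response = llm_client.send_llm_request(
        messages=messages,
        llm_config=llm_config,
        tools=tools,
        telemetry_manager=TelemetryManager(),
        step_id=step_id,
    )

    # The async variant now expects a pre-built request payload (inside a coroutine):
    request_data = llm_client.build_request_data(messages, llm_config, tools, force_tool_call=None)
    response = await llm_client.send_llm_request_async(
        request_data=request_data,
        messages=messages,
        llm_config=llm_config,
        telemetry_manager=TelemetryManager(),
        step_id=step_id,
    )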
@@ -133,13 +151,6 @@ class LLMClientBase:
         """
         raise NotImplementedError
 
-    @abstractmethod
-    def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
-        """
-        Performs underlying streaming request to llm and returns raw response.
-        """
-        raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")
-
     @abstractmethod
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
         """
letta/llm_api/openai.py
CHANGED
@@ -1,6 +1,7 @@
 import warnings
 from typing import Generator, List, Optional, Union
 
+import httpx
 import requests
 from openai import OpenAI
 
@@ -110,6 +111,62 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
         raise e
 
 
+async def openai_get_model_list_async(
+    url: str,
+    api_key: Optional[str] = None,
+    fix_url: bool = False,
+    extra_params: Optional[dict] = None,
+    client: Optional["httpx.AsyncClient"] = None,
+) -> dict:
+    """https://platform.openai.com/docs/api-reference/models/list"""
+    from letta.utils import printd
+
+    # In some cases we may want to double-check the URL and do basic correction
+    if fix_url and not url.endswith("/v1"):
+        url = smart_urljoin(url, "v1")
+
+    url = smart_urljoin(url, "models")
+
+    headers = {"Content-Type": "application/json"}
+    if api_key is not None:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    printd(f"Sending request to {url}")
+
+    # Use provided client or create a new one
+    close_client = False
+    if client is None:
+        client = httpx.AsyncClient()
+        close_client = True
+
+    try:
+        response = await client.get(url, headers=headers, params=extra_params)
+        response.raise_for_status()
+        result = response.json()
+        printd(f"response = {result}")
+        return result
+    except httpx.HTTPStatusError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        error_response = None
+        try:
+            error_response = http_err.response.json()
+        except:
+            error_response = {"status_code": http_err.response.status_code, "text": http_err.response.text}
+        printd(f"Got HTTPError, exception={http_err}, response={error_response}")
+        raise http_err
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+    finally:
+        if close_client:
+            await client.aclose()
+
+
 def build_openai_chat_completions_request(
     llm_config: LLMConfig,
     messages: List[_Message],
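A small usage sketch for the new async model-list helper (not part of the diff; the URL and key are placeholders, and the asyncio scaffolding is added here purely for illustration):

    import asyncio

    import httpx


    async def main():
        async with httpx.AsyncClient() as http:
            models = await openai_get_model_list_async(
                "https://api.openai.com/v1",
                api_key="sk-...",
                client=http,  # caller-owned client; the helper only closes clients it created itself
            )
        print([m["id"] for m in models.get("data", [])])


    asyncio.run(main())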
letta/llm_api/openai_client.py
CHANGED
@@ -2,7 +2,7 @@ import os
 from typing import List, Optional
 
 import openai
-from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase):
             from letta.services.provider_manager import ProviderManager
 
             api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
+        if llm_config.model_endpoint_type == ProviderType.together:
+            api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY")
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -254,20 +256,14 @@ class OpenAIClient(LLMClientBase):
 
         return chat_completion_response
 
-    def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
-        """
-        Performs underlying streaming request to OpenAI and returns the stream iterator.
-        """
-        client = OpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
-        return response_stream
-
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
         return response_stream
 
     def handle_llm_error(self, e: Exception) -> Exception:
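For context on the stream_options change (an illustrative sketch, not part of the diff): with include_usage enabled, OpenAI emits a final chunk that carries usage and has an empty choices list, so consumers typically guard on both fields:

    # Assumes `openai_client` is an OpenAIClient instance and request_data/llm_config are already prepared.
    stream = await openai_client.stream_async(request_data, llm_config)
    async for chunk in stream:
        if chunk.usage is not None:  # usage-only chunk produced because include_usage=True
            print(chunk.usage.total_tokens)
        elif chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")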
letta/memory.py
CHANGED
letta/orm/__init__.py
CHANGED
@@ -19,6 +19,7 @@ from letta.orm.message import Message
 from letta.orm.organization import Organization
 from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
 from letta.orm.provider import Provider
+from letta.orm.provider_trace import ProviderTrace
 from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
 from letta.orm.source import Source
 from letta.orm.sources_agents import SourcesAgents
letta/orm/enums.py
CHANGED
@@ -8,6 +8,7 @@ class ToolType(str, Enum):
     LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"
     LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
     LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
+    LETTA_BUILTIN = "letta_builtin"
     EXTERNAL_COMPOSIO = "external_composio"
     EXTERNAL_LANGCHAIN = "external_langchain"
     # TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?
letta/orm/provider_trace.py
ADDED
@@ -0,0 +1,26 @@
+import uuid
+
+from sqlalchemy import JSON, Index, String
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from letta.orm.mixins import OrganizationMixin
+from letta.orm.sqlalchemy_base import SqlalchemyBase
+from letta.schemas.provider_trace import ProviderTrace as PydanticProviderTrace
+
+
+class ProviderTrace(SqlalchemyBase, OrganizationMixin):
+    """Defines data model for storing provider trace information"""
+
+    __tablename__ = "provider_traces"
+    __pydantic_model__ = PydanticProviderTrace
+    __table_args__ = (Index("ix_step_id", "step_id"),)
+
+    id: Mapped[str] = mapped_column(
+        primary_key=True, doc="Unique provider trace identifier", default=lambda: f"provider_trace-{uuid.uuid4()}"
+    )
+    request_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider request")
+    response_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider response")
+    step_id: Mapped[str] = mapped_column(String, nullable=True, doc="ID of the step that this trace is associated with")
+
+    # Relationships
+    organization: Mapped["Organization"] = relationship("Organization", lazy="selectin")
letta/orm/step.py
CHANGED
@@ -35,6 +35,7 @@ class Step(SqlalchemyBase):
     )
     agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
+    provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
     model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
     context_window_limit: Mapped[Optional[int]] = mapped_column(
letta/schemas/provider_trace.py
ADDED
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.schemas.letta_base import OrmMetadataBase
+
+
+class BaseProviderTrace(OrmMetadataBase):
+    __id_prefix__ = "provider_trace"
+
+
+class ProviderTraceCreate(BaseModel):
+    """Request to create a provider trace"""
+
+    request_json: dict[str, Any] = Field(..., description="JSON content of the provider request")
+    response_json: dict[str, Any] = Field(..., description="JSON content of the provider response")
+    step_id: str = Field(None, description="ID of the step that this trace is associated with")
+    organization_id: str = Field(..., description="The unique identifier of the organization.")
+
+
+class ProviderTrace(BaseProviderTrace):
+    """
+    Letta's internal representation of a provider trace.
+
+    Attributes:
+        id (str): The unique identifier of the provider trace.
+        request_json (Dict[str, Any]): JSON content of the provider request.
+        response_json (Dict[str, Any]): JSON content of the provider response.
+        step_id (str): ID of the step that this trace is associated with.
+        organization_id (str): The unique identifier of the organization.
+        created_at (datetime): The timestamp when the object was created.
+    """
+
+    id: str = BaseProviderTrace.generate_id_field()
+    request_json: Dict[str, Any] = Field(..., description="JSON content of the provider request")
+    response_json: Dict[str, Any] = Field(..., description="JSON content of the provider response")
+    step_id: Optional[str] = Field(None, description="ID of the step that this trace is associated with")
+    organization_id: str = Field(..., description="The unique identifier of the organization.")
+    created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")