letta-nightly 0.6.33.dev20250226104113__py3-none-any.whl → 0.6.34.dev20250227200331__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.6.33"
+ __version__ = "0.6.34"

  # import clients
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -832,7 +832,7 @@ class Agent(BaseAgent):
  )

  if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window):
- printd(
+ logger.warning(
  f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
  )

@@ -842,7 +842,7 @@ class Agent(BaseAgent):
  self.agent_alerted_about_memory_pressure = True # it's up to the outer loop to handle this

  else:
- printd(
+ logger.info(
  f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
  )

@@ -892,6 +892,16 @@ class Agent(BaseAgent):
  if is_context_overflow_error(e):
  in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)

+ # TODO: this is a patch to resolve immediate issues, should be removed once the summarizer is fixes
+ if self.agent_state.message_buffer_autoclear:
+ # no calling the summarizer in this case
+ logger.error(
+ f"step() failed with an exception that looks like a context window overflow, but message buffer is set to autoclear, so skipping: '{str(e)}'"
+ )
+ raise e
+
+ summarize_attempt_count += 1
+
  if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
  logger.warning(
  f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
@@ -187,8 +187,65 @@ def create(
  function_call = "required"

  data = build_openai_chat_completions_request(
- llm_config, messages, user_id, functions, function_call, use_tool_naming, put_inner_thoughts_first=put_inner_thoughts_first
+ llm_config,
+ messages,
+ user_id,
+ functions,
+ function_call,
+ use_tool_naming,
+ put_inner_thoughts_first=put_inner_thoughts_first,
+ use_structured_output=True, # NOTE: turn on all the time for OpenAI API
  )
+
+ if stream: # Client requested token streaming
+ data.stream = True
+ assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+ stream_interface, AgentRefreshStreamingInterface
+ ), type(stream_interface)
+ response = openai_chat_completions_process_stream(
+ url=llm_config.model_endpoint,
+ api_key=api_key,
+ chat_completion_request=data,
+ stream_interface=stream_interface,
+ )
+ else: # Client did not request token streaming (expect a blocking backend response)
+ data.stream = False
+ if isinstance(stream_interface, AgentChunkStreamingInterface):
+ stream_interface.stream_start()
+ try:
+ response = openai_chat_completions_request(
+ url=llm_config.model_endpoint,
+ api_key=api_key,
+ chat_completion_request=data,
+ )
+ finally:
+ if isinstance(stream_interface, AgentChunkStreamingInterface):
+ stream_interface.stream_end()
+
+ if llm_config.put_inner_thoughts_in_kwargs:
+ response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+ return response
+
+ elif llm_config.model_endpoint_type == "xai":
+
+ api_key = model_settings.xai_api_key
+
+ if function_call is None and functions is not None and len(functions) > 0:
+ # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+ function_call = "required"
+
+ data = build_openai_chat_completions_request(
+ llm_config,
+ messages,
+ user_id,
+ functions,
+ function_call,
+ use_tool_naming,
+ put_inner_thoughts_first=put_inner_thoughts_first,
+ use_structured_output=False, # NOTE: not supported atm for xAI
+ )
+
  if stream: # Client requested token streaming
  data.stream = True
  assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
letta/llm_api/openai.py CHANGED
@@ -13,7 +13,7 @@ from letta.schemas.message import Message as _Message
  from letta.schemas.message import MessageRole as _MessageRole
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
  from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
- from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, cast_message_to_subtype
+ from letta.schemas.openai.chat_completion_request import FunctionSchema, Tool, ToolFunctionChoice, cast_message_to_subtype
  from letta.schemas.openai.chat_completion_response import (
  ChatCompletionChunkResponse,
  ChatCompletionResponse,
@@ -95,6 +95,7 @@ def build_openai_chat_completions_request(
  function_call: Optional[str],
  use_tool_naming: bool,
  put_inner_thoughts_first: bool = True,
+ use_structured_output: bool = True,
  ) -> ChatCompletionRequest:
  if functions and llm_config.put_inner_thoughts_in_kwargs:
  # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -157,6 +158,16 @@ def build_openai_chat_completions_request(
  data.user = str(uuid.UUID(int=0))
  data.model = "memgpt-openai"

+ if use_structured_output and data.tools is not None and len(data.tools) > 0:
+ # Convert to structured output style (which has 'strict' and no optionals)
+ for tool in data.tools:
+ try:
+ # tool["function"] = convert_to_structured_output(tool["function"])
+ structured_output_version = convert_to_structured_output(tool.function.model_dump())
+ tool.function = FunctionSchema(**structured_output_version)
+ except ValueError as e:
+ warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+
  return data


@@ -455,11 +466,12 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
  data.pop("tools")
  data.pop("tool_choice", None) # extra safe, should exist always (default="auto")

- if "tools" in data:
- for tool in data["tools"]:
- try:
- tool["function"] = convert_to_structured_output(tool["function"])
- except ValueError as e:
- warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+ # # NOTE: move this out to wherever the ChatCompletionRequest is created
+ # if "tools" in data:
+ # for tool in data["tools"]:
+ # try:
+ # tool["function"] = convert_to_structured_output(tool["function"])
+ # except ValueError as e:
+ # warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

  return data
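Note: the two hunks above move the structured-output conversion out of prepare_openai_payload and into build_openai_chat_completions_request, gated by the new use_structured_output flag. For readers unfamiliar with OpenAI's strict structured outputs, the conversion amounts to marking each tool schema strict, listing every property as required, and forbidding undeclared properties. The sketch below is illustrative only; the helper name and simplified rules are my assumptions, not letta's convert_to_structured_output.

    # Illustrative sketch of a strict structured-output conversion (not letta's code).
    from typing import Any, Dict


    def to_strict_schema(function_schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of a tool/function schema adjusted for OpenAI strict mode."""
        schema = dict(function_schema)
        params = dict(schema.get("parameters", {"type": "object", "properties": {}}))
        props = dict(params.get("properties", {}))

        # Strict mode requires every property to be listed as required
        # and rejects keys that are not declared in the schema.
        params["properties"] = props
        params["required"] = list(props.keys())
        params["additionalProperties"] = False

        schema["parameters"] = params
        schema["strict"] = True
        return schema


    if __name__ == "__main__":
        example = {
            "name": "send_message",
            "description": "Send a message to the user.",
            "parameters": {"type": "object", "properties": {"message": {"type": "string"}}},
        }
        print(to_strict_schema(example))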
@@ -69,6 +69,7 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
  join_model: Optional[Base] = None,
  join_conditions: Optional[Union[Tuple, List]] = None,
  identifier_keys: Optional[List[str]] = None,
+ identifier_id: Optional[str] = None,
  **kwargs,
  ) -> List["SqlalchemyBase"]:
  """
@@ -147,6 +148,10 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
  if identifier_keys and hasattr(cls, "identities"):
  query = query.join(cls.identities).filter(cls.identities.property.mapper.class_.identifier_key.in_(identifier_keys))

+ # given the identifier_id, we can find within the agents table any agents that have the identifier_id in their identity_ids
+ if identifier_id and hasattr(cls, "identities"):
+ query = query.join(cls.identities).filter(cls.identities.property.mapper.class_.id == identifier_id)
+

  # Apply filtering logic from kwargs
  if "." in key:
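Note: the identifier_id branch added above follows the same join-then-filter pattern as the existing identifier_keys branch, but matches on the related identity's primary key. A self-contained SQLAlchemy sketch of that pattern, using hypothetical Agent/Identity models rather than letta's ORM, looks like this:

    # Hypothetical models, only to illustrate the join/filter pattern used above.
    from sqlalchemy import ForeignKey, create_engine, select
    from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship


    class Base(DeclarativeBase):
        pass


    class Identity(Base):
        __tablename__ = "identities"
        id: Mapped[str] = mapped_column(primary_key=True)
        agent_id: Mapped[str] = mapped_column(ForeignKey("agents.id"))


    class Agent(Base):
        __tablename__ = "agents"
        id: Mapped[str] = mapped_column(primary_key=True)
        identities: Mapped[list["Identity"]] = relationship()


    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add(Agent(id="agent-1", identities=[Identity(id="identity-1")]))
        session.commit()

        # Join through the relationship, then filter on the related Identity.id,
        # mirroring the identifier_id branch added to SqlalchemyBase.list().
        stmt = select(Agent).join(Agent.identities).where(Identity.id == "identity-1")
        print(session.scalars(stmt).all())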
@@ -42,6 +42,7 @@ class LLMConfig(BaseModel):
  "together", # completions endpoint
  "bedrock",
  "deepseek",
+ "xai",
  ] = Field(..., description="The endpoint type for the model.")
  model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
  model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
@@ -56,7 +57,7 @@ class LLMConfig(BaseModel):
  description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
  )
  max_tokens: Optional[int] = Field(
- 1024,
+ 4096,
  description="The maximum number of tokens to generate. If not set, the model will use its default value.",
  )

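Note: this hunk adds "xai" as an accepted model_endpoint_type and raises the default max_tokens from 1024 to 4096. Assuming letta is installed, that the import path is letta.schemas.llm_config, and that no fields beyond those shown are required, a hand-built config using the new literal might look like:

    from letta.schemas.llm_config import LLMConfig

    grok_config = LLMConfig(
        model="grok-2-1212",
        model_endpoint_type="xai",  # newly accepted literal
        model_endpoint="https://api.x.ai/v1",
        context_window=131072,  # value hardcoded in xAIProvider below
    )
    print(grok_config.max_tokens)  # 4096, the new default when max_tokens is omitted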
@@ -211,6 +211,63 @@ class OpenAIProvider(Provider):
  return None


+ class xAIProvider(OpenAIProvider):
+ """https://docs.x.ai/docs/api-reference"""
+
+ name: str = "xai"
+ api_key: str = Field(..., description="API key for the xAI/Grok API.")
+ base_url: str = Field("https://api.x.ai/v1", description="Base URL for the xAI/Grok API.")
+
+ def get_model_context_window_size(self, model_name: str) -> Optional[int]:
+ # xAI doesn't return context window in the model listing,
+ # so these are hardcoded from their website
+ if model_name == "grok-2-1212":
+ return 131072
+ else:
+ return None
+
+ def list_llm_models(self) -> List[LLMConfig]:
+ from letta.llm_api.openai import openai_get_model_list
+
+ response = openai_get_model_list(self.base_url, api_key=self.api_key)
+
+ if "data" in response:
+ data = response["data"]
+ else:
+ data = response
+
+ configs = []
+ for model in data:
+ assert "id" in model, f"xAI/Grok model missing 'id' field: {model}"
+ model_name = model["id"]
+
+ # In case xAI starts supporting it in the future:
+ if "context_length" in model:
+ context_window_size = model["context_length"]
+ else:
+ context_window_size = self.get_model_context_window_size(model_name)
+
+ if not context_window_size:
+ warnings.warn(f"Couldn't find context window size for model {model_name}")
+ continue
+
+ configs.append(
+ LLMConfig(
+ model=model_name,
+ model_endpoint_type="xai",
+ model_endpoint=self.base_url,
+ context_window=context_window_size,
+ handle=self.get_handle(model_name),
+ )
+ )
+
+ return configs
+
+ def list_embedding_models(self) -> List[EmbeddingConfig]:
+ # No embeddings supported
+ return []
+
+
  class DeepSeekProvider(OpenAIProvider):
  """
  DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
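Note: assuming letta is installed, that xAIProvider is exported from letta.schemas.providers alongside the other providers (an assumption, the file is not named in this diff), and that a valid key is available, the new provider can be exercised roughly as follows; list_llm_models() issues a live request to api.x.ai.

    import os

    from letta.schemas.providers import xAIProvider  # assumed import path

    provider = xAIProvider(api_key=os.environ["XAI_API_KEY"])
    for config in provider.list_llm_models():
        # Each config carries the hardcoded 131072-token context window for grok-2-1212.
        print(config.handle, config.model, config.context_window)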
@@ -456,6 +513,13 @@ class AnthropicProvider(Provider):
  warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
  model["context_window"] = 200000

+ max_tokens = 8192
+ if "claude-3-opus" in model["id"]:
+ max_tokens = 4096
+ if "claude-3-haiku" in model["id"]:
+ max_tokens = 4096
+ # TODO: set for 3-7 extended thinking mode
+
  # We set this to false by default, because Anthropic can
  # natively support <thinking> tags inside of content fields
  # However, putting COT inside of tool calls can make it more
@@ -472,6 +536,7 @@ class AnthropicProvider(Provider):
  context_window=model["context_window"],
  handle=self.get_handle(model["id"]),
  put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+ max_tokens=max_tokens,
  )
  )
  return configs
@@ -811,6 +876,7 @@ class GoogleAIProvider(Provider):
  model_endpoint=self.base_url,
  context_window=self.get_model_context_window(model),
  handle=self.get_handle(model),
+ max_tokens=8192,
  )
  )
  return configs
@@ -862,6 +928,7 @@ class GoogleVertexProvider(Provider):
  model_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
  context_window=context_length,
  handle=self.get_handle(model),
+ max_tokens=8192,
  )
  )
  return configs
@@ -225,10 +225,10 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
  combined_args = "".join(self.current_function_arguments)
  parsed_args = OptimisticJSONParser().parse(combined_args)

- # TODO: Make this less brittle! This depends on `message` coming first!
- # This is a heuristic we use to know if we're done with the `message` part of `send_message`
- if len(parsed_args.keys()) > 1:
- self._found_message_tool_kwarg = True
+ if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+ self.assistant_message_tool_kwarg
+ ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+ self.current_json_parse_result = parsed_args
  return ChatCompletionChunk(
  id=chunk.id,
  object=chunk.object,
@@ -237,31 +237,11 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
  choices=[
  Choice(
  index=choice.index,
- delta=ChoiceDelta(),
- finish_reason="stop",
+ delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+ finish_reason=None,
  )
  ],
  )
- else:
- # If the parsed result is different
- # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
- if parsed_args != self.current_json_parse_result:
- self.current_json_parse_result = parsed_args
- # If we can see a "message" field, return it as partial content
- if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
- return ChatCompletionChunk(
- id=chunk.id,
- object=chunk.object,
- created=chunk.created.timestamp(),
- model=chunk.model,
- choices=[
- Choice(
- index=choice.index,
- delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
- finish_reason=None,
- )
- ],
- )

  # If there's a finish reason, pass that along
  if choice.finish_reason is not None:
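Note: the rewritten branch above replaces the old "more than one parsed key means the message is finished" heuristic with a simpler guard: emit a content delta only when the assistant-message kwarg is present, non-empty, and different from the previous parse. A toy illustration of that guard, using complete JSON snapshots instead of letta's OptimisticJSONParser, is shown below.

    # Toy illustration of the "only stream when the parsed value changed" guard.
    # The real interface parses partial JSON optimistically; here the successive
    # parse results are faked directly.

    def should_emit(parsed_args: dict, previous_parse: dict, key: str = "message") -> bool:
        """Emit a chunk only if `key` is present, non-empty, and changed since last time."""
        return bool(parsed_args.get(key)) and parsed_args.get(key) != previous_parse.get(key)


    snapshots = [
        {},                                  # nothing parsed yet
        {"message": "Hel"},                  # first partial value -> emit
        {"message": "Hello"},                # value grew -> emit
        {"message": "Hello", "done": True},  # trailing '}' closed the object, message unchanged -> skip
    ]

    previous: dict = {}
    for parsed in snapshots:
        if should_emit(parsed, previous):
            print("emit:", parsed["message"])
            previous = parsed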
@@ -1,50 +1,19 @@
  import asyncio
- import json
- import uuid
  from typing import TYPE_CHECKING, List, Optional, Union

- import httpx
- import openai
  from fastapi import APIRouter, Body, Depends, Header, HTTPException
  from fastapi.responses import StreamingResponse
- from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
  from openai.types.chat.completion_create_params import CompletionCreateParams
- from starlette.concurrency import run_in_threadpool

  from letta.agent import Agent
- from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, LETTA_TOOL_SET, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG
- from letta.helpers.tool_execution_helper import (
- add_pre_execution_message,
- enable_strict_mode,
- execute_external_tool,
- remove_request_heartbeat,
- )
+ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
  from letta.log import get_logger
- from letta.orm.enums import ToolType
  from letta.schemas.message import Message, MessageCreate
- from letta.schemas.openai.chat_completion_request import (
- AssistantMessage,
- ChatCompletionRequest,
- Tool,
- ToolCall,
- ToolCallFunction,
- ToolMessage,
- UserMessage,
- )
  from letta.schemas.user import User
  from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface
- from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser

  # TODO this belongs in a controller!
- from letta.server.rest_api.utils import (
- convert_letta_messages_to_openai,
- create_assistant_message_from_openai_response,
- create_user_message,
- get_letta_server,
- get_messages_from_completion_request,
- sse_async_generator,
- )
- from letta.settings import model_settings
+ from letta.server.rest_api.utils import get_letta_server, get_messages_from_completion_request, sse_async_generator

  if TYPE_CHECKING:
  from letta.server.server import SyncServer
@@ -54,258 +23,6 @@ router = APIRouter(prefix="/v1", tags=["chat_completions"])
  logger = get_logger(__name__)


- @router.post(
- "/fast/chat/completions",
- response_model=None,
- operation_id="create_fast_chat_completions",
- responses={
- 200: {
- "description": "Successful response",
- "content": {
- "text/event-stream": {"description": "Server-Sent Events stream"},
- },
- }
- },
- )
- async def create_fast_chat_completions(
- completion_request: CompletionCreateParams = Body(...),
- server: "SyncServer" = Depends(get_letta_server),
- user_id: Optional[str] = Header(None, alias="user_id"),
- ):
- actor = server.user_manager.get_user_or_default(user_id=user_id)
-
- agent_id = str(completion_request.get("user", None))
- if agent_id is None:
- raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
-
- agent_state = server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
- if agent_state.llm_config.model_endpoint_type != "openai":
- raise HTTPException(status_code=400, detail="Only OpenAI models are supported by this endpoint.")
-
- # Convert Letta messages to OpenAI messages
- in_context_messages = server.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=actor)
- openai_messages = convert_letta_messages_to_openai(in_context_messages)
-
- # Also parse user input from completion_request and append
- input_message = get_messages_from_completion_request(completion_request)[-1]
- openai_messages.append(input_message)
-
- # Tools we allow this agent to call
- tools = [t for t in agent_state.tools if t.name not in LETTA_TOOL_SET and t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
-
- # Initial request
- openai_request = ChatCompletionRequest(
- model=agent_state.llm_config.model,
- messages=openai_messages,
- # TODO: This nested thing here is so ugly, need to refactor
- tools=(
- [
- Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
- for t in tools
- ]
- if tools
- else None
- ),
- tool_choice="auto",
- user=user_id,
- max_completion_tokens=agent_state.llm_config.max_tokens,
- temperature=agent_state.llm_config.temperature,
- stream=True,
- )
-
- # Create the OpenAI async client
- client = openai.AsyncClient(
- api_key=model_settings.openai_api_key,
- max_retries=0,
- http_client=httpx.AsyncClient(
- timeout=httpx.Timeout(connect=15.0, read=30.0, write=15.0, pool=15.0),
- follow_redirects=True,
- limits=httpx.Limits(
- max_connections=50,
- max_keepalive_connections=50,
- keepalive_expiry=120,
- ),
- ),
- )
-
- # The messages we want to persist to the Letta agent
- user_message = create_user_message(input_message=input_message, agent_id=agent_id, actor=actor)
- message_db_queue = [user_message]
-
- async def event_stream():
- """
- A function-calling loop:
- - We stream partial tokens.
- - If we detect a tool call (finish_reason="tool_calls"), we parse it,
- add two messages to the conversation:
- (a) assistant message with tool_calls referencing the same ID
- (b) a tool message referencing that ID, containing the tool result.
- - Re-invoke the OpenAI request with updated conversation, streaming again.
- - End when finish_reason="stop" or no more tool calls.
- """
-
- # We'll keep updating this conversation in a loop
- conversation = openai_messages[:]
-
- while True:
- # Make the streaming request to OpenAI
- stream = await client.chat.completions.create(**openai_request.model_dump(exclude_unset=True))
-
- content_buffer = []
- tool_call_name = None
- tool_call_args_str = ""
- tool_call_id = None
- tool_call_happened = False
- finish_reason_stop = False
- optimistic_json_parser = OptimisticJSONParser(strict=True)
- current_parsed_json_result = {}
-
- async with stream:
- async for chunk in stream:
- choice = chunk.choices[0]
- delta = choice.delta
- finish_reason = choice.finish_reason # "tool_calls", "stop", or None
-
- if delta.content:
- content_buffer.append(delta.content)
- yield f"data: {chunk.model_dump_json()}\n\n"
-
- # CASE B: Partial tool call info
- if delta.tool_calls:
- # Typically there's only one in delta.tool_calls
- tc = delta.tool_calls[0]
- if tc.function.name:
- tool_call_name = tc.function.name
- if tc.function.arguments:
- tool_call_args_str += tc.function.arguments
-
- # See if we can stream out the pre-execution message
- parsed_args = optimistic_json_parser.parse(tool_call_args_str)
- if parsed_args.get(
- PRE_EXECUTION_MESSAGE_ARG
- ) and current_parsed_json_result.get( # Ensure key exists and is not None/empty
- PRE_EXECUTION_MESSAGE_ARG
- ) != parsed_args.get(
- PRE_EXECUTION_MESSAGE_ARG
- ):
- # Only stream if there's something new to stream
- # We do this way to avoid hanging JSON at the end of the stream, e.g. '}'
- if parsed_args != current_parsed_json_result:
- current_parsed_json_result = parsed_args
- synthetic_chunk = ChatCompletionChunk(
- id=chunk.id,
- object=chunk.object,
- created=chunk.created,
- model=chunk.model,
- choices=[
- Choice(
- index=choice.index,
- delta=ChoiceDelta(content=tc.function.arguments, role="assistant"),
- finish_reason=None,
- )
- ],
- )
-
- yield f"data: {synthetic_chunk.model_dump_json()}\n\n"
-
- # We might generate a unique ID for the tool call
- if tc.id:
- tool_call_id = tc.id
-
- # Check finish_reason
- if finish_reason == "tool_calls":
- tool_call_happened = True
- break
- elif finish_reason == "stop":
- finish_reason_stop = True
- break
-
- if content_buffer:
- # We treat that partial text as an assistant message
- content = "".join(content_buffer)
- conversation.append({"role": "assistant", "content": content})
-
- # Create an assistant message here to persist later
- assistant_message = create_assistant_message_from_openai_response(
- response_text=content, agent_id=agent_id, model=agent_state.llm_config.model, actor=actor
- )
- message_db_queue.append(assistant_message)
-
- if tool_call_happened:
- # Parse the tool call arguments
- try:
- tool_args = json.loads(tool_call_args_str)
- except json.JSONDecodeError:
- tool_args = {}
-
- if not tool_call_id:
- # If no tool_call_id given by the model, generate one
- tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-
- # 1) Insert the "assistant" message with the tool_calls field
- # referencing the same tool_call_id
- assistant_tool_call_msg = AssistantMessage(
- content=None,
- tool_calls=[ToolCall(id=tool_call_id, function=ToolCallFunction(name=tool_call_name, arguments=tool_call_args_str))],
- )
-
- conversation.append(assistant_tool_call_msg.model_dump())
-
- # 2) Execute the tool
- target_tool = next((x for x in tools if x.name == tool_call_name), None)
- if not target_tool:
- # Tool not found, handle error
- yield f"data: {json.dumps({'error': 'Tool not found', 'tool': tool_call_name})}\n\n"
- break
-
- try:
- tool_result, _ = execute_external_tool(
- agent_state=agent_state,
- function_name=tool_call_name,
- function_args=tool_args,
- target_letta_tool=target_tool,
- actor=actor,
- allow_agent_state_modifications=False,
- )
- except Exception as e:
- tool_result = f"Failed to call tool. Error: {e}"
-
- # 3) Insert the "tool" message referencing the same tool_call_id
- tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
-
- conversation.append(tool_message.model_dump())
-
- # 4) Add a user message prompting the tool call result summarization
- heartbeat_user_message = UserMessage(
- content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user.",
- )
- conversation.append(heartbeat_user_message.model_dump())
-
- # Now, re-invoke OpenAI with the updated conversation
- openai_request.messages = conversation
-
- continue # Start the while loop again
-
- if finish_reason_stop:
- # Model is done, no more calls
- break
-
- # If we reach here, no tool call, no "stop", but we've ended streaming
- # Possibly a model error or some other finish reason. We'll just end.
- break
-
- await run_in_threadpool(
- server.agent_manager.append_to_in_context_messages,
- message_db_queue,
- agent_id=agent_id,
- actor=actor,
- )
-
- yield "data: [DONE]\n\n"
-
- return StreamingResponse(event_stream(), media_type="text/event-stream")
-
-
  @router.post(
  "/chat/completions",
  response_model=None,
@@ -11,6 +11,7 @@ from letta.server.rest_api.routers.v1.sources import router as sources_router
  from letta.server.rest_api.routers.v1.steps import router as steps_router
  from letta.server.rest_api.routers.v1.tags import router as tags_router
  from letta.server.rest_api.routers.v1.tools import router as tools_router
+ from letta.server.rest_api.routers.v1.voice import router as voice_router

  ROUTERS = [
  tools_router,
@@ -26,4 +27,5 @@ ROUTERS = [
  runs_router,
  steps_router,
  tags_router,
+ voice_router,
  ]
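Note: for orientation, a ROUTERS list like the one above is typically mounted onto the FastAPI app in a loop. The sketch below uses stand-in routers and an assumed /v1 prefix for illustration; it is not the actual wiring in letta's server module.

    from fastapi import APIRouter, FastAPI

    # Stand-ins for the imported routers; letta's real modules define these.
    voice_router = APIRouter(prefix="/voice", tags=["voice"])
    ROUTERS = [voice_router]

    app = FastAPI()
    for router in ROUTERS:
        # Each router, including the new voice_router, is attached under the API prefix.
        app.include_router(router, prefix="/v1")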