PyPI - letta-nightly - Versions diffs - 0.6.33.dev20250226104113__tar.gz → 0.6.34.dev20250227200331__tar.gz - Mend

letta-nightly 0.6.33.dev20250226104113__tar.gz → 0.6.34.dev20250227200331__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic. Click here for more details.

Files changed (266)

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: letta-nightly
-Version: 0.6.33.dev20250226104113
+Version: 0.6.34.dev20250227200331
 Summary: Create LLM agents with long-term memory and custom tools
 License: Apache License
 Author: Letta Team

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.6.33"
+__version__ = "0.6.34"
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/agent.py RENAMED Viewed

@@ -832,7 +832,7 @@ class Agent(BaseAgent):
                 )
             if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window):
-                printd(
+                logger.warning(
                     f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
                 )
@@ -842,7 +842,7 @@ class Agent(BaseAgent):
                     self.agent_alerted_about_memory_pressure = True  # it's up to the outer loop to handle this
             else:
-                printd(
+                logger.info(
                     f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
                 )
@@ -892,6 +892,16 @@ class Agent(BaseAgent):
             if is_context_overflow_error(e):
                 in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user)
+                # TODO: this is a patch to resolve immediate issues, should be removed once the summarizer is fixed
+                if self.agent_state.message_buffer_autoclear:
+                    # no calling the summarizer in this case
+                    logger.error(
+                        f"step() failed with an exception that looks like a context window overflow, but message buffer is set to autoclear, so skipping: '{str(e)}'"
+                    )
+                    raise e
+                summarize_attempt_count += 1
                 if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
                     logger.warning(
                         f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/llm_api/llm_api_tools.py RENAMED Viewed

@@ -187,8 +187,65 @@ def create(
                 function_call = "required"
         data = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, put_inner_thoughts_first=put_inner_thoughts_first
+            llm_config,
+            messages,
+            user_id,
+            functions,
+            function_call,
+            use_tool_naming,
+            put_inner_thoughts_first=put_inner_thoughts_first,
+            use_structured_output=True,  # NOTE: turn on all the time for OpenAI API
         )
+        if stream:  # Client requested token streaming
+            data.stream = True
+            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+                stream_interface, AgentRefreshStreamingInterface
+            ), type(stream_interface)
+            response = openai_chat_completions_process_stream(
+                url=llm_config.model_endpoint,
+                api_key=api_key,
+                chat_completion_request=data,
+                stream_interface=stream_interface,
+            )
+        else:  # Client did not request token streaming (expect a blocking backend response)
+            data.stream = False
+            if isinstance(stream_interface, AgentChunkStreamingInterface):
+                stream_interface.stream_start()
+            try:
+                response = openai_chat_completions_request(
+                    url=llm_config.model_endpoint,
+                    api_key=api_key,
+                    chat_completion_request=data,
+                )
+            finally:
+                if isinstance(stream_interface, AgentChunkStreamingInterface):
+                    stream_interface.stream_end()
+        if llm_config.put_inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+        return response
+    elif llm_config.model_endpoint_type == "xai":
+        api_key = model_settings.xai_api_key
+        if function_call is None and functions is not None and len(functions) > 0:
+            # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+            function_call = "required"
+        data = build_openai_chat_completions_request(
+            llm_config,
+            messages,
+            user_id,
+            functions,
+            function_call,
+            use_tool_naming,
+            put_inner_thoughts_first=put_inner_thoughts_first,
+            use_structured_output=False,  # NOTE: not supported atm for xAI
+        )
         if stream:  # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/llm_api/openai.py RENAMED Viewed

@@ -13,7 +13,7 @@ from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
-from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_request import FunctionSchema, Tool, ToolFunctionChoice, cast_message_to_subtype
 from letta.schemas.openai.chat_completion_response import (
     ChatCompletionChunkResponse,
     ChatCompletionResponse,
@@ -95,6 +95,7 @@ def build_openai_chat_completions_request(
     function_call: Optional[str],
     use_tool_naming: bool,
     put_inner_thoughts_first: bool = True,
+    use_structured_output: bool = True,
 ) -> ChatCompletionRequest:
     if functions and llm_config.put_inner_thoughts_in_kwargs:
         # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -157,6 +158,16 @@ def build_openai_chat_completions_request(
         data.user = str(uuid.UUID(int=0))
         data.model = "memgpt-openai"
+    if use_structured_output and data.tools is not None and len(data.tools) > 0:
+        # Convert to structured output style (which has 'strict' and no optionals)
+        for tool in data.tools:
+            try:
+                # tool["function"] = convert_to_structured_output(tool["function"])
+                structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                tool.function = FunctionSchema(**structured_output_version)
+            except ValueError as e:
+                warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
     return data
@@ -455,11 +466,12 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe,  should exist always (default="auto")
-    if "tools" in data:
-        for tool in data["tools"]:
-            try:
-                tool["function"] = convert_to_structured_output(tool["function"])
-            except ValueError as e:
-                warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+    # # NOTE: move this out to wherever the ChatCompletionRequest is created
+    # if "tools" in data:
+    #     for tool in data["tools"]:
+    #         try:
+    #             tool["function"] = convert_to_structured_output(tool["function"])
+    #         except ValueError as e:
+    #             warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
     return data

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/orm/sqlalchemy_base.py RENAMED Viewed

@@ -69,6 +69,7 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
         join_model: Optional[Base] = None,
         join_conditions: Optional[Union[Tuple, List]] = None,
         identifier_keys: Optional[List[str]] = None,
+        identifier_id: Optional[str] = None,
         **kwargs,
     ) -> List["SqlalchemyBase"]:
         """
@@ -147,6 +148,10 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
             if identifier_keys and hasattr(cls, "identities"):
                 query = query.join(cls.identities).filter(cls.identities.property.mapper.class_.identifier_key.in_(identifier_keys))
+            # given the identifier_id, we can find within the agents table any agents that have the identifier_id in their identity_ids
+            if identifier_id and hasattr(cls, "identities"):
+                query = query.join(cls.identities).filter(cls.identities.property.mapper.class_.id == identifier_id)
             # Apply filtering logic from kwargs
             for key, value in kwargs.items():
                 if "." in key:

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/schemas/llm_config.py RENAMED Viewed

@@ -42,6 +42,7 @@ class LLMConfig(BaseModel):
         "together",  # completions endpoint
         "bedrock",
         "deepseek",
+        "xai",
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
@@ -56,7 +57,7 @@ class LLMConfig(BaseModel):
         description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
     )
     max_tokens: Optional[int] = Field(
-        1024,
+        4096,
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/schemas/providers.py RENAMED Viewed

@@ -211,6 +211,63 @@ class OpenAIProvider(Provider):
             return None
+class xAIProvider(OpenAIProvider):
+    """https://docs.x.ai/docs/api-reference"""
+    name: str = "xai"
+    api_key: str = Field(..., description="API key for the xAI/Grok API.")
+    base_url: str = Field("https://api.x.ai/v1", description="Base URL for the xAI/Grok API.")
+    def get_model_context_window_size(self, model_name: str) -> Optional[int]:
+        # xAI doesn't return context window in the model listing,
+        # so these are hardcoded from their website
+        if model_name == "grok-2-1212":
+            return 131072
+        else:
+            return None
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        if "data" in response:
+            data = response["data"]
+        else:
+            data = response
+        configs = []
+        for model in data:
+            assert "id" in model, f"xAI/Grok model missing 'id' field: {model}"
+            model_name = model["id"]
+            # In case xAI starts supporting it in the future:
+            if "context_length" in model:
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+            if not context_window_size:
+                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="xai",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                )
+            )
+        return configs
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # No embeddings supported
+        return []
 class DeepSeekProvider(OpenAIProvider):
     """
     DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
@@ -456,6 +513,13 @@ class AnthropicProvider(Provider):
                     warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
                     model["context_window"] = 200000
+            max_tokens = 8192
+            if "claude-3-opus" in model["id"]:
+                max_tokens = 4096
+            if "claude-3-haiku" in model["id"]:
+                max_tokens = 4096
+            # TODO: set for 3-7 extended thinking mode
             # We set this to false by default, because Anthropic can
             # natively support <thinking> tags inside of content fields
             # However, putting COT inside of tool calls can make it more
@@ -472,6 +536,7 @@ class AnthropicProvider(Provider):
                     context_window=model["context_window"],
                     handle=self.get_handle(model["id"]),
                     put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    max_tokens=max_tokens,
                 )
             )
         return configs
@@ -811,6 +876,7 @@ class GoogleAIProvider(Provider):
                     model_endpoint=self.base_url,
                     context_window=self.get_model_context_window(model),
                     handle=self.get_handle(model),
+                    max_tokens=8192,
                 )
             )
         return configs
@@ -862,6 +928,7 @@ class GoogleVertexProvider(Provider):
                     model_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
                     context_window=context_length,
                     handle=self.get_handle(model),
+                    max_tokens=8192,
                 )
             )
         return configs

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/server/rest_api/chat_completions_interface.py RENAMED Viewed

@@ -225,10 +225,10 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
                 combined_args = "".join(self.current_function_arguments)
                 parsed_args = OptimisticJSONParser().parse(combined_args)
-                # TODO: Make this less brittle! This depends on `message` coming first!
-                # This is a heuristic we use to know if we're done with the `message` part of `send_message`
-                if len(parsed_args.keys()) > 1:
-                    self._found_message_tool_kwarg = True
+                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                    self.assistant_message_tool_kwarg
+                ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                    self.current_json_parse_result = parsed_args
                     return ChatCompletionChunk(
                         id=chunk.id,
                         object=chunk.object,
@@ -237,31 +237,11 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
                         choices=[
                             Choice(
                                 index=choice.index,
-                                delta=ChoiceDelta(),
-                                finish_reason="stop",
+                                delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                finish_reason=None,
                             )
                         ],
                     )
-                else:
-                    # If the parsed result is different
-                    # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
-                    if parsed_args != self.current_json_parse_result:
-                        self.current_json_parse_result = parsed_args
-                        # If we can see a "message" field, return it as partial content
-                        if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
-                            return ChatCompletionChunk(
-                                id=chunk.id,
-                                object=chunk.object,
-                                created=chunk.created.timestamp(),
-                                model=chunk.model,
-                                choices=[
-                                    Choice(
-                                        index=choice.index,
-                                        delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
-                                        finish_reason=None,
-                                    )
-                                ],
-                            )
         # If there's a finish reason, pass that along
         if choice.finish_reason is not None:

letta_nightly-0.6.34.dev20250227200331/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py ADDED Viewed

@@ -0,0 +1,142 @@
+import asyncio
+from typing import TYPE_CHECKING, List, Optional, Union
+from fastapi import APIRouter, Body, Depends, Header, HTTPException
+from fastapi.responses import StreamingResponse
+from openai.types.chat.completion_create_params import CompletionCreateParams
+from letta.agent import Agent
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.log import get_logger
+from letta.schemas.message import Message, MessageCreate
+from letta.schemas.user import User
+from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface
+# TODO this belongs in a controller!
+from letta.server.rest_api.utils import get_letta_server, get_messages_from_completion_request, sse_async_generator
+if TYPE_CHECKING:
+    from letta.server.server import SyncServer
+router = APIRouter(prefix="/v1", tags=["chat_completions"])
+logger = get_logger(__name__)
+@router.post(
+    "/chat/completions",
+    response_model=None,
+    operation_id="create_chat_completions",
+    responses={
+        200: {
+            "description": "Successful response",
+            "content": {
+                "text/event-stream": {"description": "Server-Sent Events stream"},
+            },
+        }
+    },
+)
+async def create_chat_completions(
+    completion_request: CompletionCreateParams = Body(...),
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),
+):
+    # Validate and process fields
+    messages = get_messages_from_completion_request(completion_request)
+    input_message = messages[-1]
+    # Process remaining fields
+    if not completion_request["stream"]:
+        raise HTTPException(status_code=400, detail="Must be streaming request: `stream` was set to `False` in the request.")
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+    agent_id = str(completion_request.get("user", None))
+    if agent_id is None:
+        error_msg = "Must pass agent_id in the 'user' field"
+        logger.error(error_msg)
+        raise HTTPException(status_code=400, detail=error_msg)
+    letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
+    llm_config = letta_agent.agent_state.llm_config
+    if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
+        error_msg = f"You can only use models with type 'openai' for chat completions. This agent {agent_id} has llm_config: \n{llm_config.model_dump_json(indent=4)}"
+        logger.error(error_msg)
+        raise HTTPException(status_code=400, detail=error_msg)
+    model = completion_request.get("model")
+    if model != llm_config.model:
+        warning_msg = f"The requested model {model} is different from the model specified in this agent's ({agent_id}) llm_config: \n{llm_config.model_dump_json(indent=4)}"
+        logger.warning(f"Defaulting to {llm_config.model}...")
+        logger.warning(warning_msg)
+    logger.info(f"Received input message: {input_message}")
+    return await send_message_to_agent_chat_completions(
+        server=server,
+        letta_agent=letta_agent,
+        actor=actor,
+        messages=[MessageCreate(role=input_message["role"], content=input_message["content"])],
+    )
+async def send_message_to_agent_chat_completions(
+    server: "SyncServer",
+    letta_agent: Agent,
+    actor: User,
+    messages: Union[List[Message], List[MessageCreate]],
+    assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
+    assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+) -> StreamingResponse:
+    """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
+    # For streaming response
+    try:
+        # TODO: cleanup this logic
+        llm_config = letta_agent.agent_state.llm_config
+        # Create a new interface per request
+        letta_agent.interface = ChatCompletionsStreamingInterface()
+        streaming_interface = letta_agent.interface
+        if not isinstance(streaming_interface, ChatCompletionsStreamingInterface):
+            raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
+        # Allow AssistantMessage if desired by client
+        streaming_interface.assistant_message_tool_name = assistant_message_tool_name
+        streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
+        # Related to JSON buffer reader
+        streaming_interface.inner_thoughts_in_kwargs = (
+            llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
+        )
+        # Offload the synchronous message_func to a separate thread
+        streaming_interface.stream_start()
+        asyncio.create_task(
+            asyncio.to_thread(
+                server.send_messages,
+                actor=actor,
+                agent_id=letta_agent.agent_state.id,
+                messages=messages,
+                interface=streaming_interface,
+                put_inner_thoughts_first=False,
+            )
+        )
+        # return a stream
+        return StreamingResponse(
+            sse_async_generator(
+                streaming_interface.get_generator(),
+                usage_task=None,
+                finish_message=True,
+            ),
+            media_type="text/event-stream",
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(e)
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"{e}")

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/server/rest_api/routers/v1/__init__.py RENAMED Viewed

@@ -11,6 +11,7 @@ from letta.server.rest_api.routers.v1.sources import router as sources_router
 from letta.server.rest_api.routers.v1.steps import router as steps_router
 from letta.server.rest_api.routers.v1.tags import router as tags_router
 from letta.server.rest_api.routers.v1.tools import router as tools_router
+from letta.server.rest_api.routers.v1.voice import router as voice_router
 ROUTERS = [
     tools_router,
@@ -26,4 +27,5 @@ ROUTERS = [
     runs_router,
     steps_router,
     tags_router,
+    voice_router,
 ]

{letta_nightly-0.6.33.dev20250226104113 → letta_nightly-0.6.34.dev20250227200331}/letta/server/rest_api/routers/v1/agents.py RENAMED Viewed

@@ -69,7 +69,6 @@ def list_agents(
             "project_id": project_id,
             "template_id": template_id,
             "base_template_id": base_template_id,
-            "identifier_id": identifier_id,
         }.items()
         if value is not None
     }
@@ -84,6 +83,7 @@ def list_agents(
         tags=tags,
         match_all_tags=match_all_tags,
         identifier_keys=identifier_keys,
+        identifier_id=identifier_id,
         **kwargs,
     )
     return agents

letta-nightly 0.6.33.dev20250226104113__tar.gz → 0.6.34.dev20250227200331__tar.gz

Potentially problematic release.

letta-nightly 0.6.33.dev20250226104113__tar.gz → 0.6.34.dev20250227200331__tar.gz