letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241008104105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of letta-nightly as possibly problematic; see the registry page for details.
- letta/agent.py +19 -9
- letta/credentials.py +1 -1
- letta/errors.py +1 -1
- letta/llm_api/azure_openai.py +15 -19
- letta/llm_api/helpers.py +153 -0
- letta/llm_api/llm_api_tools.py +39 -215
- letta/llm_api/openai.py +70 -2
- letta/providers.py +5 -1
- letta/schemas/llm_config.py +5 -2
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +9 -6
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +24 -12
- letta/server/rest_api/routers/v1/tools.py +11 -6
- letta/server/server.py +17 -34
- letta/settings.py +2 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/RECORD +25 -24
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py
CHANGED
```diff
@@ -1,25 +1,25 @@
-import copy
-import json
 import os
 import random
 import time
-import warnings
 from typing import List, Optional, Union
 
 import requests
 
-from letta.constants import CLI_WARNING_PREFIX
+from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
-from letta.llm_api.azure_openai import (
-    MODEL_TO_AZURE_ENGINE,
-    azure_openai_chat_completions_request,
-)
+from letta.llm_api.azure_openai import azure_openai_chat_completions_request
 from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    derive_inner_thoughts_in_kwargs,
+    unpack_all_inner_thoughts_from_kwargs,
+)
 from letta.llm_api.openai import (
+    build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
 )
```
```diff
@@ -37,144 +37,15 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
-from letta.utils import json_dumps
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
 
-# TODO update to use better types
-def add_inner_thoughts_to_functions(
-    functions: List[dict],
-    inner_thoughts_key: str,
-    inner_thoughts_description: str,
-    inner_thoughts_required: bool = True,
-    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
-) -> List[dict]:
-    """Add an inner_thoughts kwarg to every function in the provided list"""
-    # return copies
-    new_functions = []
-
-    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
-    for function_object in functions:
-        function_params = function_object["parameters"]["properties"]
-        required_params = list(function_object["parameters"]["required"])
-
-        # if the inner thoughts arg doesn't exist, add it
-        if inner_thoughts_key not in function_params:
-            function_params[inner_thoughts_key] = {
-                "type": "string",
-                "description": inner_thoughts_description,
-            }
-
-        # make sure it's tagged as required
-        new_function_object = copy.deepcopy(function_object)
-        if inner_thoughts_required and inner_thoughts_key not in required_params:
-            required_params.append(inner_thoughts_key)
-            new_function_object["parameters"]["required"] = required_params
-
-        new_functions.append(new_function_object)
-
-    # return a list of copies
-    return new_functions
-
-
-def unpack_all_inner_thoughts_from_kwargs(
-    response: ChatCompletionResponse,
-    inner_thoughts_key: str,
-) -> ChatCompletionResponse:
-    """Strip the inner thoughts out of the tool call and put it in the message content"""
-    if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
-
-    new_choices = []
-    for choice in response.choices:
-        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
-
-    # return an updated copy
-    new_response = response.model_copy(deep=True)
-    new_response.choices = new_choices
-    return new_response
-
-
-def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
-    message = choice.message
-    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
-        if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
-        # TODO support multiple tool calls
-        tool_call = message.tool_calls[0]
-
-        try:
-            # Sadly we need to parse the JSON since args are in string format
-            func_args = dict(json.loads(tool_call.function.arguments))
-            if inner_thoughts_key in func_args:
-                # extract the inner thoughts
-                inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                # replace the kwargs
-                new_choice = choice.model_copy(deep=True)
-                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                # also replace the message content
-                if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                new_choice.message.content = inner_thoughts
-
-                return new_choice
-            else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-        except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-            raise e
-
-
-def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
-    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
-    from letta.utils import printd
-
-    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-
-    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
-    if match_string in str(exception):
-        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
-        return True
-
-    # Based on python requests + OpenAI REST API (/v1)
-    elif isinstance(exception, requests.exceptions.HTTPError):
-        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
-            try:
-                error_details = exception.response.json()
-                if "error" not in error_details:
-                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
-                    return False
-                else:
-                    error_details = error_details["error"]
-
-                # Check for the specific error code
-                if error_details.get("code") == "context_length_exceeded":
-                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
-                    return True
-                # Soft-check for "maximum context length" inside of the message
-                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
-                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
-                    return True
-                else:
-                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
-                    return False
-            except ValueError:
-                # JSON decoding failed
-                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
-
-    # Generic fail
-    else:
-        return False
-
-
 def retry_with_exponential_backoff(
     func,
     initial_delay: float = 1,
```
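The helpers deleted in the hunk above (add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs, unpack_inner_thoughts_from_kwargs, is_context_overflow_error) move into the new letta/llm_api/helpers.py module (+153 lines per the file list), whose contents this diff does not show. Judging from the inline OptionState logic removed from create() in the hunks below, the new derive_inner_thoughts_in_kwargs helper plausibly behaves like this sketch; the body and the OptionState import path are assumptions reconstructed from the deleted code, not the actual helper:

```python
# Hypothetical sketch of derive_inner_thoughts_in_kwargs, reconstructed from the
# inline logic this release deletes from create(). The real implementation lives
# in letta/llm_api/helpers.py, which this diff does not display.
from letta.schemas.enums import OptionState  # import path assumed


def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str) -> bool:
    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
        # models that are known to not use `content` fields on tool calls
        return "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
    # an explicit YES/NO option overrides the model-based default
    return inner_thoughts_in_kwargs_option == OptionState.YES
```

If the helper matches the removed logic, default behavior is unchanged: gpt-4o, gpt-4-turbo, and gpt-3.5-turbo keep inner thoughts in function kwargs unless the caller opts out explicitly.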
```diff
@@ -248,7 +119,8 @@ def create(
     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
-
+    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
+    max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
```
```diff
@@ -267,59 +139,14 @@ def create(
 
     # openai
     if llm_config.model_endpoint_type == "openai":
-
-        if inner_thoughts_in_kwargs == OptionState.DEFAULT:
-            # model that are known to not use `content` fields on tool calls
-            inner_thoughts_in_kwargs = (
-                "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model
-            )
-        else:
-            inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
-
-        if not isinstance(inner_thoughts_in_kwargs, bool):
-            warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-            inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-        if inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        openai_message_list = [
-            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
-        ]
-
-        # TODO do the same for Azure?
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
-        if use_tool_naming:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        # https://platform.openai.com/docs/guides/text-generation/json-mode
-        # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
-        if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
-            data.response_format = {"type": "json_object"}
 
-        if "inference.memgpt.ai" in llm_config.model_endpoint:
-            # override user id for inference.memgpt.ai
-            import uuid
-
-            data.user = str(uuid.UUID(int=0))
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
+        )
 
         if stream:  # Client requested token streaming
             data.stream = True
```
```diff
@@ -356,35 +183,32 @@ def create(
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
-
-
+        if model_settings.azure_api_key is None:
+            raise ValueError(f"Azure API key is missing. Did you set AZURE_API_KEY in your env?")
+
+        if model_settings.azure_base_url is None:
+            raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")
+
+        # Set the llm config model_endpoint from model_settings
+        # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
+        llm_config.model_endpoint = model_settings.azure_base_url
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
+        chat_completion_request = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
         )
-
-
-
-
-
-
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = dict(
-                # NOTE: don't pass model to Azure calls, that is the deployment_id
-                # model=agent_config.model,
-                messages=[m.to_openai_dict() for m in messages],
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        return azure_openai_chat_completions_request(
-            resource_name=model_settings.azure_endpoint,
-            deployment_id=azure_deployment,
-            api_version=model_settings.azure_version,
-            api_key=model_settings.azure_key,
-            data=data,
+
+        response = azure_openai_chat_completions_request(
+            model_settings=model_settings,
+            llm_config=llm_config,
+            api_key=model_settings.azure_api_key,
+            chat_completion_request=chat_completion_request,
         )
 
+        if inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     elif llm_config.model_endpoint_type == "google_ai":
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
```
```diff
@@ -517,7 +341,7 @@ def create(
             stream_inferface.stream_end()
 
         if inner_thoughts_in_kwargs:
-            response =
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
         return response
 
```
letta/llm_api/openai.py
CHANGED
```diff
@@ -1,5 +1,6 @@
 import json
-
+import warnings
+from typing import Generator, List, Optional, Union
 
 import httpx
 import requests
```

```diff
@@ -8,10 +9,19 @@ from httpx_sse._exceptions import SSEError
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.llm_api.helpers import add_inner_thoughts_to_functions
+from letta.local_llm.constants import (
+    INNER_THOUGHTS_KWARG,
+    INNER_THOUGHTS_KWARG_DESCRIPTION,
+)
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
-from letta.schemas.openai.chat_completion_request import
+from letta.schemas.openai.chat_completion_request import (
+    ChatCompletionRequest,
+    cast_message_to_subtype,
+)
 from letta.schemas.openai.chat_completion_response import (
     ChatCompletionChunkResponse,
     ChatCompletionResponse,
```

```diff
@@ -81,6 +91,64 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional
         raise e
 
 
+def build_openai_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: str,
+    use_tool_naming: bool,
+    inner_thoughts_in_kwargs: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    if inner_thoughts_in_kwargs:
+        functions = add_inner_thoughts_to_functions(
+            functions=functions,
+            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+        )
+
+    openai_message_list = [
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+    ]
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+
+    if use_tool_naming:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+            tool_choice=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    # https://platform.openai.com/docs/guides/text-generation/json-mode
+    # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
+    if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+        data.response_format = {"type": "json_object"}
+
+    if "inference.memgpt.ai" in llm_config.model_endpoint:
+        # override user id for inference.memgpt.ai
+        import uuid
+
+        data.user = str(uuid.UUID(int=0))
+
+    return data
+
+
 def openai_chat_completions_process_stream(
     url: str,
     api_key: str,
```
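For orientation, here is a minimal sketch of calling the new builder the way create() now does; all values below are illustrative placeholders, not taken from the package:

```python
# Illustrative call mirroring the new create() call sites; values are placeholders.
from letta.llm_api.openai import build_openai_chat_completions_request
from letta.schemas.llm_config import LLMConfig

llm_config = LLMConfig(
    model="gpt-4o",
    model_endpoint_type="openai",
    model_endpoint="https://api.openai.com/v1",
    context_window=128000,
)

# A single OpenAI-schema function, so add_inner_thoughts_to_functions has
# something to annotate when inner_thoughts_in_kwargs=True.
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

data = build_openai_chat_completions_request(
    llm_config=llm_config,
    messages=[],  # the agent's Message history would go here
    user_id="user-123",
    functions=functions,
    function_call="auto",
    use_tool_naming=True,
    inner_thoughts_in_kwargs=True,
    max_tokens=None,
)
# `data` is a ChatCompletionRequest that create() then sends through
# openai_chat_completions_request (or the Azure path shown earlier).
```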
letta/providers.py
CHANGED
```diff
@@ -220,7 +220,11 @@ class GoogleAIProvider(Provider):
 
 
 class AzureProvider(Provider):
-
+    name: str = "azure"
+    base_url: str = Field(
+        ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
+    )
+    api_key: str = Field(..., description="API key for the Azure API.")
 
 
 class VLLMProvider(OpenAIProvider):
```
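A minimal sketch of constructing the expanded AzureProvider with its two new required fields; the URL echoes the example in the field description, and the key is a placeholder:

```python
from letta.providers import AzureProvider

azure = AzureProvider(
    base_url="https://letta.openai.azure.com",  # org-specific endpoint, per the field description
    api_key="<your-azure-api-key>",             # placeholder
)
assert azure.name == "azure"
```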
letta/schemas/llm_config.py
CHANGED
```diff
@@ -11,7 +11,7 @@ class LLMConfig(BaseModel):
         model (str): The name of the LLM model.
         model_endpoint_type (str): The endpoint type for the model.
         model_endpoint (str): The endpoint for the model.
-        model_wrapper (str): The wrapper for the model.
+        model_wrapper (str): The wrapper for the model. This is used to wrap additional text around the input/output of the model. This is useful for text-to-text completions, such as the Completions API in OpenAI.
         context_window (int): The context window size for the model.
     """
```

```diff
@@ -34,7 +34,10 @@ class LLMConfig(BaseModel):
         "vllm",
         "hugging-face",
     ] = Field(..., description="The endpoint type for the model.")
-    model_endpoint: str = Field(
+    model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
+    api_version: Optional[str] = Field(
+        None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview."
+    )
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
 
```
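With model_endpoint now optional and api_version added, an Azure-backed config can leave the endpoint unset; per the llm_api_tools.py change above, create() copies it from AZURE_BASE_URL at request time. A sketch with placeholder values (this assumes "azure" is among the accepted endpoint types, which the elided Literal entries should include):

```python
from letta.schemas.llm_config import LLMConfig

# model_endpoint may be omitted: for Azure, create() fills it from
# model_settings.azure_base_url (the AZURE_BASE_URL env var) before the request.
azure_config = LLMConfig(
    model="gpt-4o-mini",               # placeholder deployment/model name
    model_endpoint_type="azure",       # assumed member of the Literal above
    api_version="2023-03-15-preview",  # example value from the field description
    context_window=128000,
)
```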
letta/server/rest_api/admin/tools.py
CHANGED

```diff
@@ -26,7 +26,6 @@ class CreateToolResponse(BaseModel):
 
 
 def setup_tools_index_router(server: SyncServer, interface: QueuingInterface):
-    # get_current_user_with_server = partial(partial(get_current_user, server), password)
 
     @router.delete("/tools/{tool_name}", tags=["tools"])
     async def delete_tool(
```
letta/server/rest_api/app.py
CHANGED
```diff
@@ -5,8 +5,7 @@ from pathlib import Path
 from typing import Optional
 
 import uvicorn
-from fastapi import FastAPI
-from fastapi.responses import JSONResponse
+from fastapi import FastAPI
 from starlette.middleware.cors import CORSMiddleware
 
 from letta.server.constants import REST_DEFAULT_PORT
```

```diff
@@ -84,21 +83,6 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )
 
-    @app.middleware("http")
-    async def set_current_user_middleware(request: Request, call_next):
-        user_id = request.headers.get("user_id")
-        if user_id:
-            try:
-                server.set_current_user(user_id)
-            except ValueError as e:
-                # Return an HTTP 401 Unauthorized response
-                # raise HTTPException(status_code=401, detail=str(e))
-                return JSONResponse(status_code=401, content={"detail": str(e)})
-        else:
-            server.set_current_user(None)
-        response = await call_next(request)
-        return response
-
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
         # this gives undocumented routes for "latest" and bare api calls.
```
letta/server/rest_api/routers/openai/assistants/threads.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List
 
-from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
 
 from letta.constants import DEFAULT_PRESET
 from letta.schemas.agent import CreateAgent
```

```diff
@@ -43,11 +43,12 @@ router = APIRouter(prefix="/v1/threads", tags=["threads"])
 def create_thread(
     request: CreateThreadRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     # TODO: use requests.description and requests.metadata fields
     # TODO: handle requests.file_ids and requests.tools
     # TODO: eventually allow request to override embedding/llm model
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
 
     print("Create thread/agent", request)
     # create a letta agent
```

```diff
@@ -67,8 +68,9 @@ def create_thread(
 def retrieve_thread(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
     agent = server.get_agent(user_id=actor.id, agent_id=thread_id)
     assert agent is not None
     return OpenAIThread(
```

```diff
@@ -100,8 +102,9 @@ def create_message(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     request: CreateMessageRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
     agent_id = thread_id
     # create message object
     message = Message(
```

```diff
@@ -143,8 +146,9 @@ def list_messages(
     after: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     before: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id)
     after_uuid = after if before else None
     before_uuid = before if before else None
     agent_id = thread_id
```

```diff
@@ -239,7 +243,6 @@ def create_run(
     request: CreateRunRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
 ):
-    server.get_current_user()
 
     # TODO: add request.instructions as a message?
     agent_id = thread_id
```
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import json
 from typing import TYPE_CHECKING
 
-from fastapi import APIRouter, Body, Depends, HTTPException
+from fastapi import APIRouter, Body, Depends, Header, HTTPException
 
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import FunctionCall, LettaMessage
```

```diff
@@ -30,12 +30,14 @@ router = APIRouter(prefix="/v1/chat/completions", tags=["chat_completions"])
 async def create_chat_completion(
     completion_request: ChatCompletionRequest = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     """Send a message to a Letta agent via a /chat/completions completion_request
     The bearer token will be used to identify the user.
     The 'user' field in the completion_request should be set to the agent ID.
     """
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
+
     agent_id = completion_request.user
     if agent_id is None:
         raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
```