letta-nightly 0.4.1.dev20241006104046__py3-none-any.whl → 0.4.1.dev20241008104105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +19 -9
- letta/credentials.py +1 -1
- letta/errors.py +1 -1
- letta/llm_api/azure_openai.py +15 -19
- letta/llm_api/helpers.py +153 -0
- letta/llm_api/llm_api_tools.py +39 -215
- letta/llm_api/openai.py +70 -2
- letta/providers.py +5 -1
- letta/schemas/llm_config.py +5 -2
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +9 -6
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +24 -12
- letta/server/rest_api/routers/v1/tools.py +11 -6
- letta/server/server.py +17 -34
- letta/settings.py +2 -1
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/RECORD +25 -24
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
```diff
@@ -18,7 +18,7 @@ from letta.constants import (
     MESSAGE_SUMMARY_WARNING_FRAC,
 )
 from letta.interface import AgentInterface
-from letta.llm_api.llm_api_tools import create, is_context_overflow_error
+from letta.llm_api.llm_api_tools import create
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
@@ -56,6 +56,7 @@ from letta.utils import (
 )
 
 from .errors import LLMError
+from .llm_api.helpers import is_context_overflow_error
 
 
 def compile_memory_metadata_block(
@@ -207,7 +208,7 @@ class BaseAgent(ABC):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """
@@ -223,7 +224,7 @@ class BaseAgent(ABC):
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: AgentInterface,
+        interface: Optional[AgentInterface],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],
@@ -460,7 +461,7 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
@@ -478,7 +479,7 @@ class Agent(BaseAgent):
                 stream=stream,
                 stream_inferface=self.interface,
                 # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
             if len(response.choices) == 0:
@@ -560,6 +561,8 @@ class Agent(BaseAgent):
             function_call = (
                 response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
             )
+
+            # Get the name of the function
             function_name = function_call.name
             printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
 
@@ -608,6 +611,13 @@ class Agent(BaseAgent):
                 self.interface.function_message(f"Error: {error_msg}", msg_obj=messages[-1])
                 return messages, False, True  # force a heartbeat to allow agent to handle error
 
+            # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
+            if "inner_thoughts" in function_args:
+                response_message.content = function_args.pop("inner_thoughts")
+            # The content if then internal monologue, not chat
+            if response_message.content:
+                self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+
             # (Still parsing function args)
             # Handle requests for immediate heartbeat
             heartbeat_request = function_args.pop("request_heartbeat", None)
@@ -716,7 +726,7 @@ class Agent(BaseAgent):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Top-level event message handler for the Letta agent"""
@@ -795,7 +805,7 @@ class Agent(BaseAgent):
                     message_sequence=input_message_sequence,
                     first_message=True,  # passed through to the prompt formatter
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
                 if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                     break
@@ -808,7 +818,7 @@ class Agent(BaseAgent):
                 response = self._get_ai_reply(
                     message_sequence=input_message_sequence,
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
 
             # Step 3: check if LLM wanted to call a function
@@ -892,7 +902,7 @@ class Agent(BaseAgent):
                 recreate_message_timestamp=recreate_message_timestamp,
                 stream=stream,
                 timestamp=timestamp,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )
 
```
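The net effect in agent.py is a rename: the old `inner_thoughts_in_kwargs` keyword becomes `inner_thoughts_in_kwargs_option`, and the actual resolution of that option moves into the new helper module added below. A minimal sketch of how the option resolves, based on `derive_inner_thoughts_in_kwargs` from the new `letta/llm_api/helpers.py` (assumes this wheel is installed):

```python
from letta.schemas.enums import OptionState
from letta.llm_api.helpers import derive_inner_thoughts_in_kwargs

# DEFAULT defers to the model family: gpt-4o / gpt-4-turbo / gpt-3.5-turbo are
# treated as models that drop the `content` field on tool calls.
print(derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, "gpt-4o-mini"))  # True
print(derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, "gpt-4"))        # False
print(derive_inner_thoughts_in_kwargs(OptionState.YES, "gpt-4"))            # True (explicit override)
```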
letta/credentials.py
CHANGED
letta/errors.py
CHANGED
```diff
@@ -56,7 +56,7 @@ class LettaMessageError(LettaError):
             error_msg += f" (Explanation: {explanation})"
 
         # Pretty print out message JSON
-        message_json = json.dumps([message.
+        message_json = json.dumps([message.model_dump() for message in messages], indent=4)
         return f"{error_msg}\n\n{message_json}"
 
 
```
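The one-line errors.py change moves message serialization onto Pydantic v2's `model_dump()` (the removed line is truncated in the diff view above). A self-contained sketch of the pattern, where the `Message` model is a stand-in rather than letta's actual schema:

```python
import json

from pydantic import BaseModel


class Message(BaseModel):  # stand-in for letta's Message schema
    role: str
    text: str


messages = [Message(role="user", text="hello")]
# Pydantic v2: model_dump() returns a plain dict (the v1 equivalent was .dict())
message_json = json.dumps([message.model_dump() for message in messages], indent=4)
print(message_json)
```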
letta/llm_api/azure_openai.py
CHANGED
```diff
@@ -2,8 +2,11 @@ from typing import Union
 
 import requests
 
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
+from letta.settings import ModelSettings
 from letta.utils import smart_urljoin
 
 MODEL_TO_AZURE_ENGINE = {
@@ -13,17 +16,16 @@ MODEL_TO_AZURE_ENGINE = {
     "gpt-3.5": "gpt-35-turbo",
     "gpt-3.5-turbo": "gpt-35-turbo",
     "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
+    "gpt-4o-mini": "gpt-4o-mini",
 }
 
 
-def clean_azure_endpoint(raw_endpoint_name: str) -> str:
-    """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'"""
-    if raw_endpoint_name is None:
-        raise ValueError(raw_endpoint_name)
-    endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "")
-    endpoint_address = endpoint_address.replace("http://", "")
-    endpoint_address = endpoint_address.replace("https://", "")
-    return endpoint_address
+def get_azure_endpoint(llm_config: LLMConfig, model_settings: ModelSettings):
+    assert llm_config.api_version, "Missing model version! This field must be provided in the LLM config for Azure."
+    assert llm_config.model in MODEL_TO_AZURE_ENGINE, f"{llm_config.model} not in supported models: {list(MODEL_TO_AZURE_ENGINE.keys())}"
+
+    model = MODEL_TO_AZURE_ENGINE[llm_config.model]
+    return f"{model_settings.azure_base_url}/openai/deployments/{model}/chat/completions?api-version={llm_config.api_version}"
 
 
 def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict:
@@ -72,19 +74,15 @@ def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version
 
 
 def azure_openai_chat_completions_request(
-    resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
+    model_settings: ModelSettings, llm_config: LLMConfig, api_key: str, chat_completion_request: ChatCompletionRequest
 ) -> ChatCompletionResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions"""
     from letta.utils import printd
 
-    assert resource_name is not None, "Missing required field when calling Azure OpenAI"
-    assert deployment_id is not None, "Missing required field when calling Azure OpenAI"
-    assert api_version is not None, "Missing required field when calling Azure OpenAI"
     assert api_key is not None, "Missing required field when calling Azure OpenAI"
 
-    resource_name = clean_azure_endpoint(resource_name)
-    url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
+    data = chat_completion_request.model_dump(exclude_none=True)
 
     # If functions == None, strip from the payload
     if "functions" in data and data["functions"] is None:
@@ -95,11 +93,10 @@ def azure_openai_chat_completions_request(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
 
-    printd(f"Sending request to {url}")
+    model_endpoint = get_azure_endpoint(llm_config, model_settings)
+    printd(f"Sending request to {model_endpoint}")
     try:
-
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
+        response = requests.post(model_endpoint, headers=headers, json=data)
         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
         response = response.json()  # convert to dict from string
         printd(f"response.json = {response}")
@@ -128,7 +125,6 @@ def azure_openai_embeddings_request(
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings"""
    from letta.utils import printd
 
-    resource_name = clean_azure_endpoint(resource_name)
     url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
 
```
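The refactor collapses the old `resource_name`/`deployment_id`/`api_version` parameters into `LLMConfig` plus `ModelSettings`, and `get_azure_endpoint` now derives the full chat-completions URL. An illustrative sketch of the URL it builds, mirroring the f-string in the diff (all values below are made up):

```python
# Inputs that get_azure_endpoint reads (values here are hypothetical):
azure_base_url = "https://my-resource.openai.azure.com"  # model_settings.azure_base_url
api_version = "2023-05-15"                               # llm_config.api_version
model = "gpt-4o-mini"                                    # MODEL_TO_AZURE_ENGINE[llm_config.model]

url = f"{azure_base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
print(url)
# https://my-resource.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2023-05-15
```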
letta/llm_api/helpers.py
ADDED
```diff
@@ -0,0 +1,153 @@
+import copy
+import json
+import warnings
+from typing import List, Union
+
+import requests
+
+from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+from letta.schemas.enums import OptionState
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+from letta.utils import json_dumps
+
+
+# TODO update to use better types
+def add_inner_thoughts_to_functions(
+    functions: List[dict],
+    inner_thoughts_key: str,
+    inner_thoughts_description: str,
+    inner_thoughts_required: bool = True,
+    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
+) -> List[dict]:
+    """Add an inner_thoughts kwarg to every function in the provided list"""
+    # return copies
+    new_functions = []
+
+    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
+    for function_object in functions:
+        function_params = function_object["parameters"]["properties"]
+        required_params = list(function_object["parameters"]["required"])
+
+        # if the inner thoughts arg doesn't exist, add it
+        if inner_thoughts_key not in function_params:
+            function_params[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
+
+        # make sure it's tagged as required
+        new_function_object = copy.deepcopy(function_object)
+        if inner_thoughts_required and inner_thoughts_key not in required_params:
+            required_params.append(inner_thoughts_key)
+            new_function_object["parameters"]["required"] = required_params
+
+        new_functions.append(new_function_object)
+
+    # return a list of copies
+    return new_functions
+
+
+def unpack_all_inner_thoughts_from_kwargs(
+    response: ChatCompletionResponse,
+    inner_thoughts_key: str,
+) -> ChatCompletionResponse:
+    """Strip the inner thoughts out of the tool call and put it in the message content"""
+    if len(response.choices) == 0:
+        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+
+    new_choices = []
+    for choice in response.choices:
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
+
+    # return an updated copy
+    new_response = response.model_copy(deep=True)
+    new_response.choices = new_choices
+    return new_response
+
+
+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
+def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
+    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
+    from letta.utils import printd
+
+    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+
+    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
+    if match_string in str(exception):
+        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
+        return True
+
+    # Based on python requests + OpenAI REST API (/v1)
+    elif isinstance(exception, requests.exceptions.HTTPError):
+        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
+            try:
+                error_details = exception.response.json()
+                if "error" not in error_details:
+                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
+                    return False
+                else:
+                    error_details = error_details["error"]
+
+                # Check for the specific error code
+                if error_details.get("code") == "context_length_exceeded":
+                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
+                    return True
+                # Soft-check for "maximum context length" inside of the message
+                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
+                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
+                    return True
+                else:
+                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
+                    return False
+            except ValueError:
+                # JSON decoding failed
+                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
+
+    # Generic fail
+    else:
+        return False
+
+
+def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
+    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
+        # model that are known to not use `content` fields on tool calls
+        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
+    else:
+        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
+
+    if not isinstance(inner_thoughts_in_kwargs, bool):
+        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
+        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
+
+    return inner_thoughts_in_kwargs
```
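Two usage sketches for the new helpers, assuming this wheel is installed. First, `add_inner_thoughts_to_functions` applied to a made-up OpenAI-style function schema; it returns copies with an extra required string parameter:

```python
from letta.llm_api.helpers import add_inner_thoughts_to_functions

functions = [
    {
        "name": "send_message",  # hypothetical function schema
        "description": "Send a message to the user",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

new_functions = add_inner_thoughts_to_functions(
    functions,
    inner_thoughts_key="inner_thoughts",
    inner_thoughts_description="Private reasoning, not shown to the user",
)
print(new_functions[0]["parameters"]["required"])  # ['message', 'inner_thoughts']
```

Second, `is_context_overflow_error` as a guard around a raw HTTP call, mirroring how agent.py decides when to summarize; the wrapper function and its error handling here are illustrative, not letta API:

```python
import requests

from letta.llm_api.helpers import is_context_overflow_error


def post_chat_request(url: str, headers: dict, payload: dict) -> dict:
    """Illustrative wrapper: surface context-window overflows distinctly."""
    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        if is_context_overflow_error(err):
            # caller should summarize/trim the message history and retry
            raise RuntimeError("context window exceeded") from err
        raise
```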