letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241009104130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly has been flagged as possibly problematic by the registry.

Files changed (35)
  1. letta/agent.py +36 -10
  2. letta/client/client.py +8 -1
  3. letta/credentials.py +3 -3
  4. letta/errors.py +1 -1
  5. letta/functions/schema_generator.py +1 -1
  6. letta/llm_api/anthropic.py +3 -24
  7. letta/llm_api/azure_openai.py +53 -108
  8. letta/llm_api/azure_openai_constants.py +10 -0
  9. letta/llm_api/google_ai.py +39 -64
  10. letta/llm_api/helpers.py +208 -0
  11. letta/llm_api/llm_api_tools.py +43 -218
  12. letta/llm_api/openai.py +74 -50
  13. letta/main.py +1 -1
  14. letta/metadata.py +2 -0
  15. letta/providers.py +144 -31
  16. letta/schemas/agent.py +14 -0
  17. letta/schemas/llm_config.py +2 -2
  18. letta/schemas/openai/chat_completion_response.py +3 -0
  19. letta/schemas/tool.py +3 -3
  20. letta/server/rest_api/admin/tools.py +0 -1
  21. letta/server/rest_api/app.py +1 -17
  22. letta/server/rest_api/routers/openai/assistants/threads.py +10 -7
  23. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +5 -3
  24. letta/server/rest_api/routers/v1/agents.py +23 -13
  25. letta/server/rest_api/routers/v1/blocks.py +5 -3
  26. letta/server/rest_api/routers/v1/jobs.py +5 -3
  27. letta/server/rest_api/routers/v1/sources.py +25 -13
  28. letta/server/rest_api/routers/v1/tools.py +12 -7
  29. letta/server/server.py +33 -37
  30. letta/settings.py +5 -113
  31. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/METADATA +1 -1
  32. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/RECORD +35 -33
  33. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/entry_points.txt +0 -0
letta/agent.py CHANGED
@@ -18,7 +18,7 @@ from letta.constants import (
     MESSAGE_SUMMARY_WARNING_FRAC,
 )
 from letta.interface import AgentInterface
-from letta.llm_api.llm_api_tools import create, is_context_overflow_error
+from letta.llm_api.llm_api_tools import create
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
@@ -56,6 +56,7 @@ from letta.utils import (
 )
 
 from .errors import LLMError
+from .llm_api.helpers import is_context_overflow_error
 
 
 def compile_memory_metadata_block(
@@ -207,7 +208,7 @@ class BaseAgent(ABC):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """
@@ -223,7 +224,7 @@ class BaseAgent(ABC):
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: AgentInterface,
+        interface: Optional[AgentInterface],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],
@@ -238,6 +239,7 @@ class Agent(BaseAgent):
         assert isinstance(self.agent_state.memory, Memory), f"Memory object is not of type Memory: {type(self.agent_state.memory)}"
 
         # link tools
+        self.tools = tools
         self.link_tools(tools)
 
         # gpt-4, gpt-3.5-turbo, ...
@@ -337,6 +339,9 @@ class Agent(BaseAgent):
         for tool_name in self.agent_state.tools:
             assert tool_name in [tool.name for tool in tools], f"Tool name {tool_name} not included in agent tool list"
 
+        # Update tools
+        self.tools = tools
+
         # Store the functions schemas (this is passed as an argument to ChatCompletion)
         self.functions = []
         self.functions_python = {}
@@ -460,7 +465,7 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
@@ -478,10 +483,10 @@
                 stream=stream,
                 stream_inferface=self.interface,
                 # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
-            if len(response.choices) == 0:
+            if len(response.choices) == 0 or response.choices[0] is None:
                 raise Exception(f"API call didn't return a message: {response}")
 
             # special case for 'length'
@@ -551,15 +556,20 @@
            )  # extend conversation with assistant's reply
            printd(f"Function call message: {messages[-1]}")
 
+           nonnull_content = False
            if response_message.content:
                # The content if then internal monologue, not chat
                self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+               # Flag to avoid printing a duplicate if inner thoughts get popped from the function call
+               nonnull_content = True
 
            # Step 3: call the function
            # Note: the JSON response may not always be valid; be sure to handle errors
            function_call = (
                response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
            )
+
+           # Get the name of the function
            function_name = function_call.name
            printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
@@ -608,9 +618,21 @@
                self.interface.function_message(f"Error: {error_msg}", msg_obj=messages[-1])
                return messages, False, True  # force a heartbeat to allow agent to handle error
 
+           # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
+           if "inner_thoughts" in function_args:
+               response_message.content = function_args.pop("inner_thoughts")
+               # The content if then internal monologue, not chat
+               if response_message.content and not nonnull_content:
+                   self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+
            # (Still parsing function args)
            # Handle requests for immediate heartbeat
            heartbeat_request = function_args.pop("request_heartbeat", None)
+
+           # Edge case: heartbeat_request is returned as a stringified boolean, we will attempt to parse:
+           if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
+               heartbeat_request = True
+
            if not isinstance(heartbeat_request, bool) or heartbeat_request is None:
                printd(
                    f"{CLI_WARNING_PREFIX}'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}"
@@ -716,7 +738,7 @@
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Top-level event message handler for the Letta agent"""
@@ -795,7 +817,7 @@
                     message_sequence=input_message_sequence,
                     first_message=True,  # passed through to the prompt formatter
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
                 if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                     break
@@ -808,7 +830,7 @@
             response = self._get_ai_reply(
                 message_sequence=input_message_sequence,
                 stream=stream,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
         # Step 3: check if LLM wanted to call a function
@@ -892,7 +914,7 @@
                 recreate_message_timestamp=recreate_message_timestamp,
                 stream=stream,
                 timestamp=timestamp,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )
 
@@ -1343,6 +1365,10 @@ def save_agent(agent: Agent, ms: MetadataStore):
     else:
         ms.create_agent(agent_state)
 
+    for tool in agent.tools:
+        if ms.get_tool(tool_name=tool.name, user_id=tool.user_id) is None:
+            ms.create_tool(tool)
+
     agent.agent_state = ms.get_agent(agent_id=agent_id)
     assert isinstance(agent.agent_state.memory, Memory), f"Memory is not a Memory object: {type(agent_state.memory)}"
 
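Note on the heartbeat change above: it guards against providers that return "true" as a string rather than a JSON boolean. Distilled into a standalone function, the new logic looks roughly like this (an illustrative sketch, not the exact agent code; the None return mirrors the warning path in the patch):

    from typing import Any, Optional

    def parse_heartbeat_request(function_args: dict) -> Optional[bool]:
        # Pop the request_heartbeat arg; tolerate a stringified boolean such as "true"
        heartbeat_request: Any = function_args.pop("request_heartbeat", None)
        if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
            heartbeat_request = True
        if not isinstance(heartbeat_request, bool):
            # The agent logs a warning here and proceeds as if no heartbeat was requested
            return None
        return heartbeat_request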
letta/client/client.py CHANGED
@@ -9,7 +9,7 @@ from letta.constants import BASE_TOOLS, DEFAULT_HUMAN, DEFAULT_PERSONA
 from letta.data_sources.connectors import DataConnector
 from letta.functions.functions import parse_source_code
 from letta.memory import get_memory_functions
-from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
+from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgentState
 from letta.schemas.block import (
     Block,
     CreateBlock,
@@ -68,6 +68,7 @@ class AbstractClient(object):
     def create_agent(
         self,
         name: Optional[str] = None,
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         embedding_config: Optional[EmbeddingConfig] = None,
         llm_config: Optional[LLMConfig] = None,
         memory: Memory = ChatMemory(human=get_human_text(DEFAULT_HUMAN), persona=get_persona_text(DEFAULT_PERSONA)),
@@ -319,6 +320,8 @@ class RESTClient(AbstractClient):
     def create_agent(
         self,
         name: Optional[str] = None,
+        # agent config
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         # model configs
         embedding_config: EmbeddingConfig = None,
         llm_config: LLMConfig = None,
@@ -381,6 +384,7 @@ class RESTClient(AbstractClient):
             memory=memory,
             tools=tool_names,
             system=system,
+            agent_type=agent_type,
             llm_config=llm_config if llm_config else self._default_llm_config,
             embedding_config=embedding_config if embedding_config else self._default_embedding_config,
         )
@@ -1462,6 +1466,8 @@ class LocalClient(AbstractClient):
     def create_agent(
         self,
         name: Optional[str] = None,
+        # agent config
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         # model configs
         embedding_config: EmbeddingConfig = None,
         llm_config: LLMConfig = None,
@@ -1524,6 +1530,7 @@ class LocalClient(AbstractClient):
             memory=memory,
             tools=tool_names,
             system=system,
+            agent_type=agent_type,
             llm_config=llm_config if llm_config else self._default_llm_config,
             embedding_config=embedding_config if embedding_config else self._default_embedding_config,
         ),
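With agent_type now threaded through both client implementations, creating an agent with an explicit type looks roughly like this (a sketch; the imports match this diff, but the surrounding setup is assumed):

    from letta.client.client import LocalClient
    from letta.schemas.agent import AgentType

    client = LocalClient()
    # agent_type defaults to AgentType.memgpt_agent; shown explicitly here
    agent_state = client.create_agent(
        name="my-agent",
        agent_type=AgentType.memgpt_agent,
    )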
letta/credentials.py CHANGED
@@ -30,7 +30,7 @@ class LettaCredentials:
 
     # azure config
     azure_auth_type: str = "api_key"
-    azure_key: Optional[str] = None
+    azure_key: Optional[str] = os.getenv("AZURE_OPENAI_API_KEY")
 
     # groq config
     groq_key: Optional[str] = os.getenv("GROQ_API_KEY")
@@ -76,7 +76,7 @@
             "azure_embedding_deployment": get_field(config, "azure", "embedding_deployment"),
             # gemini
             "google_ai_key": get_field(config, "google_ai", "key"),
-            "google_ai_service_endpoint": get_field(config, "google_ai", "service_endpoint"),
+            # "google_ai_service_endpoint": get_field(config, "google_ai", "service_endpoint"),
             # anthropic
             "anthropic_key": get_field(config, "anthropic", "key"),
             # cohere
@@ -117,7 +117,7 @@
 
         # gemini
         set_field(config, "google_ai", "key", self.google_ai_key)
-        set_field(config, "google_ai", "service_endpoint", self.google_ai_service_endpoint)
+        # set_field(config, "google_ai", "service_endpoint", self.google_ai_service_endpoint)
 
         # anthropic
         set_field(config, "anthropic", "key", self.anthropic_key)
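Because azure_key now defaults to os.getenv("AZURE_OPENAI_API_KEY") as a class-level field default, the environment variable has to be set before letta.credentials is imported (field defaults evaluate once, at import time). A minimal sketch, assuming LettaCredentials keeps the plain field default shown in the diff:

    import os

    # Hypothetical key for illustration; set before the import below
    os.environ["AZURE_OPENAI_API_KEY"] = "my-azure-key"

    from letta.credentials import LettaCredentials

    creds = LettaCredentials()
    assert creds.azure_key == "my-azure-key"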
letta/errors.py CHANGED
@@ -56,7 +56,7 @@ class LettaMessageError(LettaError):
             error_msg += f" (Explanation: {explanation})"
 
         # Pretty print out message JSON
-        message_json = json.dumps([message.model_dump_json(indent=4) for message in messages], indent=4)
+        message_json = json.dumps([message.model_dump() for message in messages], indent=4)
         return f"{error_msg}\n\n{message_json}"
 
 
letta/functions/schema_generator.py CHANGED
@@ -130,7 +130,7 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     if function.__name__ not in ["send_message", "pause_heartbeats"]:
         schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
-            "description": "Request an immediate heartbeat after function execution. Set to 'true' if you want to send a follow-up message or run a follow-up function.",
+            "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
         }
         schema["parameters"]["required"].append("request_heartbeat")
 
letta/llm_api/anthropic.py CHANGED
@@ -2,8 +2,7 @@ import json
 import re
 from typing import List, Optional, Union
 
-import requests
-
+from letta.llm_api.helpers import make_post_request
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
 from letta.schemas.openai.chat_completion_response import (
@@ -295,7 +294,6 @@ def anthropic_chat_completions_request(
     inner_thoughts_xml_tag: Optional[str] = "thinking",
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
-    from letta.utils import printd
 
     url = smart_urljoin(url, "messages")
     headers = {
@@ -360,24 +358,5 @@
     data.pop("user", None)
     data.pop("tool_choice", None)
 
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = convert_anthropic_response_to_chatcompletion(response_json=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return convert_anthropic_response_to_chatcompletion(response_json=response_json, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
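The repeated try/except scaffolding around requests.post across providers is consolidated into make_post_request in the new letta/llm_api/helpers.py (+208 lines; its body is not shown in this diff). Judging from the call sites, a helper along these lines would suffice — a sketch under that assumption, not the actual implementation:

    import requests

    def make_post_request(url: str, headers: dict, data: dict) -> dict:
        # POST the JSON payload; raise on 4XX/5XX and hand back the parsed body
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()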
letta/llm_api/azure_openai.py CHANGED
@@ -1,90 +1,74 @@
-from typing import Union
-
 import requests
 
+from letta.llm_api.helpers import make_post_request
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
-from letta.utils import smart_urljoin
+from letta.settings import ModelSettings
+
+
+def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
+
 
-MODEL_TO_AZURE_ENGINE = {
-    "gpt-4-1106-preview": "gpt-4",
-    "gpt-4": "gpt-4",
-    "gpt-4-32k": "gpt-4-32k",
-    "gpt-3.5": "gpt-35-turbo",
-    "gpt-3.5-turbo": "gpt-35-turbo",
-    "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
-}
+def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
 
 
-def clean_azure_endpoint(raw_endpoint_name: str) -> str:
-    """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'"""
-    if raw_endpoint_name is None:
-        raise ValueError(raw_endpoint_name)
-    endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "")
-    endpoint_address = endpoint_address.replace("http://", "")
-    endpoint_address = endpoint_address.replace("https://", "")
-    return endpoint_address
+def get_azure_model_list_endpoint(base_url: str, api_version: str):
+    return f"{base_url}/openai/models?api-version={api_version}"
 
 
-def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict:
+def azure_openai_get_model_list(base_url: str, api_key: str, api_version: str) -> list:
     """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-    from letta.utils import printd
 
     # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    url = smart_urljoin(url, "openai")
-    url = smart_urljoin(url, f"models?api-version={api_version}")
-
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["api-key"] = f"{api_key}"
 
-    printd(f"Sending request to {url}")
+    url = get_azure_model_list_endpoint(base_url, api_version)
     try:
         response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response = {response}")
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got HTTPError, exception={http_err}, response={response}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got RequestException, exception={req_err}, response={response}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got unknown Exception, exception={e}, response={response}")
-        raise e
+        response.raise_for_status()
+    except requests.RequestException as e:
+        raise RuntimeError(f"Failed to retrieve model list: {e}")
+
+    return response.json().get("data", [])
+
+
+def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+    # Extract models that support text generation
+    model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
+    return model_options
+
+
+def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
+    def valid_embedding_model(m: dict):
+        valid_name = True
+        if require_embedding_in_name:
+            valid_name = "embedding" in m["id"]
+
+        return m.get("capabilities").get("embeddings") == True and valid_name
+
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+    # Extract models that support embeddings
+
+    model_options = [m for m in model_list if valid_embedding_model(m)]
+    return model_options
 
 
 def azure_openai_chat_completions_request(
-    resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
+    model_settings: ModelSettings, llm_config: LLMConfig, api_key: str, chat_completion_request: ChatCompletionRequest
 ) -> ChatCompletionResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions"""
-    from letta.utils import printd
 
-    assert resource_name is not None, "Missing required field when calling Azure OpenAI"
-    assert deployment_id is not None, "Missing required field when calling Azure OpenAI"
-    assert api_version is not None, "Missing required field when calling Azure OpenAI"
     assert api_key is not None, "Missing required field when calling Azure OpenAI"
 
-    resource_name = clean_azure_endpoint(resource_name)
-    url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
+    data = chat_completion_request.model_dump(exclude_none=True)
 
     # If functions == None, strip from the payload
     if "functions" in data and data["functions"] is None:
@@ -95,61 +79,22 @@ def azure_openai_chat_completions_request(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
 
-    printd(f"Sending request to {url}")
-    try:
-        data["messages"] = [i.to_openai_dict() for i in data["messages"]]
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
-        if "content" not in response["choices"][0].get("message"):
-            response["choices"][0]["message"]["content"] = None
-        response = ChatCompletionResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.api_version)
+    response_json = make_post_request(url, headers, data)
+    # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
+    if "content" not in response_json["choices"][0].get("message"):
+        response_json["choices"][0]["message"]["content"] = None
+    response = ChatCompletionResponse(**response_json)  # convert to 'dot-dict' style which is the openai python client default
+    return response
 
 
 def azure_openai_embeddings_request(
     resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
 ) -> EmbeddingResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings"""
-    from letta.utils import printd
 
-    resource_name = clean_azure_endpoint(resource_name)
     url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
 
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = EmbeddingResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return EmbeddingResponse(**response_json)
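The new endpoint builders make the Azure URL scheme explicit. For example (hypothetical resource name and API version):

    base_url = "https://my-resource.openai.azure.com"
    url = get_azure_chat_completions_endpoint(base_url, "gpt-4o-mini-2024-07-18", "2023-05-15")
    # -> https://my-resource.openai.azure.com/openai/deployments/gpt-4o-mini-2024-07-18/chat/completions?api-version=2023-05-15

Note that azure_openai_embeddings_request still builds its URL from resource_name directly rather than going through get_azure_embeddings_endpoint.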
letta/llm_api/azure_openai_constants.py ADDED
@@ -0,0 +1,10 @@
+AZURE_MODEL_TO_CONTEXT_LENGTH = {
+    "babbage-002": 16384,
+    "davinci-002": 16384,
+    "gpt-35-turbo-0613": 4096,
+    "gpt-35-turbo-1106": 16385,
+    "gpt-35-turbo-0125": 16385,
+    "gpt-4-0613": 8192,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-2024-08-06": 128000,
+}
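A plausible use of this table is a context-window lookup with a conservative fallback (a sketch; the helper name and fallback value are illustrative assumptions, not part of this diff):

    from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH

    DEFAULT_CONTEXT_LENGTH = 4096  # assumed fallback, not from the diff

    def get_azure_context_window(model: str) -> int:
        # Look up the deployment's context window, defaulting when the model is unlisted
        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model, DEFAULT_CONTEXT_LENGTH)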