letta-nightly 0.8.17.dev20250722104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -5,16 +5,18 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.8.17"
+    __version__ = "0.9.0"

 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]

-
 # import clients
 from letta.client.client import RESTClient

-# imports for easier access
+# Import sqlite_functions early to ensure event handlers are registered
+from letta.orm import sqlite_functions
+
+# # imports for easier access
 from letta.schemas.agent import AgentState
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
letta/agent.py CHANGED
@@ -36,6 +36,7 @@ from letta.interface import AgentInterface
 from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
 from letta.llm_api.llm_client import LLMClient
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.memory import summarize_messages
@@ -548,8 +549,8 @@ class Agent(BaseAgent):
                 return messages, False, True  # force a heartbeat to allow agent to handle error

             # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
-            if "inner_thoughts" in function_args:
-                response_message.content = function_args.pop("inner_thoughts")
+            if INNER_THOUGHTS_KWARG in function_args:
+                response_message.content = function_args.pop(INNER_THOUGHTS_KWARG)
             # The content if then internal monologue, not chat
             if response_message.content and not nonnull_content:
                 self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
letta/agents/base_agent.py CHANGED
@@ -122,7 +122,9 @@ class BaseAgent(ABC):
         curr_dynamic_section = extract_dynamic_section(curr_system_message_text)

         # generate just the memory string with current state for comparison
-        curr_memory_str = agent_state.memory.compile(tool_usage_rules=tool_constraint_block, sources=agent_state.sources)
+        curr_memory_str = agent_state.memory.compile(
+            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+        )
         new_dynamic_section = extract_dynamic_section(curr_memory_str)

         # compare just the dynamic sections (memory blocks, tool rules, directories)
@@ -149,6 +151,7 @@ class BaseAgent(ABC):
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
             sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
         )

         diff = united_diff(curr_system_message_text, new_system_message_str)
letta/agents/voice_agent.py CHANGED
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
             previous_message_count=self.num_messages,
             archival_memory_size=self.num_archival_memories,
             sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
         )
         letta_message_db_queue = create_input_messages(
             input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
letta/constants.py CHANGED
@@ -326,7 +326,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
 CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
 CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
 CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
-CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
+
 # Function return limits
 FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000  # very high (we rely on implementation)
@@ -361,7 +361,9 @@ REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
361
361
  REDIS_RUN_ID_PREFIX = "agent:send_message:run_id"
362
362
 
363
363
  # TODO: This is temporary, eventually use token-based eviction
364
- MAX_FILES_OPEN = 5
364
+ # File based controls
365
+ DEFAULT_MAX_FILES_OPEN = 5
366
+ DEFAULT_CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
365
367
 
366
368
  GET_PROVIDERS_TIMEOUT_SECONDS = 10
367
369
 
letta/functions/schema_generator.py CHANGED
@@ -412,12 +412,13 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     # Validate that the function has a Google Python style docstring
     try:
         validate_google_style_docstring(function)
-    except ValueError:
+    except ValueError as e:
         logger.warning(
             f"Function `{function.__name__}` in module `{function.__module__}` "
             f"{'(tool_id=' + tool_id + ') ' if tool_id else ''}"
             f"is not in Google style docstring format. "
             f"Docstring received:\n{repr(function.__doc__[:200]) if function.__doc__ else 'None'}"
+            f"\nError: {str(e)}"
         )

     # Get the signature of the function
letta/groups/dynamic_multi_agent.py CHANGED
@@ -94,6 +94,7 @@ class DynamicMultiAgent(Agent):
             for name, agent_id in [(agents[agent_id].agent_state.name, agent_id) for agent_id in agent_id_options]:
                 if name.lower() in assistant_message.content.lower():
                     speaker_id = agent_id
+            assert speaker_id is not None, f"No names found in {assistant_message.content}"

             # Sum usage
             total_usage.prompt_tokens += usage_stats.prompt_tokens
@@ -1,4 +1,3 @@
1
- import base64
2
1
  from typing import Any, Dict, List, Optional, Union
3
2
 
4
3
  import numpy as np
@@ -43,7 +42,10 @@ from letta.schemas.tool_rule import (
     TerminalToolRule,
     ToolRule,
 )
+from letta.settings import DatabaseChoice, settings

+if settings.database_engine == DatabaseChoice.SQLITE:
+    import sqlite_vec
 # --------------------------
 # LLMConfig Serialization
 # --------------------------
@@ -272,22 +274,28 @@ def deserialize_message_content(data: Optional[List[Dict]]) -> List[MessageConte
272
274
 
273
275
 
274
276
  def serialize_vector(vector: Optional[Union[List[float], np.ndarray]]) -> Optional[bytes]:
275
- """Convert a NumPy array or list into a base64-encoded byte string."""
277
+ """Convert a NumPy array or list into serialized format using sqlite-vec."""
276
278
  if vector is None:
277
279
  return None
278
280
  if isinstance(vector, list):
279
281
  vector = np.array(vector, dtype=np.float32)
282
+ else:
283
+ vector = vector.astype(np.float32)
280
284
 
281
- return base64.b64encode(vector.tobytes())
285
+ return sqlite_vec.serialize_float32(vector.tolist())
282
286
 
283
287
 
284
288
  def deserialize_vector(data: Optional[bytes], dialect: Dialect) -> Optional[np.ndarray]:
285
- """Convert a base64-encoded byte string back into a NumPy array."""
289
+ """Convert serialized data back into a NumPy array using sqlite-vec format."""
286
290
  if not data:
287
291
  return None
288
292
 
289
293
  if dialect.name == "sqlite":
290
- data = base64.b64decode(data)
294
+ # Use sqlite-vec format
295
+ if len(data) % 4 == 0: # Must be divisible by 4 for float32
296
+ return np.frombuffer(data, dtype=np.float32)
297
+ else:
298
+ raise ValueError(f"Invalid sqlite-vec binary data length: {len(data)}")
291
299
 
292
300
  return np.frombuffer(data, dtype=np.float32)
293
301
 
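Note: the SQLite vector codec switches from base64 text to sqlite-vec's packed float32 blobs. A minimal round-trip sketch of the new encoding (assumes the `sqlite-vec` and `numpy` packages; `serialize_float32` is the same helper the diff calls, and the `% 4` check mirrors `deserialize_vector`):

```python
import numpy as np
import sqlite_vec

vec = [0.25, -1.0, 3.5]
blob = sqlite_vec.serialize_float32(vec)  # packs each float32 as 4 raw bytes
assert len(blob) % 4 == 0                 # the validity check deserialize_vector performs
restored = np.frombuffer(blob, dtype=np.float32)  # the decode path used for the sqlite dialect
assert np.allclose(restored, np.array(vec, dtype=np.float32))
```

One caveat worth flagging: base64 output is also 4-byte aligned, so blobs written by the old codec would pass the length check and decode to garbage floats rather than raise; presumably a fresh database or a migration is assumed.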
letta/helpers/json_helpers.py CHANGED
@@ -1,3 +1,4 @@
+import base64
 import json
 from datetime import datetime

@@ -11,7 +12,11 @@ def json_dumps(data, indent=2):
         if isinstance(obj, datetime):
             return obj.isoformat()
         if isinstance(obj, bytes):
-            return obj.decode("utf-8")
+            try:
+                return obj.decode("utf-8")
+            except Exception:
+                print(f"Error decoding bytes as utf-8: {obj}")
+                return base64.b64encode(obj).decode("utf-8")
         raise TypeError(f"Type {type(obj)} not serializable")

     return json.dumps(data, indent=indent, default=safe_serializer, ensure_ascii=False)
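Note: a sketch of the new fallback behavior (hypothetical inputs; `json_dumps` is the helper patched above):

```python
from letta.helpers.json_helpers import json_dumps

print(json_dumps({"ok": b"hello"}))      # valid UTF-8 bytes decode as before: "hello"
print(json_dumps({"raw": b"\x80\x81"}))  # invalid UTF-8 no longer raises; base64 fallback yields "gIE="
```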
letta/llm_api/anthropic.py CHANGED
@@ -729,7 +729,7 @@ def _prepare_anthropic_request(
         data["temperature"] = 1.0

     if "functions" in data:
-        raise ValueError(f"'functions' unexpected in Anthropic API payload")
+        raise ValueError("'functions' unexpected in Anthropic API payload")

     # Handle tools
     if "tools" in data and data["tools"] is None:
@@ -1162,7 +1162,7 @@ def anthropic_chat_completions_process_stream(
                     accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments

             if message_delta.function_call is not None:
-                raise NotImplementedError(f"Old function_call style not support with stream=True")
+                raise NotImplementedError("Old function_call style not support with stream=True")

         # overwrite response fields based on latest chunk
         if not create_message_id:
letta/llm_api/aws_bedrock.py CHANGED
@@ -1,17 +1,30 @@
+"""
+Note that this formally only supports Anthropic Bedrock.
+TODO (cliandy): determine what other providers are supported and what is needed to add support.
+"""
+
 import os
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional

 from anthropic import AnthropicBedrock

+from letta.log import get_logger
 from letta.settings import model_settings

+logger = get_logger(__name__)
+

 def has_valid_aws_credentials() -> bool:
     """
     Check if AWS credentials are properly configured.
     """
-    valid_aws_credentials = os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY") and os.getenv("AWS_DEFAULT_REGION")
-    return valid_aws_credentials
+    return all(
+        (
+            os.getenv("AWS_ACCESS_KEY_ID"),
+            os.getenv("AWS_SECRET_ACCESS_KEY"),
+            os.getenv("AWS_DEFAULT_REGION"),
+        )
+    )


 def get_bedrock_client(
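Note: beyond style, the `all()` rewrite fixes the return type. The old chained `and` returned the last `os.getenv` value, a string (or `None`), despite the `-> bool` annotation; `all()` returns an actual bool. A sketch with placeholder env values:

```python
import os

# placeholder credentials, for illustration only
os.environ["AWS_ACCESS_KEY_ID"] = "AKIA_PLACEHOLDER"
os.environ["AWS_SECRET_ACCESS_KEY"] = "secret"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

keys = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION")
old_style = os.getenv(keys[0]) and os.getenv(keys[1]) and os.getenv(keys[2])
new_style = all(os.getenv(k) for k in keys)
assert old_style == "us-east-1"  # truthy, but a str rather than a bool
assert new_style is True
```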
@@ -41,48 +54,11 @@ def get_bedrock_client(
     return bedrock


-def bedrock_get_model_list(
-    region_name: str,
-    access_key_id: Optional[str] = None,
-    secret_access_key: Optional[str] = None,
-) -> List[dict]:
-    """
-    Get list of available models from Bedrock.
-
-    Args:
-        region_name: AWS region name
-        access_key_id: Optional AWS access key ID
-        secret_access_key: Optional AWS secret access key
-
-    TODO: Implement model_provider and output_modality filtering
-        model_provider: Optional provider name to filter models. If None, returns all models.
-        output_modality: Output modality to filter models. Defaults to "text".
-
-    Returns:
-        List of model summaries
-
-    """
-    import boto3
-
-    try:
-        bedrock = boto3.client(
-            "bedrock",
-            region_name=region_name,
-            aws_access_key_id=access_key_id,
-            aws_secret_access_key=secret_access_key,
-        )
-        response = bedrock.list_inference_profiles()
-        return response["inferenceProfileSummaries"]
-    except Exception as e:
-        print(f"Error getting model list: {str(e)}")
-        raise e
-
-
 async def bedrock_get_model_list_async(
     access_key_id: Optional[str] = None,
     secret_access_key: Optional[str] = None,
     default_region: Optional[str] = None,
-) -> List[dict]:
+) -> list[dict]:
     from aioboto3.session import Session

     try:
@@ -96,11 +72,11 @@ async def bedrock_get_model_list_async(
         response = await bedrock.list_inference_profiles()
         return response["inferenceProfileSummaries"]
     except Exception as e:
-        print(f"Error getting model list: {str(e)}")
+        logger.error(f"Error getting model list for bedrock: %s", e)
        raise e


-def bedrock_get_model_details(region_name: str, model_id: str) -> Dict[str, Any]:
+def bedrock_get_model_details(region_name: str, model_id: str) -> dict[str, Any]:
     """
     Get details for a specific model from Bedrock.
     """
@@ -121,54 +97,8 @@ def bedrock_get_model_context_window(model_id: str) -> int:
121
97
  Get context window size for a specific model.
122
98
  """
123
99
  # Bedrock doesn't provide this via API, so we maintain a mapping
124
- context_windows = {
125
- "anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,
126
- "anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
127
- "anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
128
- "anthropic.claude-3-haiku-20240307-v1:0": 200000,
129
- "anthropic.claude-3-opus-20240229-v1:0": 200000,
130
- "anthropic.claude-3-sonnet-20240229-v1:0": 200000,
131
- }
132
- return context_windows.get(model_id, 200000) # default to 100k if unknown
133
-
134
-
135
- """
136
- {
137
- "id": "msg_123",
138
- "type": "message",
139
- "role": "assistant",
140
- "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
141
- "content": [
142
- {
143
- "type": "text",
144
- "text": "I see the Firefox icon. Let me click on it and then navigate to a weather website."
145
- },
146
- {
147
- "type": "tool_use",
148
- "id": "toolu_123",
149
- "name": "computer",
150
- "input": {
151
- "action": "mouse_move",
152
- "coordinate": [
153
- 708,
154
- 736
155
- ]
156
- }
157
- },
158
- {
159
- "type": "tool_use",
160
- "id": "toolu_234",
161
- "name": "computer",
162
- "input": {
163
- "action": "left_click"
164
- }
165
- }
166
- ],
167
- "stop_reason": "tool_use",
168
- "stop_sequence": null,
169
- "usage": {
170
- "input_tokens": 3391,
171
- "output_tokens": 132
172
- }
173
- }
174
- """
100
+ # 200k for anthropic: https://aws.amazon.com/bedrock/anthropic/
101
+ if model_id.startswith("anthropic"):
102
+ return 200_000
103
+ else:
104
+ return 100_000 # default to 100k if unknown
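Note: with the synchronous `bedrock_get_model_list` removed, callers go through the async path. A usage sketch (assumes `aioboto3` is installed and AWS credentials are configured; the field names come from Bedrock's `list_inference_profiles` response and are hedged with `.get`):

```python
import asyncio

from letta.llm_api.aws_bedrock import bedrock_get_model_list_async

async def main():
    # each entry is an inference profile summary dict from the Bedrock API
    profiles = await bedrock_get_model_list_async(default_region="us-east-1")
    for profile in profiles:
        print(profile.get("inferenceProfileId"), profile.get("inferenceProfileName"))

asyncio.run(main())
```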
letta/llm_api/deepseek.py CHANGED
@@ -120,7 +120,7 @@ def build_deepseek_chat_completions_request(

     def add_functions_to_system_message(system_message: ChatMessage):
         system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
-        system_message.content += f'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+        system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

     if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
         add_functions_to_system_message(
letta/llm_api/google_ai_client.py CHANGED
@@ -66,44 +66,6 @@ def google_ai_check_valid_api_key(api_key: str):
         raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)


-def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
-    """Synchronous version to get model list from Google AI API using httpx."""
-    import httpx
-
-    from letta.utils import printd
-
-    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
-
-    try:
-        with httpx.Client() as client:
-            response = client.get(url, headers=headers)
-            response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
-            response_data = response.json()  # convert to dict from string
-
-            # Grab the models out
-            model_list = response_data["models"]
-            return model_list
-
-    except httpx.HTTPStatusError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
-        raise http_err
-
-    except httpx.RequestError as req_err:
-        # Handle other httpx-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
 async def google_ai_get_model_list_async(
     base_url: str, api_key: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
 ) -> List[dict]:
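Note: the synchronous httpx helper is gone with no replacement; only the async variant remains. A call sketch (base URL and key are assumed values; the signature matches the diff):

```python
import asyncio

from letta.llm_api.google_ai_client import google_ai_get_model_list_async

async def main():
    models = await google_ai_get_model_list_async(
        base_url="https://generativelanguage.googleapis.com",  # assumed Gemini API base URL
        api_key="YOUR_GEMINI_API_KEY",
    )
    print([m.get("name") for m in models])  # entries come from the response's "models" list

asyncio.run(main())
```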
letta/llm_api/google_constants.py CHANGED
@@ -1,7 +1,12 @@
 GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
+    "gemini-2.5-pro": 1048576,
+    "gemini-2.5-flash": 1048576,
+    "gemini-live-2.5-flash": 1048576,
+    "gemini-2.0-flash-001": 1048576,
+    "gemini-2.0-flash-lite-001": 1048576,
+    # The following are either deprecated or discontinued.
     "gemini-2.5-pro-exp-03-25": 1048576,
     "gemini-2.5-flash-preview-04-17": 1048576,
-    "gemini-2.0-flash-001": 1048576,
     "gemini-2.0-pro-exp-02-05": 2097152,
     "gemini-2.0-flash-lite-preview-02-05": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 1048576,
@@ -11,8 +16,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
     "gemini-1.0-pro-vision": 16384,
 }

-GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
-
 GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}

 GOOGLE_MODEL_FOR_API_KEY_CHECK = "gemini-2.0-flash-lite"
letta/llm_api/helpers.py CHANGED
@@ -252,7 +252,7 @@ def unpack_all_inner_thoughts_from_kwargs(
 ) -> ChatCompletionResponse:
     """Strip the inner thoughts out of the tool call and put it in the message content"""
     if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+        raise ValueError("Unpacking inner thoughts from empty response not supported")

     new_choices = []
     for choice in response.choices:
letta/llm_api/llm_api_tools.py CHANGED
@@ -67,7 +67,6 @@ def retry_with_exponential_backoff(
             # Stop retrying if user hits Ctrl-C
             raise KeyboardInterrupt("User intentionally stopped thread. Stopping...")
         except requests.exceptions.HTTPError as http_err:
-
             if not hasattr(http_err, "response") or not http_err.response:
                 raise

@@ -175,7 +174,6 @@ def create(

     # openai
     if llm_config.model_endpoint_type == "openai":
-
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
@@ -256,7 +254,6 @@ def create(
         return response

     elif llm_config.model_endpoint_type == "xai":
-
         api_key = model_settings.xai_api_key

         if function_call is None and functions is not None and len(functions) > 0:
@@ -464,7 +461,7 @@ def create(
     # )
     elif llm_config.model_endpoint_type == "groq":
         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for Groq.")
+            raise NotImplementedError("Streaming not yet implemented for Groq.")

         if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
             raise LettaConfigurationError(message="Groq key is missing from letta config file", missing_fields=["groq_api_key"])
@@ -517,7 +514,7 @@ def create(
         """TogetherAI endpoint that goes via /completions instead of /chat/completions"""

         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for TogetherAI (via the /completions endpoint).")
+            raise NotImplementedError("Streaming not yet implemented for TogetherAI (via the /completions endpoint).")

         if model_settings.together_api_key is None and (
             llm_config.model_endpoint == "https://api.together.ai/v1/completions"
@@ -547,7 +544,7 @@ def create(
         """Anthropic endpoint that goes via /embeddings instead of /chat/completions"""

         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
+            raise NotImplementedError("Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

@@ -631,7 +628,7 @@ def create(
         messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
         messages[0].content[
             0
-        ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
+        ].text += 'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
         return get_chat_completion(
             model=llm_config.model,
             messages=messages,
letta/llm_api/mistral.py CHANGED
@@ -1,47 +1,22 @@
-import requests
+import aiohttp

-from letta.utils import printd, smart_urljoin
+from letta.log import get_logger
+from letta.utils import smart_urljoin

+logger = get_logger(__name__)

-def mistral_get_model_list(url: str, api_key: str) -> dict:
+
+async def mistral_get_model_list_async(url: str, api_key: str) -> dict:
     url = smart_urljoin(url, "models")

     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"

-    printd(f"Sending request to {url}")
-    response = None
-    try:
+    logger.debug(f"Sending request to %s", url)
+
+    async with aiohttp.ClientSession() as session:
         # TODO add query param "tool" to be true
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response_json = response.json()  # convert to dict from string
-        return response_json
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got HTTPError, exception={http_err}, response={response}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got RequestException, exception={req_err}, response={response}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got unknown Exception, exception={e}, response={response}")
-        raise e
+        async with session.get(url, headers=headers) as response:
+            response.raise_for_status()
+            return await response.json()
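Note: a usage sketch for the new coroutine (assumed endpoint URL and key; Mistral's `/models` route returns an OpenAI-style `{"object": "list", "data": [...]}` payload):

```python
import asyncio

from letta.llm_api.mistral import mistral_get_model_list_async

async def main():
    response = await mistral_get_model_list_async("https://api.mistral.ai/v1", api_key="YOUR_MISTRAL_API_KEY")
    print([model.get("id") for model in response.get("data", [])])

asyncio.run(main())
```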