PyPI - letta-nightly - Versions diffs - 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl - Mend

letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

letta/__init__.py +1 -1
letta/agents/helpers.py +4 -0
letta/agents/letta_agent.py +142 -5
letta/constants.py +10 -7
letta/data_sources/connectors.py +70 -53
letta/embeddings.py +3 -240
letta/errors.py +28 -0
letta/functions/function_sets/base.py +4 -4
letta/functions/functions.py +287 -32
letta/functions/mcp_client/types.py +11 -0
letta/functions/schema_validator.py +187 -0
letta/functions/typescript_parser.py +196 -0
letta/helpers/datetime_helpers.py +8 -4
letta/helpers/tool_execution_helper.py +25 -2
letta/llm_api/anthropic_client.py +23 -18
letta/llm_api/azure_client.py +73 -0
letta/llm_api/bedrock_client.py +8 -4
letta/llm_api/google_vertex_client.py +14 -5
letta/llm_api/llm_api_tools.py +2 -217
letta/llm_api/llm_client.py +15 -1
letta/llm_api/llm_client_base.py +32 -1
letta/llm_api/openai.py +1 -0
letta/llm_api/openai_client.py +18 -28
letta/llm_api/together_client.py +55 -0
letta/orm/provider.py +1 -0
letta/orm/step_metrics.py +40 -1
letta/otel/db_pool_monitoring.py +1 -1
letta/schemas/agent.py +3 -4
letta/schemas/agent_file.py +2 -0
letta/schemas/block.py +11 -5
letta/schemas/embedding_config.py +4 -5
letta/schemas/enums.py +1 -1
letta/schemas/job.py +2 -3
letta/schemas/llm_config.py +79 -7
letta/schemas/mcp.py +0 -24
letta/schemas/message.py +0 -108
letta/schemas/openai/chat_completion_request.py +1 -0
letta/schemas/providers/__init__.py +0 -2
letta/schemas/providers/anthropic.py +106 -8
letta/schemas/providers/azure.py +102 -8
letta/schemas/providers/base.py +10 -3
letta/schemas/providers/bedrock.py +28 -16
letta/schemas/providers/letta.py +3 -3
letta/schemas/providers/ollama.py +2 -12
letta/schemas/providers/openai.py +4 -4
letta/schemas/providers/together.py +14 -2
letta/schemas/sandbox_config.py +2 -1
letta/schemas/tool.py +46 -22
letta/server/rest_api/routers/v1/agents.py +179 -38
letta/server/rest_api/routers/v1/folders.py +13 -8
letta/server/rest_api/routers/v1/providers.py +10 -3
letta/server/rest_api/routers/v1/sources.py +14 -8
letta/server/rest_api/routers/v1/steps.py +17 -1
letta/server/rest_api/routers/v1/tools.py +96 -5
letta/server/rest_api/streaming_response.py +91 -45
letta/server/server.py +27 -38
letta/services/agent_manager.py +92 -20
letta/services/agent_serialization_manager.py +11 -7
letta/services/context_window_calculator/context_window_calculator.py +40 -2
letta/services/helpers/agent_manager_helper.py +73 -12
letta/services/mcp_manager.py +109 -15
letta/services/passage_manager.py +28 -109
letta/services/provider_manager.py +24 -0
letta/services/step_manager.py +68 -0
letta/services/summarizer/summarizer.py +1 -4
letta/services/tool_executor/core_tool_executor.py +1 -1
letta/services/tool_executor/sandbox_tool_executor.py +26 -9
letta/services/tool_manager.py +82 -5
letta/services/tool_sandbox/base.py +3 -11
letta/services/tool_sandbox/modal_constants.py +17 -0
letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
letta/services/tool_sandbox/modal_sandbox.py +218 -3
letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
letta/services/tool_sandbox/modal_version_manager.py +273 -0
letta/services/tool_sandbox/safe_pickle.py +193 -0
letta/settings.py +5 -3
letta/templates/sandbox_code_file.py.j2 +2 -4
letta/templates/sandbox_code_file_async.py.j2 +2 -4
letta/utils.py +1 -1
{letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
{letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
letta/llm_api/anthropic.py +0 -1206
letta/llm_api/aws_bedrock.py +0 -104
letta/llm_api/azure_openai.py +0 -118
letta/llm_api/azure_openai_constants.py +0 -11
letta/llm_api/cohere.py +0 -391
letta/schemas/providers/cohere.py +0 -18
{letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
{letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
{letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0

letta/services/context_window_calculator/context_window_calculator.py CHANGED Viewed

@@ -21,7 +21,26 @@ class ContextWindowCalculator:
     @staticmethod
     def extract_system_components(system_message: str) -> Tuple[str, str, str]:
-        """Extract system prompt, core memory, and external memory summary from system message"""
+        """
+        Extract structured components from a formatted system message.
+        Parses the system message to extract three distinct sections marked by XML-style tags:
+        - base_instructions: The core system prompt and agent instructions
+        - memory_blocks: The agent's core memory (persistent context)
+        - memory_metadata: Metadata about external memory systems
+        Args:
+            system_message: A formatted system message containing XML-style section markers
+        Returns:
+            A tuple of (system_prompt, core_memory, external_memory_summary)
+            Each component will be an empty string if its section is not found
+        Note:
+            This method assumes a specific format with sections delimited by:
+            <base_instructions>, <memory_blocks>, and <memory_metadata> tags.
+            The extraction is position-based and expects sections in this order.
+        """
         base_start = system_message.find("<base_instructions>")
         memory_blocks_start = system_message.find("<memory_blocks>")
         metadata_start = system_message.find("<memory_metadata>")
@@ -43,7 +62,26 @@ class ContextWindowCalculator:
     @staticmethod
     def extract_summary_memory(messages: List[Any]) -> Tuple[Optional[str], int]:
-        """Extract summary memory if present and return starting index for real messages"""
+        """
+        Extract summary memory from the message list if present.
+        Summary memory is a special message injected at position 1 (after system message)
+        that contains a condensed summary of previous conversation history. This is used
+        when the full conversation history doesn't fit in the context window.
+        Args:
+            messages: List of message objects to search for summary memory
+        Returns:
+            A tuple of (summary_text, start_index) where:
+            - summary_text: The extracted summary content, or None if not found
+            - start_index: Index where actual conversation messages begin (1 or 2)
+        Detection Logic:
+            Looks for a user message at index 1 containing the phrase
+            "The following is a summary of the previous" which indicates
+            it's a summarized conversation history rather than a real user message.
+        """
         if (
             len(messages) > 1
             and messages[1].role == MessageRole.user

letta/services/helpers/agent_manager_helper.py CHANGED Viewed

@@ -20,9 +20,9 @@ from letta.constants import (
     MULTI_AGENT_TOOLS,
     STRUCTURED_OUTPUT_MODELS,
 )
-from letta.embeddings import embedding_model
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
+from letta.llm_api.llm_client import LLMClient
 from letta.orm.agent import Agent as AgentModel
 from letta.orm.agents_tags import AgentsTags
 from letta.orm.archives_agents import ArchivesAgents
@@ -156,7 +156,25 @@ def _process_tags(agent: "AgentModel", tags: List[str], replace=True):
         agent.tags.extend([tag for tag in new_tags if tag.tag not in existing_tags])
-def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool] = None, system: Optional[str] = None):
+def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool] = None, system: Optional[str] = None) -> str:
+    """
+    Derive the appropriate system message based on agent type and configuration.
+    This function determines which system prompt template to use based on the
+    agent's type and whether sleeptime functionality is enabled. If a custom
+    system message is provided, it returns that instead.
+    Args:
+        agent_type: The type of agent (e.g., memgpt_agent, sleeptime_agent, react_agent)
+        enable_sleeptime: Whether sleeptime tools should be available (affects prompt choice)
+        system: Optional custom system message to use instead of defaults
+    Returns:
+        The system message string appropriate for the agent configuration
+    Raises:
+        ValueError: If an invalid or unsupported agent type is provided
+    """
     if system is None:
         # TODO: don't hardcode
@@ -204,8 +222,33 @@ def compile_memory_metadata_block(
     memory_edit_timestamp: datetime,
     timezone: str,
     previous_message_count: int = 0,
-    archival_memory_size: int = 0,
+    archival_memory_size: Optional[int] = 0,
 ) -> str:
+    """
+    Generate a memory metadata block for the agent's system prompt.
+    This creates a structured metadata section that informs the agent about
+    the current state of its memory systems, including timing information
+    and memory counts. This helps the agent understand what information
+    is available through its tools.
+    Args:
+        memory_edit_timestamp: When memory blocks were last modified
+        timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
+        previous_message_count: Number of messages in recall memory (conversation history)
+        archival_memory_size: Number of items in archival memory (long-term storage)
+    Returns:
+        A formatted string containing the memory metadata block with XML-style tags
+    Example Output:
+        <memory_metadata>
+        - The current time is: 2024-01-15 10:30 AM PST
+        - Memory blocks were last modified: 2024-01-15 09:00 AM PST
+        - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
+        - 156 total memories you created are stored in archival memory (use tools to access them)
+        </memory_metadata>
+    """
     # Put the timestamp in the local timezone (mimicking get_local_time())
     timestamp_str = format_datetime(memory_edit_timestamp, timezone)
@@ -939,7 +982,7 @@ def _apply_relationship_filters(query, include_relationships: Optional[List[str]
     return query
-def build_passage_query(
+async def build_passage_query(
     actor: User,
     agent_id: Optional[str] = None,
     file_id: Optional[str] = None,
@@ -963,8 +1006,14 @@ def build_passage_query(
     if embed_query:
         assert embedding_config is not None, "embedding_config must be specified for vector search"
         assert query_text is not None, "query_text must be specified for vector search"
-        embedded_text = embedding_model(embedding_config).get_text_embedding(query_text)
-        embedded_text = np.array(embedded_text)
+        # Use the new LLMClient for embeddings
+        embedding_client = LLMClient.create(
+            provider_type=embedding_config.embedding_endpoint_type,
+            actor=actor,
+        )
+        embeddings = await embedding_client.request_embeddings([query_text], embedding_config)
+        embedded_text = np.array(embeddings[0])
         embedded_text = np.pad(embedded_text, (0, MAX_EMBEDDING_DIM - embedded_text.shape[0]), mode="constant").tolist()
     # Start with base query for source passages
@@ -1150,7 +1199,7 @@ def build_passage_query(
     return main_query
-def build_source_passage_query(
+async def build_source_passage_query(
     actor: User,
     agent_id: Optional[str] = None,
     file_id: Optional[str] = None,
@@ -1171,8 +1220,14 @@ def build_source_passage_query(
     if embed_query:
         assert embedding_config is not None, "embedding_config must be specified for vector search"
         assert query_text is not None, "query_text must be specified for vector search"
-        embedded_text = embedding_model(embedding_config).get_text_embedding(query_text)
-        embedded_text = np.array(embedded_text)
+        # Use the new LLMClient for embeddings
+        embedding_client = LLMClient.create(
+            provider_type=embedding_config.embedding_endpoint_type,
+            actor=actor,
+        )
+        embeddings = await embedding_client.request_embeddings([query_text], embedding_config)
+        embedded_text = np.array(embeddings[0])
         embedded_text = np.pad(embedded_text, (0, MAX_EMBEDDING_DIM - embedded_text.shape[0]), mode="constant").tolist()
     # Base query for source passages
@@ -1248,7 +1303,7 @@ def build_source_passage_query(
     return query
-def build_agent_passage_query(
+async def build_agent_passage_query(
     actor: User,
     agent_id: str,  # Required for agent passages
     query_text: Optional[str] = None,
@@ -1267,8 +1322,14 @@ def build_agent_passage_query(
     if embed_query:
         assert embedding_config is not None, "embedding_config must be specified for vector search"
         assert query_text is not None, "query_text must be specified for vector search"
-        embedded_text = embedding_model(embedding_config).get_text_embedding(query_text)
-        embedded_text = np.array(embedded_text)
+        # Use the new LLMClient for embeddings
+        embedding_client = LLMClient.create(
+            provider_type=embedding_config.embedding_endpoint_type,
+            actor=actor,
+        )
+        embeddings = await embedding_client.request_embeddings([query_text], embedding_config)
+        embedded_text = np.array(embeddings[0])
         embedded_text = np.pad(embedded_text, (0, MAX_EMBEDDING_DIM - embedded_text.shape[0]), mode="constant").tolist()
     # Base query for agent passages - join through archives_agents

letta/services/mcp_manager.py CHANGED Viewed

@@ -6,11 +6,19 @@ from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Tuple, Union
 from fastapi import HTTPException
-from sqlalchemy import null
+from sqlalchemy import delete, null
 from starlette.requests import Request
 import letta.constants as constants
-from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig
+from letta.functions.mcp_client.types import (
+    MCPServerType,
+    MCPTool,
+    MCPToolHealth,
+    SSEServerConfig,
+    StdioServerConfig,
+    StreamableHTTPServerConfig,
+)
+from letta.functions.schema_validator import validate_complete_json_schema
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.orm.mcp_oauth import MCPOAuth, OAuthSessionStatus
@@ -49,6 +57,7 @@ class MCPManager:
     @enforce_types
     async def list_mcp_server_tools(self, mcp_server_name: str, actor: PydanticUser) -> List[MCPTool]:
         """Get a list of all tools for a specific MCP server."""
+        mcp_client = None
         try:
             mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
             mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
@@ -58,6 +67,13 @@ class MCPManager:
             # list tools
             tools = await mcp_client.list_tools()
+            # Add health information to each tool
+            for tool in tools:
+                if tool.inputSchema:
+                    health_status, reasons = validate_complete_json_schema(tool.inputSchema)
+                    tool.health = MCPToolHealth(status=health_status.value, reasons=reasons)
             return tools
         except Exception as e:
             # MCP tool listing errors are often due to connection/configuration issues, not system errors
@@ -65,7 +81,8 @@ class MCPManager:
             logger.info(f"Error listing tools for MCP server {mcp_server_name}: {e}")
             return []
         finally:
-            await mcp_client.cleanup()
+            if mcp_client:
+                await mcp_client.cleanup()
     @enforce_types
     async def execute_mcp_server_tool(
@@ -114,7 +131,16 @@ class MCPManager:
         mcp_tools = await self.list_mcp_server_tools(mcp_server_name, actor=actor)
         for mcp_tool in mcp_tools:
+            # TODO: @jnjpng move health check to tool class
             if mcp_tool.name == mcp_tool_name:
+                # Check tool health - reject only INVALID tools
+                if mcp_tool.health:
+                    if mcp_tool.health.status == "INVALID":
+                        raise ValueError(
+                            f"Tool {mcp_tool_name} cannot be attached, JSON schema is invalid."
+                            f"Reasons: {', '.join(mcp_tool.health.reasons)}"
+                        )
                 tool_create = ToolCreate.from_mcp(mcp_server_name=mcp_server_name, mcp_tool=mcp_tool)
                 return await self.tool_manager.create_mcp_tool_async(
                     tool_create=tool_create, mcp_server_name=mcp_server_name, mcp_server_id=mcp_server_id, actor=actor
@@ -169,17 +195,50 @@ class MCPManager:
     async def create_mcp_server(self, pydantic_mcp_server: MCPServer, actor: PydanticUser) -> MCPServer:
         """Create a new MCP server."""
         async with db_registry.async_session() as session:
-            # Set the organization id at the ORM layer
-            pydantic_mcp_server.organization_id = actor.organization_id
-            mcp_server_data = pydantic_mcp_server.model_dump(to_orm=True)
+            try:
+                # Set the organization id at the ORM layer
+                pydantic_mcp_server.organization_id = actor.organization_id
+                mcp_server_data = pydantic_mcp_server.model_dump(to_orm=True)
+                # Ensure custom_headers None is stored as SQL NULL, not JSON null
+                if mcp_server_data.get("custom_headers") is None:
+                    mcp_server_data.pop("custom_headers", None)
+                mcp_server = MCPServerModel(**mcp_server_data)
+                mcp_server = await mcp_server.create_async(session, actor=actor, no_commit=True)
+                # Link existing OAuth sessions for the same user and server URL
+                # This ensures OAuth sessions created during testing get linked to the server
+                server_url = getattr(mcp_server, "server_url", None)
+                if server_url:
+                    from sqlalchemy import select
+                    result = await session.execute(
+                        select(MCPOAuth).where(
+                            MCPOAuth.server_url == server_url,
+                            MCPOAuth.organization_id == actor.organization_id,
+                            MCPOAuth.user_id == actor.id,  # Only link sessions for the same user
+                            MCPOAuth.server_id.is_(None),  # Only update sessions not already linked
+                        )
+                    )
+                    oauth_sessions = result.scalars().all()
-            # Ensure custom_headers None is stored as SQL NULL, not JSON null
-            if mcp_server_data.get("custom_headers") is None:
-                mcp_server_data.pop("custom_headers", None)
+                    # TODO: @jnjpng we should upate sessions in bulk
+                    for oauth_session in oauth_sessions:
+                        oauth_session.server_id = mcp_server.id
+                        await oauth_session.update_async(db_session=session, actor=actor, no_commit=True)
-            mcp_server = MCPServerModel(**mcp_server_data)
-            mcp_server = await mcp_server.create_async(session, actor=actor)
-            return mcp_server.to_pydantic()
+                    if oauth_sessions:
+                        logger.info(
+                            f"Linked {len(oauth_sessions)} OAuth sessions to MCP server {mcp_server.id} (URL: {server_url}) for user {actor.id}"
+                        )
+                await session.commit()
+                return mcp_server.to_pydantic()
+            except Exception as e:
+                await session.rollback()
+                logger.error(f"Failed to create MCP server: {e}")
+                raise
     @enforce_types
     async def update_mcp_server_by_id(self, mcp_server_id: str, mcp_server_update: UpdateMCPServer, actor: PydanticUser) -> MCPServer:
@@ -252,7 +311,7 @@ class MCPManager:
     @enforce_types
     async def get_mcp_server(self, mcp_server_name: str, actor: PydanticUser) -> PydanticTool:
-        """Get a tool by name."""
+        """Get a MCP server by name."""
         async with db_registry.async_session() as session:
             mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor)
             mcp_server = await MCPServerModel.read_async(db_session=session, identifier=mcp_server_id, actor=actor)
@@ -286,13 +345,48 @@ class MCPManager:
     @enforce_types
     async def delete_mcp_server_by_id(self, mcp_server_id: str, actor: PydanticUser) -> None:
-        """Delete a tool by its ID."""
+        """Delete a MCP server by its ID."""
         async with db_registry.async_session() as session:
             try:
                 mcp_server = await MCPServerModel.read_async(db_session=session, identifier=mcp_server_id, actor=actor)
-                await mcp_server.hard_delete_async(db_session=session, actor=actor)
+                if not mcp_server:
+                    raise NoResultFound(f"MCP server with id {mcp_server_id} not found.")
+                server_url = getattr(mcp_server, "server_url", None)
+                # Delete OAuth sessions for the same user and server URL in the same transaction
+                # This handles orphaned sessions that were created during testing/connection
+                oauth_count = 0
+                if server_url:
+                    result = await session.execute(
+                        delete(MCPOAuth).where(
+                            MCPOAuth.server_url == server_url,
+                            MCPOAuth.organization_id == actor.organization_id,
+                            MCPOAuth.user_id == actor.id,  # Only delete sessions for the same user
+                        )
+                    )
+                    oauth_count = result.rowcount
+                    if oauth_count > 0:
+                        logger.info(
+                            f"Deleting {oauth_count} OAuth sessions for MCP server {mcp_server_id} (URL: {server_url}) for user {actor.id}"
+                        )
+                # Delete the MCP server, will cascade delete to linked OAuth sessions
+                await session.execute(
+                    delete(MCPServerModel).where(
+                        MCPServerModel.id == mcp_server_id,
+                        MCPServerModel.organization_id == actor.organization_id,
+                    )
+                )
+                await session.commit()
             except NoResultFound:
+                await session.rollback()
                 raise ValueError(f"MCP server with id {mcp_server_id} not found.")
+            except Exception as e:
+                await session.rollback()
+                logger.error(f"Failed to delete MCP server {mcp_server_id}: {e}")
+                raise
     def read_mcp_config(self) -> dict[str, Union[SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig]]:
         mcp_server_list = {}

letta/services/passage_manager.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import asyncio
 from datetime import datetime, timezone
 from functools import lru_cache
 from typing import List, Optional
@@ -7,8 +6,9 @@ from openai import AsyncOpenAI, OpenAI
 from sqlalchemy import select
 from letta.constants import MAX_EMBEDDING_DIM
-from letta.embeddings import embedding_model, parse_and_chunk_text
+from letta.embeddings import parse_and_chunk_text
 from letta.helpers.decorators import async_redis_cache
+from letta.llm_api.llm_client import LLMClient
 from letta.orm import ArchivesAgents
 from letta.orm.errors import NoResultFound
 from letta.orm.passage import ArchivalPassage, SourcePassage
@@ -460,7 +460,7 @@ class PassageManager:
     @enforce_types
     @trace_method
-    def insert_passage(
+    async def insert_passage(
         self,
         agent_state: AgentState,
         text: str,
@@ -469,45 +469,32 @@ class PassageManager:
         """Insert passage(s) into archival memory"""
         embedding_chunk_size = agent_state.embedding_config.embedding_chunk_size
+        embedding_client = LLMClient.create(
+            provider_type=agent_state.embedding_config.embedding_endpoint_type,
+            actor=actor,
+        )
-        # TODO eventually migrate off of llama-index for embeddings?
-        # Already causing pain for OpenAI proxy endpoints like LM Studio...
-        if agent_state.embedding_config.embedding_endpoint_type != "openai":
-            embed_model = embedding_model(agent_state.embedding_config)
+        # Get or create the default archive for the agent
+        archive = await self.archive_manager.get_or_create_default_archive_for_agent_async(
+            agent_id=agent_state.id, agent_name=agent_state.name, actor=actor
+        )
-        passages = []
+        text_chunks = list(parse_and_chunk_text(text, embedding_chunk_size))
-        try:
-            # breakup string into passages
-            for text in parse_and_chunk_text(text, embedding_chunk_size):
-                if agent_state.embedding_config.embedding_endpoint_type != "openai":
-                    embedding = embed_model.get_text_embedding(text)
-                else:
-                    # TODO should have the settings passed in via the server call
-                    embedding = get_openai_embedding(
-                        text,
-                        agent_state.embedding_config.embedding_model,
-                        agent_state.embedding_config.embedding_endpoint,
-                    )
+        if not text_chunks:
+            return []
-                if isinstance(embedding, dict):
-                    try:
-                        embedding = embedding["data"][0]["embedding"]
-                    except (KeyError, IndexError):
-                        # TODO as a fallback, see if we can find any lists in the payload
-                        raise TypeError(
-                            f"Got back an unexpected payload from text embedding function, type={type(embedding)}, value={embedding}"
-                        )
-                # Get or create the default archive for the agent
-                archive = self.archive_manager.get_or_create_default_archive_for_agent(
-                    agent_id=agent_state.id, agent_name=agent_state.name, actor=actor
-                )
+        try:
+            # Generate embeddings for all chunks using the new async API
+            embeddings = await embedding_client.request_embeddings(text_chunks, agent_state.embedding_config)
-                passage = self.create_agent_passage(
+            passages = []
+            for chunk_text, embedding in zip(text_chunks, embeddings):
+                passage = await self.create_agent_passage_async(
                     PydanticPassage(
                         organization_id=actor.organization_id,
                         archive_id=archive.id,
-                        text=text,
+                        text=chunk_text,
                         embedding=embedding,
                         embedding_config=agent_state.embedding_config,
                     ),
@@ -520,84 +507,16 @@ class PassageManager:
         except Exception as e:
             raise e
-    @enforce_types
-    @trace_method
-    async def insert_passage_async(
-        self,
-        agent_state: AgentState,
-        text: str,
-        actor: PydanticUser,
-        image_ids: Optional[List[str]] = None,
-    ) -> List[PydanticPassage]:
-        """Insert passage(s) into archival memory"""
-        # Get or create default archive for the agent
-        archive = await self.archive_manager.get_or_create_default_archive_for_agent_async(
-            agent_id=agent_state.id,
-            agent_name=agent_state.name,
+    async def _generate_embeddings_concurrent(self, text_chunks: List[str], embedding_config, actor: PydanticUser) -> List[List[float]]:
+        """Generate embeddings for all text chunks concurrently using LLMClient"""
+        embedding_client = LLMClient.create(
+            provider_type=embedding_config.embedding_endpoint_type,
             actor=actor,
         )
-        archive_id = archive.id
-        embedding_chunk_size = agent_state.embedding_config.embedding_chunk_size
-        text_chunks = list(parse_and_chunk_text(text, embedding_chunk_size))
-        if not text_chunks:
-            return []
-        try:
-            embeddings = await self._generate_embeddings_concurrent(text_chunks, agent_state.embedding_config)
-            passages = [
-                PydanticPassage(
-                    organization_id=actor.organization_id,
-                    archive_id=archive_id,
-                    text=chunk_text,
-                    embedding=embedding,
-                    embedding_config=agent_state.embedding_config,
-                )
-                for chunk_text, embedding in zip(text_chunks, embeddings)
-            ]
-            passages = await self.create_many_archival_passages_async(passages=passages, actor=actor)
-            return passages
-        except Exception as e:
-            raise e
-    async def _generate_embeddings_concurrent(self, text_chunks: List[str], embedding_config) -> List[List[float]]:
-        """Generate embeddings for all text chunks concurrently"""
-        if embedding_config.embedding_endpoint_type != "openai":
-            embed_model = embedding_model(embedding_config)
-            loop = asyncio.get_event_loop()
-            tasks = [loop.run_in_executor(None, embed_model.get_text_embedding, text) for text in text_chunks]
-            embeddings = await asyncio.gather(*tasks)
-        else:
-            tasks = [
-                get_openai_embedding_async(
-                    text,
-                    embedding_config.embedding_model,
-                    embedding_config.embedding_endpoint,
-                )
-                for text in text_chunks
-            ]
-            embeddings = await asyncio.gather(*tasks)
-        processed_embeddings = []
-        for embedding in embeddings:
-            if isinstance(embedding, dict):
-                try:
-                    processed_embeddings.append(embedding["data"][0]["embedding"])
-                except (KeyError, IndexError):
-                    raise TypeError(
-                        f"Got back an unexpected payload from text embedding function, type={type(embedding)}, value={embedding}"
-                    )
-            else:
-                processed_embeddings.append(embedding)
-        return processed_embeddings
+        embeddings = await embedding_client.request_embeddings(text_chunks, embedding_config)
+        return embeddings
     @enforce_types
     @trace_method

letta/services/provider_manager.py CHANGED Viewed

@@ -205,6 +205,28 @@ class ProviderManager:
         region = providers[0].region if providers else None
         return access_key, secret_key, region
+    @enforce_types
+    @trace_method
+    def get_azure_credentials(
+        self, provider_name: Union[str, None], actor: PydanticUser
+    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+        providers = self.list_providers(name=provider_name, actor=actor)
+        api_key = providers[0].api_key if providers else None
+        base_url = providers[0].base_url if providers else None
+        api_version = providers[0].api_version if providers else None
+        return api_key, base_url, api_version
+    @enforce_types
+    @trace_method
+    async def get_azure_credentials_async(
+        self, provider_name: Union[str, None], actor: PydanticUser
+    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+        providers = await self.list_providers_async(name=provider_name, actor=actor)
+        api_key = providers[0].api_key if providers else None
+        base_url = providers[0].base_url if providers else None
+        api_version = providers[0].api_version if providers else None
+        return api_key, base_url, api_version
     @enforce_types
     @trace_method
     async def check_provider_api_key(self, provider_check: ProviderCheck) -> None:
@@ -215,6 +237,8 @@ class ProviderManager:
             provider_category=ProviderCategory.byok,
             access_key=provider_check.access_key,  # This contains the access key ID for Bedrock
             region=provider_check.region,
+            base_url=provider_check.base_url,
+            api_version=provider_check.api_version,
         ).cast_to_subtype()
         # TODO: add more string sanity checks here before we hit actual endpoints

letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl