letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. letta/__init__.py +1 -1
  2. letta/agents/helpers.py +4 -0
  3. letta/agents/letta_agent.py +142 -5
  4. letta/constants.py +10 -7
  5. letta/data_sources/connectors.py +70 -53
  6. letta/embeddings.py +3 -240
  7. letta/errors.py +28 -0
  8. letta/functions/function_sets/base.py +4 -4
  9. letta/functions/functions.py +287 -32
  10. letta/functions/mcp_client/types.py +11 -0
  11. letta/functions/schema_validator.py +187 -0
  12. letta/functions/typescript_parser.py +196 -0
  13. letta/helpers/datetime_helpers.py +8 -4
  14. letta/helpers/tool_execution_helper.py +25 -2
  15. letta/llm_api/anthropic_client.py +23 -18
  16. letta/llm_api/azure_client.py +73 -0
  17. letta/llm_api/bedrock_client.py +8 -4
  18. letta/llm_api/google_vertex_client.py +14 -5
  19. letta/llm_api/llm_api_tools.py +2 -217
  20. letta/llm_api/llm_client.py +15 -1
  21. letta/llm_api/llm_client_base.py +32 -1
  22. letta/llm_api/openai.py +1 -0
  23. letta/llm_api/openai_client.py +18 -28
  24. letta/llm_api/together_client.py +55 -0
  25. letta/orm/provider.py +1 -0
  26. letta/orm/step_metrics.py +40 -1
  27. letta/otel/db_pool_monitoring.py +1 -1
  28. letta/schemas/agent.py +3 -4
  29. letta/schemas/agent_file.py +2 -0
  30. letta/schemas/block.py +11 -5
  31. letta/schemas/embedding_config.py +4 -5
  32. letta/schemas/enums.py +1 -1
  33. letta/schemas/job.py +2 -3
  34. letta/schemas/llm_config.py +79 -7
  35. letta/schemas/mcp.py +0 -24
  36. letta/schemas/message.py +0 -108
  37. letta/schemas/openai/chat_completion_request.py +1 -0
  38. letta/schemas/providers/__init__.py +0 -2
  39. letta/schemas/providers/anthropic.py +106 -8
  40. letta/schemas/providers/azure.py +102 -8
  41. letta/schemas/providers/base.py +10 -3
  42. letta/schemas/providers/bedrock.py +28 -16
  43. letta/schemas/providers/letta.py +3 -3
  44. letta/schemas/providers/ollama.py +2 -12
  45. letta/schemas/providers/openai.py +4 -4
  46. letta/schemas/providers/together.py +14 -2
  47. letta/schemas/sandbox_config.py +2 -1
  48. letta/schemas/tool.py +46 -22
  49. letta/server/rest_api/routers/v1/agents.py +179 -38
  50. letta/server/rest_api/routers/v1/folders.py +13 -8
  51. letta/server/rest_api/routers/v1/providers.py +10 -3
  52. letta/server/rest_api/routers/v1/sources.py +14 -8
  53. letta/server/rest_api/routers/v1/steps.py +17 -1
  54. letta/server/rest_api/routers/v1/tools.py +96 -5
  55. letta/server/rest_api/streaming_response.py +91 -45
  56. letta/server/server.py +27 -38
  57. letta/services/agent_manager.py +92 -20
  58. letta/services/agent_serialization_manager.py +11 -7
  59. letta/services/context_window_calculator/context_window_calculator.py +40 -2
  60. letta/services/helpers/agent_manager_helper.py +73 -12
  61. letta/services/mcp_manager.py +109 -15
  62. letta/services/passage_manager.py +28 -109
  63. letta/services/provider_manager.py +24 -0
  64. letta/services/step_manager.py +68 -0
  65. letta/services/summarizer/summarizer.py +1 -4
  66. letta/services/tool_executor/core_tool_executor.py +1 -1
  67. letta/services/tool_executor/sandbox_tool_executor.py +26 -9
  68. letta/services/tool_manager.py +82 -5
  69. letta/services/tool_sandbox/base.py +3 -11
  70. letta/services/tool_sandbox/modal_constants.py +17 -0
  71. letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
  72. letta/services/tool_sandbox/modal_sandbox.py +218 -3
  73. letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
  74. letta/services/tool_sandbox/modal_version_manager.py +273 -0
  75. letta/services/tool_sandbox/safe_pickle.py +193 -0
  76. letta/settings.py +5 -3
  77. letta/templates/sandbox_code_file.py.j2 +2 -4
  78. letta/templates/sandbox_code_file_async.py.j2 +2 -4
  79. letta/utils.py +1 -1
  80. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
  81. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
  82. letta/llm_api/anthropic.py +0 -1206
  83. letta/llm_api/aws_bedrock.py +0 -104
  84. letta/llm_api/azure_openai.py +0 -118
  85. letta/llm_api/azure_openai_constants.py +0 -11
  86. letta/llm_api/cohere.py +0 -391
  87. letta/schemas/providers/cohere.py +0 -18
  88. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
  89. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
  90. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,187 @@
1
+ """
2
+ JSON Schema validator for OpenAI strict mode compliance.
3
+
4
+ This module provides validation for JSON schemas to ensure they comply with
5
+ OpenAI's strict mode requirements for tool schemas.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Any, Dict, List, Tuple
10
+
11
+
12
class SchemaHealth(Enum):
    """Schema health status for OpenAI strict mode compliance."""

    STRICT_COMPLIANT = "STRICT_COMPLIANT"  # Passes OpenAI strict mode
    NON_STRICT_ONLY = "NON_STRICT_ONLY"  # Valid JSON Schema but too loose for strict mode
    INVALID = "INVALID"  # Broken for both


def validate_complete_json_schema(schema: Dict[str, Any]) -> Tuple[SchemaHealth, List[str]]:
    """
    Validate schema for OpenAI tool strict mode compliance.

    This validator checks for:
    - Valid JSON Schema structure
    - OpenAI strict mode requirements
    - Special cases like required properties with empty object schemas

    Args:
        schema: The JSON schema to validate

    Returns:
        A tuple of (SchemaHealth, list_of_reasons)
    """

    reasons: List[str] = []
    status = SchemaHealth.STRICT_COMPLIANT

    def mark_non_strict(reason: str):
        """Mark schema as non-strict only (valid but not strict-compliant)."""
        nonlocal status
        # INVALID always wins; only downgrade from STRICT_COMPLIANT.
        if status == SchemaHealth.STRICT_COMPLIANT:
            status = SchemaHealth.NON_STRICT_ONLY
        reasons.append(reason)

    def mark_invalid(reason: str):
        """Mark schema as invalid."""
        nonlocal status
        status = SchemaHealth.INVALID
        reasons.append(reason)

    def schema_allows_empty_object(obj_schema: Dict[str, Any]) -> bool:
        """
        Return True if this object schema allows {}, meaning no required props
        and no additionalProperties content.
        """
        if obj_schema.get("type") != "object":
            return False
        required = obj_schema.get("required", [])
        additional = obj_schema.get("additionalProperties", True)

        # Empty object: no required props and additionalProperties is false
        if not required and additional is False:
            return True
        return False

    def schema_allows_empty_array(arr_schema: Dict[str, Any]) -> bool:
        """
        Return True if this array schema allows empty arrays with no constraints.
        """
        if arr_schema.get("type") != "array":
            return False

        # If minItems is set and > 0, it doesn't allow empty
        if arr_schema.get("minItems", 0) > 0:
            return False

        # If items schema is not defined, it allows empty
        return arr_schema.get("items") is None

    def recurse(node: Dict[str, Any], path: str, is_root: bool = False):
        """Recursively validate a schema node."""
        node_type = node.get("type")

        # Handle schemas without explicit type but with type-specific keywords
        if not node_type:
            if "properties" in node or "additionalProperties" in node:
                node_type = "object"
            elif "items" in node:
                node_type = "array"
            elif any(kw in node for kw in ["anyOf", "oneOf", "allOf"]):
                # Union types don't require explicit type
                pass
            else:
                mark_invalid(f"{path}: Missing 'type'")
                return

        # OBJECT
        if node_type == "object":
            props = node.get("properties")
            if props is not None and not isinstance(props, dict):
                mark_invalid(f"{path}: 'properties' must be a dict for objects")
                return

            if "additionalProperties" not in node:
                mark_non_strict(f"{path}: 'additionalProperties' not explicitly set")
            elif node["additionalProperties"] is not False:
                mark_non_strict(f"{path}: 'additionalProperties' is not false (free-form object)")

            required = node.get("required")
            if required is None:
                # Only mark as non-strict for nested objects, not root
                if not is_root:
                    mark_non_strict(f"{path}: 'required' not specified for object")
                required = []
            elif not isinstance(required, list):
                mark_invalid(f"{path}: 'required' must be a list if present")
                required = []

            # OpenAI strict-mode extra checks:
            for req_key in required:
                if not props or req_key not in props:
                    # Fix: a required key with no matching 'properties' entry is invalid,
                    # including when 'properties' is missing or empty entirely (that case
                    # was previously accepted silently).
                    mark_invalid(f"{path}: required contains '{req_key}' not found in properties")
                else:
                    req_schema = props[req_key]
                    if isinstance(req_schema, dict):
                        # Check for empty object issue
                        if schema_allows_empty_object(req_schema):
                            mark_invalid(f"{path}: required property '{req_key}' allows empty object (OpenAI will reject)")
                        # Check for empty array issue
                        if schema_allows_empty_array(req_schema):
                            mark_invalid(f"{path}: required property '{req_key}' allows empty array (OpenAI will reject)")

            # Recurse into properties
            if props:
                for prop_name, prop_schema in props.items():
                    if isinstance(prop_schema, dict):
                        recurse(prop_schema, f"{path}.properties.{prop_name}", is_root=False)
                    else:
                        mark_invalid(f"{path}.properties.{prop_name}: Not a valid schema dict")

        # ARRAY
        elif node_type == "array":
            items = node.get("items")
            if items is None:
                mark_invalid(f"{path}: 'items' must be defined for arrays in strict mode")
            elif not isinstance(items, dict):
                mark_invalid(f"{path}: 'items' must be a schema dict for arrays")
            else:
                recurse(items, f"{path}.items", is_root=False)

        # PRIMITIVE TYPES
        elif node_type in ["string", "number", "integer", "boolean", "null"]:
            # These are generally fine, but check for specific constraints
            pass

        # UNION TYPES (may coexist with an explicit type, so not an elif chain)
        for kw in ("anyOf", "oneOf", "allOf"):
            if kw in node:
                if not isinstance(node[kw], list):
                    mark_invalid(f"{path}: '{kw}' must be a list")
                else:
                    for idx, sub_schema in enumerate(node[kw]):
                        if isinstance(sub_schema, dict):
                            recurse(sub_schema, f"{path}.{kw}[{idx}]", is_root=False)
                        else:
                            mark_invalid(f"{path}.{kw}[{idx}]: Not a valid schema dict")

    # Start validation
    if not isinstance(schema, dict):
        return SchemaHealth.INVALID, ["Top-level schema must be a dict"]

    # OpenAI tools require top-level type to be object
    if schema.get("type") != "object":
        mark_invalid("Top-level schema 'type' must be 'object' for OpenAI tools")

    # Begin recursive validation
    recurse(schema, "root", is_root=True)

    return status, reasons
@@ -0,0 +1,196 @@
1
+ """TypeScript function parsing for JSON schema generation."""
2
+
3
+ import re
4
+ from typing import Any, Dict, Optional
5
+
6
+ from letta.errors import LettaToolCreateError
7
+
8
+
9
def derive_typescript_json_schema(source_code: str, name: Optional[str] = None) -> dict:
    """Derives the OpenAI JSON schema for a given TypeScript function source code.

    This parser extracts the function signature, parameters, and types from TypeScript
    code and generates a JSON schema compatible with OpenAI's function calling format.

    Args:
        source_code: TypeScript source code containing an exported function
        name: Optional function name override

    Returns:
        JSON schema dict with name, description, and parameters

    Raises:
        LettaToolCreateError: If parsing fails or no exported function is found
    """
    try:
        # Find the exported function. The return-type annotation is optional in
        # TypeScript, so the trailing ": <type>" must not be required for a match
        # (the previous pattern made the colon mandatory and rejected functions
        # declared without an explicit return type).
        function_pattern = r"export\s+function\s+(\w+)\s*\((.*?)\)\s*(?::\s*([\w<>\[\]|]+))?"
        match = re.search(function_pattern, source_code, re.DOTALL)

        if not match:
            # Try async function
            async_pattern = r"export\s+async\s+function\s+(\w+)\s*\((.*?)\)\s*(?::\s*([\w<>\[\]|]+))?"
            match = re.search(async_pattern, source_code, re.DOTALL)

        if not match:
            raise LettaToolCreateError("No exported function found in TypeScript source code")

        func_name = match.group(1)
        params_str = match.group(2).strip()
        # return_type = match.group(3) if match.group(3) else 'any'

        # Use provided name or extracted name
        schema_name = name or func_name

        # Extract JSDoc comment for description
        description = extract_jsdoc_description(source_code, func_name)
        if not description:
            description = f"TypeScript function {func_name}"

        # Parse parameters
        parameters = parse_typescript_parameters(params_str)

        # Build OpenAI-compatible JSON schema
        schema = {
            "name": schema_name,
            "description": description,
            "parameters": {"type": "object", "properties": parameters["properties"], "required": parameters["required"]},
        }

        return schema

    except LettaToolCreateError:
        # Already a domain error with a precise message; don't double-wrap it.
        raise
    except Exception as e:
        raise LettaToolCreateError(f"TypeScript schema generation failed: {str(e)}") from e
64
+
65
+
66
def extract_jsdoc_description(source_code: str, func_name: str) -> Optional[str]:
    """Return the JSDoc summary text preceding *func_name*, or None if absent."""
    # JSDoc block immediately before the (possibly async) exported function.
    jsdoc_pattern = r"/\*\*(.*?)\*/\s*export\s+(?:async\s+)?function\s+" + re.escape(func_name)
    found = re.search(jsdoc_pattern, source_code, re.DOTALL)
    if not found:
        return None

    collected = []
    for raw_line in found.group(1).split("\n"):
        text = raw_line.strip().lstrip("*").strip()
        if text.startswith("@"):
            # Stop at the first tag: only the leading summary is wanted.
            break
        if text:
            collected.append(text)

    return " ".join(collected) if collected else None
89
+
90
+
91
def parse_typescript_parameters(params_str: str) -> Dict[str, Any]:
    """Build JSON-schema 'properties' and 'required' entries from a TS parameter list."""
    result: Dict[str, Any] = {"properties": {}, "required": []}

    if not params_str:
        return result

    # Split on top-level commas only (nested generics/tuples keep their commas).
    for raw_param in split_parameters(params_str):
        raw_param = raw_param.strip()
        if not raw_param:
            continue

        # Shape: name, optional '?', then ':' and the declared type.
        parsed = re.match(r"(\w+)(\?)?\s*:\s*(.+)", raw_param)
        if parsed is None:
            continue

        pname, optional_marker, ptype = parsed.groups()

        # Convert the TypeScript type into a JSON-schema fragment.
        result["properties"][pname] = typescript_to_json_schema_type(ptype.strip())

        # Parameters without the '?' suffix are required.
        if optional_marker != "?":
            result["required"].append(pname)

    return result
124
+
125
+
126
def split_parameters(params_str: str) -> list:
    """Split a parameter list on top-level commas, ignoring commas nested inside <>, [], {}, ()."""
    pieces = []
    buffer = ""
    nesting = 0

    for ch in params_str:
        if ch == "," and nesting == 0:
            # Top-level separator: flush the accumulated parameter.
            pieces.append(buffer)
            buffer = ""
            continue
        if ch in "<[{(":
            nesting += 1
        elif ch in ">]})":
            nesting -= 1
        buffer += ch

    if buffer:
        pieces.append(buffer)

    return pieces
148
+
149
+
150
def typescript_to_json_schema_type(ts_type: str) -> Dict[str, Any]:
    """Map a TypeScript type expression onto a JSON-schema type definition."""
    ts_type = ts_type.strip()

    # Primitive / trivial types map directly.
    primitives = {
        "string": {"type": "string"},
        "number": {"type": "number"},
        "boolean": {"type": "boolean"},
        "any": {"type": "string"},  # Default to string for any
        "void": {"type": "null"},
        "null": {"type": "null"},
        "undefined": {"type": "null"},
    }
    direct = primitives.get(ts_type)
    if direct is not None:
        return direct

    # T[] array shorthand — recurse on the element type.
    if ts_type.endswith("[]"):
        return {"type": "array", "items": typescript_to_json_schema_type(ts_type[:-2].strip())}

    # Array<T> generic form.
    generic_array = re.match(r"Array<(.+)>", ts_type)
    if generic_array:
        return {"type": "array", "items": typescript_to_json_schema_type(generic_array.group(1))}

    # Union types collapse to string (simplified handling; a more sophisticated
    # parser could model each alternative).
    if "|" in ts_type:
        return {"type": "string"}

    # Inline object literal types.
    if ts_type.startswith("{") and ts_type.endswith("}"):
        return {"type": "object"}

    # Record<K, V> becomes an object whose additionalProperties follow V.
    record = re.match(r"Record<(.+),\s*(.+)>", ts_type)
    if record:
        return {"type": "object", "additionalProperties": typescript_to_json_schema_type(record.group(2))}

    # Unknown / named types default to object.
    return {"type": "object"}
@@ -118,7 +118,7 @@ class AsyncTimer:
118
118
  def __init__(self, callback_func: Callable | None = None):
119
119
  self._start_time_ns = None
120
120
  self._end_time_ns = None
121
- self.elapsed_ns = None
121
+ self._elapsed_ns = None
122
122
  self.callback_func = callback_func
123
123
 
124
124
  async def __aenter__(self):
@@ -127,7 +127,7 @@ class AsyncTimer:
127
127
 
128
128
  async def __aexit__(self, exc_type, exc, tb):
129
129
  self._end_time_ns = time.perf_counter_ns()
130
- self.elapsed_ns = self._end_time_ns - self._start_time_ns
130
+ self._elapsed_ns = self._end_time_ns - self._start_time_ns
131
131
  if self.callback_func:
132
132
  from asyncio import iscoroutinefunction
133
133
 
@@ -139,6 +139,10 @@ class AsyncTimer:
139
139
 
140
140
  @property
141
141
  def elapsed_ms(self):
142
- if self.elapsed_ns is not None:
143
- return ns_to_ms(self.elapsed_ns)
142
+ if self._elapsed_ns is not None:
143
+ return ns_to_ms(self._elapsed_ns)
144
144
  return None
145
+
146
+ @property
147
+ def elapsed_ns(self):
148
+ return self._elapsed_ns
@@ -2,21 +2,39 @@ from collections import OrderedDict
2
2
  from typing import Any, Dict, Optional
3
3
 
4
4
  from letta.constants import PRE_EXECUTION_MESSAGE_ARG
5
+ from letta.schemas.tool import MCP_TOOL_METADATA_SCHEMA_STATUS, MCP_TOOL_METADATA_SCHEMA_WARNINGS
6
+ from letta.utils import get_logger
7
+
8
+ logger = get_logger(__name__)
5
9
 
6
10
 
7
11
  def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
8
12
  """Enables strict mode for a tool schema by setting 'strict' to True and
9
13
  disallowing additional properties in the parameters.
10
14
 
15
+ If the tool schema is NON_STRICT_ONLY, strict mode will not be applied.
16
+
11
17
  Args:
12
18
  tool_schema (Dict[str, Any]): The original tool schema.
13
19
 
14
20
  Returns:
15
- Dict[str, Any]: A new tool schema with strict mode enabled.
21
+ Dict[str, Any]: A new tool schema with strict mode conditionally enabled.
16
22
  """
17
23
  schema = tool_schema.copy()
18
24
 
19
- # Enable strict mode
25
+ # Check if schema has status metadata indicating NON_STRICT_ONLY
26
+ schema_status = schema.get(MCP_TOOL_METADATA_SCHEMA_STATUS)
27
+ if schema_status == "NON_STRICT_ONLY":
28
+ # Don't apply strict mode for non-strict schemas
29
+ # Remove the metadata fields from the schema
30
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
31
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
32
+ return schema
33
+ elif schema_status == "INVALID":
34
+ # We should not be hitting this and allowing invalid schemas to be used
35
+ logger.error(f"Tool schema {schema} is invalid: {schema.get(MCP_TOOL_METADATA_SCHEMA_WARNINGS)}")
36
+
37
+ # Enable strict mode for STRICT_COMPLIANT or unspecified health status
20
38
  schema["strict"] = True
21
39
 
22
40
  # Ensure parameters is a valid dictionary
@@ -26,6 +44,11 @@ def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
26
44
  # Set additionalProperties to False
27
45
  parameters["additionalProperties"] = False
28
46
  schema["parameters"] = parameters
47
+
48
+ # Remove the metadata fields from the schema
49
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
50
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
51
+
29
52
  return schema
30
53
 
31
54
 
@@ -31,14 +31,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
31
31
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
32
32
  from letta.log import get_logger
33
33
  from letta.otel.tracing import trace_method
34
- from letta.schemas.enums import ProviderCategory
35
34
  from letta.schemas.llm_config import LLMConfig
36
35
  from letta.schemas.message import Message as PydanticMessage
37
36
  from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
38
37
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
39
38
  from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
40
39
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
41
- from letta.services.provider_manager import ProviderManager
42
40
  from letta.settings import model_settings
43
41
 
44
42
  DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -65,7 +63,13 @@ class AnthropicClient(LLMClientBase):
65
63
  async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
66
64
  client = await self._get_anthropic_client_async(llm_config, async_client=True)
67
65
  request_data["stream"] = True
68
- return await client.beta.messages.create(**request_data)
66
+
67
+ # Add fine-grained tool streaming beta header for better streaming performance
68
+ # This helps reduce buffering when streaming tool call parameters
69
+ # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
70
+ betas = ["fine-grained-tool-streaming-2025-05-14"]
71
+
72
+ return await client.beta.messages.create(**request_data, betas=betas)
69
73
 
70
74
  @trace_method
71
75
  async def send_llm_batch_request_async(
@@ -122,19 +126,17 @@ class AnthropicClient(LLMClientBase):
122
126
  def _get_anthropic_client(
123
127
  self, llm_config: LLMConfig, async_client: bool = False
124
128
  ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
125
- override_key = None
126
- if llm_config.provider_category == ProviderCategory.byok:
127
- override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
129
+ api_key, _, _ = self.get_byok_overrides(llm_config)
128
130
 
129
131
  if async_client:
130
132
  return (
131
- anthropic.AsyncAnthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
132
- if override_key
133
+ anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
134
+ if api_key
133
135
  else anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries)
134
136
  )
135
137
  return (
136
- anthropic.Anthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
137
- if override_key
138
+ anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
139
+ if api_key
138
140
  else anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries)
139
141
  )
140
142
 
@@ -142,19 +144,17 @@ class AnthropicClient(LLMClientBase):
142
144
  async def _get_anthropic_client_async(
143
145
  self, llm_config: LLMConfig, async_client: bool = False
144
146
  ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
145
- override_key = None
146
- if llm_config.provider_category == ProviderCategory.byok:
147
- override_key = await ProviderManager().get_override_key_async(llm_config.provider_name, actor=self.actor)
147
+ api_key, _, _ = await self.get_byok_overrides_async(llm_config)
148
148
 
149
149
  if async_client:
150
150
  return (
151
- anthropic.AsyncAnthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
152
- if override_key
151
+ anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
152
+ if api_key
153
153
  else anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries)
154
154
  )
155
155
  return (
156
- anthropic.Anthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
157
- if override_key
156
+ anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
157
+ if api_key
158
158
  else anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries)
159
159
  )
160
160
 
@@ -183,9 +183,14 @@ class AnthropicClient(LLMClientBase):
183
183
 
184
184
  # Extended Thinking
185
185
  if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
186
+ thinking_budget = max(llm_config.max_reasoning_tokens, 1024)
187
+ if thinking_budget != llm_config.max_reasoning_tokens:
188
+ logger.warning(
189
+ f"Max reasoning tokens must be at least 1024 for Claude. Setting max_reasoning_tokens to 1024 for model {llm_config.model}."
190
+ )
186
191
  data["thinking"] = {
187
192
  "type": "enabled",
188
- "budget_tokens": llm_config.max_reasoning_tokens,
193
+ "budget_tokens": thinking_budget,
189
194
  }
190
195
  # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
191
196
  data["temperature"] = 1.0
@@ -0,0 +1,73 @@
1
+ import os
2
+ from typing import List, Optional, Tuple
3
+
4
+ from openai import AsyncAzureOpenAI, AzureOpenAI
5
+ from openai.types.chat.chat_completion import ChatCompletion
6
+
7
+ from letta.llm_api.openai_client import OpenAIClient
8
+ from letta.otel.tracing import trace_method
9
+ from letta.schemas.embedding_config import EmbeddingConfig
10
+ from letta.schemas.enums import ProviderCategory
11
+ from letta.schemas.llm_config import LLMConfig
12
+ from letta.settings import model_settings
13
+
14
+
15
class AzureClient(OpenAIClient):
    """Azure OpenAI chat/embedding client.

    Reuses the OpenAI request/response handling from OpenAIClient but constructs
    Azure-specific SDK clients. Credentials resolve from BYOK provider overrides
    first, then fall back as a unit to model settings / environment variables
    (AZURE_API_KEY, AZURE_BASE_URL, AZURE_API_VERSION).
    """

    @staticmethod
    def _env_credentials() -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return (api_key, base_url, api_version) from model settings or the environment."""
        api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
        base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
        api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
        return api_key, base_url, api_version

    def get_byok_overrides(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return BYOK (api_key, base_url, api_version) for the config's provider, or Nones."""
        if llm_config.provider_category == ProviderCategory.byok:
            # Imported lazily to avoid a circular import with the service layer.
            from letta.services.provider_manager import ProviderManager

            return ProviderManager().get_azure_credentials(llm_config.provider_name, actor=self.actor)

        return None, None, None

    async def get_byok_overrides_async(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Async variant of get_byok_overrides."""
        if llm_config.provider_category == ProviderCategory.byok:
            from letta.services.provider_manager import ProviderManager

            return await ProviderManager().get_azure_credentials_async(llm_config.provider_name, actor=self.actor)

        return None, None, None

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying synchronous request to the Azure OpenAI API and returns raw response dict.
        """
        api_key, base_url, api_version = self.get_byok_overrides(llm_config)
        if not api_key or not base_url or not api_version:
            # Partial BYOK credentials are ignored: fall back to env/settings as a unit.
            api_key, base_url, api_version = self._env_credentials()

        client = AzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying asynchronous request to the Azure OpenAI API and returns raw response dict.
        """
        api_key, base_url, api_version = await self.get_byok_overrides_async(llm_config)
        if not api_key or not base_url or not api_version:
            # Partial BYOK credentials are ignored: fall back to env/settings as a unit.
            api_key, base_url, api_version = self._env_credentials()

        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
        """Request embeddings given texts and embedding config"""
        # Embeddings do not consult BYOK overrides (matches prior behavior).
        api_key, base_url, api_version = self._env_credentials()
        client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)
        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

        # TODO: add total usage
        return [r.embedding for r in response.data]
@@ -17,10 +17,7 @@ logger = get_logger(__name__)
17
17
 
18
18
  class BedrockClient(AnthropicClient):
19
19
 
20
- @trace_method
21
- async def _get_anthropic_client_async(
22
- self, llm_config: LLMConfig, async_client: bool = False
23
- ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicBedrock]:
20
+ async def get_byok_overrides_async(self, llm_config: LLMConfig) -> tuple[str, str, str]:
24
21
  override_access_key_id, override_secret_access_key, override_default_region = None, None, None
25
22
  if llm_config.provider_category == ProviderCategory.byok:
26
23
  (
@@ -31,6 +28,13 @@ class BedrockClient(AnthropicClient):
31
28
  llm_config.provider_name,
32
29
  actor=self.actor,
33
30
  )
31
+ return override_access_key_id, override_secret_access_key, override_default_regions
32
+
33
+ @trace_method
34
+ async def _get_anthropic_client_async(
35
+ self, llm_config: LLMConfig, async_client: bool = False
36
+ ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicBedrock]:
37
+ override_access_key_id, override_secret_access_key, override_default_region = await self.get_byok_overrides_async(llm_config)
34
38
 
35
39
  session = Session()
36
40
  async with session.client(