letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. letta/__init__.py +1 -1
  2. letta/agents/helpers.py +4 -0
  3. letta/agents/letta_agent.py +142 -5
  4. letta/constants.py +10 -7
  5. letta/data_sources/connectors.py +70 -53
  6. letta/embeddings.py +3 -240
  7. letta/errors.py +28 -0
  8. letta/functions/function_sets/base.py +4 -4
  9. letta/functions/functions.py +287 -32
  10. letta/functions/mcp_client/types.py +11 -0
  11. letta/functions/schema_validator.py +187 -0
  12. letta/functions/typescript_parser.py +196 -0
  13. letta/helpers/datetime_helpers.py +8 -4
  14. letta/helpers/tool_execution_helper.py +25 -2
  15. letta/llm_api/anthropic_client.py +23 -18
  16. letta/llm_api/azure_client.py +73 -0
  17. letta/llm_api/bedrock_client.py +8 -4
  18. letta/llm_api/google_vertex_client.py +14 -5
  19. letta/llm_api/llm_api_tools.py +2 -217
  20. letta/llm_api/llm_client.py +15 -1
  21. letta/llm_api/llm_client_base.py +32 -1
  22. letta/llm_api/openai.py +1 -0
  23. letta/llm_api/openai_client.py +18 -28
  24. letta/llm_api/together_client.py +55 -0
  25. letta/orm/provider.py +1 -0
  26. letta/orm/step_metrics.py +40 -1
  27. letta/otel/db_pool_monitoring.py +1 -1
  28. letta/schemas/agent.py +3 -4
  29. letta/schemas/agent_file.py +2 -0
  30. letta/schemas/block.py +11 -5
  31. letta/schemas/embedding_config.py +4 -5
  32. letta/schemas/enums.py +1 -1
  33. letta/schemas/job.py +2 -3
  34. letta/schemas/llm_config.py +79 -7
  35. letta/schemas/mcp.py +0 -24
  36. letta/schemas/message.py +0 -108
  37. letta/schemas/openai/chat_completion_request.py +1 -0
  38. letta/schemas/providers/__init__.py +0 -2
  39. letta/schemas/providers/anthropic.py +106 -8
  40. letta/schemas/providers/azure.py +102 -8
  41. letta/schemas/providers/base.py +10 -3
  42. letta/schemas/providers/bedrock.py +28 -16
  43. letta/schemas/providers/letta.py +3 -3
  44. letta/schemas/providers/ollama.py +2 -12
  45. letta/schemas/providers/openai.py +4 -4
  46. letta/schemas/providers/together.py +14 -2
  47. letta/schemas/sandbox_config.py +2 -1
  48. letta/schemas/tool.py +46 -22
  49. letta/server/rest_api/routers/v1/agents.py +179 -38
  50. letta/server/rest_api/routers/v1/folders.py +13 -8
  51. letta/server/rest_api/routers/v1/providers.py +10 -3
  52. letta/server/rest_api/routers/v1/sources.py +14 -8
  53. letta/server/rest_api/routers/v1/steps.py +17 -1
  54. letta/server/rest_api/routers/v1/tools.py +96 -5
  55. letta/server/rest_api/streaming_response.py +91 -45
  56. letta/server/server.py +27 -38
  57. letta/services/agent_manager.py +92 -20
  58. letta/services/agent_serialization_manager.py +11 -7
  59. letta/services/context_window_calculator/context_window_calculator.py +40 -2
  60. letta/services/helpers/agent_manager_helper.py +73 -12
  61. letta/services/mcp_manager.py +109 -15
  62. letta/services/passage_manager.py +28 -109
  63. letta/services/provider_manager.py +24 -0
  64. letta/services/step_manager.py +68 -0
  65. letta/services/summarizer/summarizer.py +1 -4
  66. letta/services/tool_executor/core_tool_executor.py +1 -1
  67. letta/services/tool_executor/sandbox_tool_executor.py +26 -9
  68. letta/services/tool_manager.py +82 -5
  69. letta/services/tool_sandbox/base.py +3 -11
  70. letta/services/tool_sandbox/modal_constants.py +17 -0
  71. letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
  72. letta/services/tool_sandbox/modal_sandbox.py +218 -3
  73. letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
  74. letta/services/tool_sandbox/modal_version_manager.py +273 -0
  75. letta/services/tool_sandbox/safe_pickle.py +193 -0
  76. letta/settings.py +5 -3
  77. letta/templates/sandbox_code_file.py.j2 +2 -4
  78. letta/templates/sandbox_code_file_async.py.j2 +2 -4
  79. letta/utils.py +1 -1
  80. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
  81. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
  82. letta/llm_api/anthropic.py +0 -1206
  83. letta/llm_api/aws_bedrock.py +0 -104
  84. letta/llm_api/azure_openai.py +0 -118
  85. letta/llm_api/azure_openai_constants.py +0 -11
  86. letta/llm_api/cohere.py +0 -391
  87. letta/schemas/providers/cohere.py +0 -18
  88. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
  89. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
  90. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,187 @@
1
+ """
2
+ JSON Schema validator for OpenAI strict mode compliance.
3
+
4
+ This module provides validation for JSON schemas to ensure they comply with
5
+ OpenAI's strict mode requirements for tool schemas.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Any, Dict, List, Tuple
10
+
11
+
12
class SchemaHealth(Enum):
    """Schema health status for OpenAI strict mode compliance."""

    STRICT_COMPLIANT = "STRICT_COMPLIANT"  # Passes OpenAI strict mode
    NON_STRICT_ONLY = "NON_STRICT_ONLY"  # Valid JSON Schema but too loose for strict mode
    INVALID = "INVALID"  # Broken for both


def validate_complete_json_schema(schema: Dict[str, Any]) -> Tuple[SchemaHealth, List[str]]:
    """
    Validate schema for OpenAI tool strict mode compliance.

    This validator checks for:
    - Valid JSON Schema structure
    - OpenAI strict mode requirements
    - Special cases like required properties with empty object schemas

    Args:
        schema: The JSON schema to validate

    Returns:
        A tuple of (SchemaHealth, list_of_reasons)
    """

    reasons: List[str] = []
    status = SchemaHealth.STRICT_COMPLIANT

    def mark_non_strict(reason: str):
        """Mark schema as non-strict only (valid but not strict-compliant)."""
        nonlocal status
        # INVALID always wins; only downgrade from STRICT_COMPLIANT.
        if status == SchemaHealth.STRICT_COMPLIANT:
            status = SchemaHealth.NON_STRICT_ONLY
        reasons.append(reason)

    def mark_invalid(reason: str):
        """Mark schema as invalid."""
        nonlocal status
        status = SchemaHealth.INVALID
        reasons.append(reason)

    def schema_allows_empty_object(obj_schema: Dict[str, Any]) -> bool:
        """
        Return True if this object schema allows {}, meaning no required props
        and no additionalProperties content.
        """
        if obj_schema.get("type") != "object":
            return False
        required = obj_schema.get("required", [])
        additional = obj_schema.get("additionalProperties", True)

        # Empty object: no required props and additionalProperties is false
        if not required and additional is False:
            return True
        return False

    def schema_allows_empty_array(arr_schema: Dict[str, Any]) -> bool:
        """
        Return True if this array schema allows empty arrays with no constraints.
        """
        if arr_schema.get("type") != "array":
            return False

        # If minItems is set and > 0, it doesn't allow empty
        if arr_schema.get("minItems", 0) > 0:
            return False

        # If items schema is not defined, it allows empty
        return arr_schema.get("items") is None

    def recurse(node: Dict[str, Any], path: str, is_root: bool = False):
        """Recursively validate a schema node."""
        node_type = node.get("type")

        # Handle schemas without explicit type but with type-specific keywords
        if not node_type:
            if "properties" in node or "additionalProperties" in node:
                node_type = "object"
            elif "items" in node:
                node_type = "array"
            elif any(kw in node for kw in ["anyOf", "oneOf", "allOf"]):
                # Union types don't require explicit type
                pass
            else:
                mark_invalid(f"{path}: Missing 'type'")
                return

        # OBJECT
        if node_type == "object":
            props = node.get("properties")
            if props is not None and not isinstance(props, dict):
                mark_invalid(f"{path}: 'properties' must be a dict for objects")
                return

            if "additionalProperties" not in node:
                mark_non_strict(f"{path}: 'additionalProperties' not explicitly set")
            elif node["additionalProperties"] is not False:
                mark_non_strict(f"{path}: 'additionalProperties' is not false (free-form object)")

            required = node.get("required")
            if required is None:
                # Only mark as non-strict for nested objects, not root
                if not is_root:
                    mark_non_strict(f"{path}: 'required' not specified for object")
                required = []
            elif not isinstance(required, list):
                mark_invalid(f"{path}: 'required' must be a list if present")
                required = []

            # OpenAI strict-mode extra checks:
            for req_key in required:
                if not props or req_key not in props:
                    # Fix: a required key with no matching 'properties' entry is invalid,
                    # including when 'properties' is missing or empty entirely (that case
                    # was previously accepted silently).
                    mark_invalid(f"{path}: required contains '{req_key}' not found in properties")
                else:
                    req_schema = props[req_key]
                    if isinstance(req_schema, dict):
                        # Check for empty object issue
                        if schema_allows_empty_object(req_schema):
                            mark_invalid(f"{path}: required property '{req_key}' allows empty object (OpenAI will reject)")
                        # Check for empty array issue
                        if schema_allows_empty_array(req_schema):
                            mark_invalid(f"{path}: required property '{req_key}' allows empty array (OpenAI will reject)")

            # Recurse into properties
            if props:
                for prop_name, prop_schema in props.items():
                    if isinstance(prop_schema, dict):
                        recurse(prop_schema, f"{path}.properties.{prop_name}", is_root=False)
                    else:
                        mark_invalid(f"{path}.properties.{prop_name}: Not a valid schema dict")

        # ARRAY
        elif node_type == "array":
            items = node.get("items")
            if items is None:
                mark_invalid(f"{path}: 'items' must be defined for arrays in strict mode")
            elif not isinstance(items, dict):
                mark_invalid(f"{path}: 'items' must be a schema dict for arrays")
            else:
                recurse(items, f"{path}.items", is_root=False)

        # PRIMITIVE TYPES
        elif node_type in ["string", "number", "integer", "boolean", "null"]:
            # These are generally fine, but check for specific constraints
            pass

        # UNION TYPES (may coexist with an explicit type, so not an elif chain)
        for kw in ("anyOf", "oneOf", "allOf"):
            if kw in node:
                if not isinstance(node[kw], list):
                    mark_invalid(f"{path}: '{kw}' must be a list")
                else:
                    for idx, sub_schema in enumerate(node[kw]):
                        if isinstance(sub_schema, dict):
                            recurse(sub_schema, f"{path}.{kw}[{idx}]", is_root=False)
                        else:
                            mark_invalid(f"{path}.{kw}[{idx}]: Not a valid schema dict")

    # Start validation
    if not isinstance(schema, dict):
        return SchemaHealth.INVALID, ["Top-level schema must be a dict"]

    # OpenAI tools require top-level type to be object
    if schema.get("type") != "object":
        mark_invalid("Top-level schema 'type' must be 'object' for OpenAI tools")

    # Begin recursive validation
    recurse(schema, "root", is_root=True)

    return status, reasons
@@ -0,0 +1,196 @@
1
+ """TypeScript function parsing for JSON schema generation."""
2
+
3
+ import re
4
+ from typing import Any, Dict, Optional
5
+
6
+ from letta.errors import LettaToolCreateError
7
+
8
+
9
def derive_typescript_json_schema(source_code: str, name: Optional[str] = None) -> dict:
    """Derives the OpenAI JSON schema for a given TypeScript function source code.

    This parser extracts the function signature, parameters, and types from TypeScript
    code and generates a JSON schema compatible with OpenAI's function calling format.

    Args:
        source_code: TypeScript source code containing an exported function
        name: Optional function name override

    Returns:
        JSON schema dict with name, description, and parameters

    Raises:
        LettaToolCreateError: If parsing fails or no exported function is found
    """
    try:
        # Find the exported function. The return-type annotation is optional in
        # TypeScript, so the trailing ": <type>" must not be required for a match
        # (the previous pattern made the colon mandatory and rejected functions
        # declared without an explicit return type).
        function_pattern = r"export\s+function\s+(\w+)\s*\((.*?)\)\s*(?::\s*([\w<>\[\]|]+))?"
        match = re.search(function_pattern, source_code, re.DOTALL)

        if not match:
            # Try async function
            async_pattern = r"export\s+async\s+function\s+(\w+)\s*\((.*?)\)\s*(?::\s*([\w<>\[\]|]+))?"
            match = re.search(async_pattern, source_code, re.DOTALL)

        if not match:
            raise LettaToolCreateError("No exported function found in TypeScript source code")

        func_name = match.group(1)
        params_str = match.group(2).strip()
        # return_type = match.group(3) if match.group(3) else 'any'

        # Use provided name or extracted name
        schema_name = name or func_name

        # Extract JSDoc comment for description
        description = extract_jsdoc_description(source_code, func_name)
        if not description:
            description = f"TypeScript function {func_name}"

        # Parse parameters
        parameters = parse_typescript_parameters(params_str)

        # Build OpenAI-compatible JSON schema
        schema = {
            "name": schema_name,
            "description": description,
            "parameters": {"type": "object", "properties": parameters["properties"], "required": parameters["required"]},
        }

        return schema

    except LettaToolCreateError:
        # Already a domain error with a precise message; don't double-wrap it.
        raise
    except Exception as e:
        raise LettaToolCreateError(f"TypeScript schema generation failed: {str(e)}") from e
64
+
65
+
66
def extract_jsdoc_description(source_code: str, func_name: str) -> Optional[str]:
    """Return the JSDoc summary text preceding *func_name*, or None if absent."""
    # JSDoc block immediately before the (possibly async) exported function.
    jsdoc_pattern = r"/\*\*(.*?)\*/\s*export\s+(?:async\s+)?function\s+" + re.escape(func_name)
    found = re.search(jsdoc_pattern, source_code, re.DOTALL)
    if not found:
        return None

    collected = []
    for raw_line in found.group(1).split("\n"):
        text = raw_line.strip().lstrip("*").strip()
        if text.startswith("@"):
            # Stop at the first tag: only the leading summary is wanted.
            break
        if text:
            collected.append(text)

    return " ".join(collected) if collected else None
89
+
90
+
91
def parse_typescript_parameters(params_str: str) -> Dict[str, Any]:
    """Build JSON-schema 'properties' and 'required' entries from a TS parameter list."""
    result: Dict[str, Any] = {"properties": {}, "required": []}

    if not params_str:
        return result

    # Split on top-level commas only (nested generics/tuples keep their commas).
    for raw_param in split_parameters(params_str):
        raw_param = raw_param.strip()
        if not raw_param:
            continue

        # Shape: name, optional '?', then ':' and the declared type.
        parsed = re.match(r"(\w+)(\?)?\s*:\s*(.+)", raw_param)
        if parsed is None:
            continue

        pname, optional_marker, ptype = parsed.groups()

        # Convert the TypeScript type into a JSON-schema fragment.
        result["properties"][pname] = typescript_to_json_schema_type(ptype.strip())

        # Parameters without the '?' suffix are required.
        if optional_marker != "?":
            result["required"].append(pname)

    return result
124
+
125
+
126
def split_parameters(params_str: str) -> list:
    """Split a parameter list on top-level commas, ignoring commas nested inside <>, [], {}, ()."""
    pieces = []
    buffer = ""
    nesting = 0

    for ch in params_str:
        if ch == "," and nesting == 0:
            # Top-level separator: flush the accumulated parameter.
            pieces.append(buffer)
            buffer = ""
            continue
        if ch in "<[{(":
            nesting += 1
        elif ch in ">]})":
            nesting -= 1
        buffer += ch

    if buffer:
        pieces.append(buffer)

    return pieces
148
+
149
+
150
def typescript_to_json_schema_type(ts_type: str) -> Dict[str, Any]:
    """Map a TypeScript type expression onto a JSON-schema type definition."""
    ts_type = ts_type.strip()

    # Primitive / trivial types map directly.
    primitives = {
        "string": {"type": "string"},
        "number": {"type": "number"},
        "boolean": {"type": "boolean"},
        "any": {"type": "string"},  # Default to string for any
        "void": {"type": "null"},
        "null": {"type": "null"},
        "undefined": {"type": "null"},
    }
    direct = primitives.get(ts_type)
    if direct is not None:
        return direct

    # T[] array shorthand — recurse on the element type.
    if ts_type.endswith("[]"):
        return {"type": "array", "items": typescript_to_json_schema_type(ts_type[:-2].strip())}

    # Array<T> generic form.
    generic_array = re.match(r"Array<(.+)>", ts_type)
    if generic_array:
        return {"type": "array", "items": typescript_to_json_schema_type(generic_array.group(1))}

    # Union types collapse to string (simplified handling; a more sophisticated
    # parser could model each alternative).
    if "|" in ts_type:
        return {"type": "string"}

    # Inline object literal types.
    if ts_type.startswith("{") and ts_type.endswith("}"):
        return {"type": "object"}

    # Record<K, V> becomes an object whose additionalProperties follow V.
    record = re.match(r"Record<(.+),\s*(.+)>", ts_type)
    if record:
        return {"type": "object", "additionalProperties": typescript_to_json_schema_type(record.group(2))}

    # Unknown / named types default to object.
    return {"type": "object"}
@@ -118,7 +118,7 @@ class AsyncTimer:
118
118
  def __init__(self, callback_func: Callable | None = None):
119
119
  self._start_time_ns = None
120
120
  self._end_time_ns = None
121
- self.elapsed_ns = None
121
+ self._elapsed_ns = None
122
122
  self.callback_func = callback_func
123
123
 
124
124
  async def __aenter__(self):
@@ -127,7 +127,7 @@ class AsyncTimer:
127
127
 
128
128
  async def __aexit__(self, exc_type, exc, tb):
129
129
  self._end_time_ns = time.perf_counter_ns()
130
- self.elapsed_ns = self._end_time_ns - self._start_time_ns
130
+ self._elapsed_ns = self._end_time_ns - self._start_time_ns
131
131
  if self.callback_func:
132
132
  from asyncio import iscoroutinefunction
133
133
 
@@ -139,6 +139,10 @@ class AsyncTimer:
139
139
 
140
140
  @property
141
141
  def elapsed_ms(self):
142
- if self.elapsed_ns is not None:
143
- return ns_to_ms(self.elapsed_ns)
142
+ if self._elapsed_ns is not None:
143
+ return ns_to_ms(self._elapsed_ns)
144
144
  return None
145
+
146
+ @property
147
+ def elapsed_ns(self):
148
+ return self._elapsed_ns
@@ -2,21 +2,39 @@ from collections import OrderedDict
2
2
  from typing import Any, Dict, Optional
3
3
 
4
4
  from letta.constants import PRE_EXECUTION_MESSAGE_ARG
5
+ from letta.schemas.tool import MCP_TOOL_METADATA_SCHEMA_STATUS, MCP_TOOL_METADATA_SCHEMA_WARNINGS
6
+ from letta.utils import get_logger
7
+
8
+ logger = get_logger(__name__)
5
9
 
6
10
 
7
11
  def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
8
12
  """Enables strict mode for a tool schema by setting 'strict' to True and
9
13
  disallowing additional properties in the parameters.
10
14
 
15
+ If the tool schema is NON_STRICT_ONLY, strict mode will not be applied.
16
+
11
17
  Args:
12
18
  tool_schema (Dict[str, Any]): The original tool schema.
13
19
 
14
20
  Returns:
15
- Dict[str, Any]: A new tool schema with strict mode enabled.
21
+ Dict[str, Any]: A new tool schema with strict mode conditionally enabled.
16
22
  """
17
23
  schema = tool_schema.copy()
18
24
 
19
- # Enable strict mode
25
+ # Check if schema has status metadata indicating NON_STRICT_ONLY
26
+ schema_status = schema.get(MCP_TOOL_METADATA_SCHEMA_STATUS)
27
+ if schema_status == "NON_STRICT_ONLY":
28
+ # Don't apply strict mode for non-strict schemas
29
+ # Remove the metadata fields from the schema
30
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
31
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
32
+ return schema
33
+ elif schema_status == "INVALID":
34
+ # We should not be hitting this and allowing invalid schemas to be used
35
+ logger.error(f"Tool schema {schema} is invalid: {schema.get(MCP_TOOL_METADATA_SCHEMA_WARNINGS)}")
36
+
37
+ # Enable strict mode for STRICT_COMPLIANT or unspecified health status
20
38
  schema["strict"] = True
21
39
 
22
40
  # Ensure parameters is a valid dictionary
@@ -26,6 +44,11 @@ def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
26
44
  # Set additionalProperties to False
27
45
  parameters["additionalProperties"] = False
28
46
  schema["parameters"] = parameters
47
+
48
+ # Remove the metadata fields from the schema
49
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_STATUS, None)
50
+ schema.pop(MCP_TOOL_METADATA_SCHEMA_WARNINGS, None)
51
+
29
52
  return schema
30
53
 
31
54
 
@@ -31,14 +31,12 @@ from letta.llm_api.llm_client_base import LLMClientBase
31
31
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
32
32
  from letta.log import get_logger
33
33
  from letta.otel.tracing import trace_method
34
- from letta.schemas.enums import ProviderCategory
35
34
  from letta.schemas.llm_config import LLMConfig
36
35
  from letta.schemas.message import Message as PydanticMessage
37
36
  from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
38
37
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
39
38
  from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
40
39
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
41
- from letta.services.provider_manager import ProviderManager
42
40
  from letta.settings import model_settings
43
41
 
44
42
  DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -65,7 +63,13 @@ class AnthropicClient(LLMClientBase):
65
63
  async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
66
64
  client = await self._get_anthropic_client_async(llm_config, async_client=True)
67
65
  request_data["stream"] = True
68
- return await client.beta.messages.create(**request_data)
66
+
67
+ # Add fine-grained tool streaming beta header for better streaming performance
68
+ # This helps reduce buffering when streaming tool call parameters
69
+ # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
70
+ betas = ["fine-grained-tool-streaming-2025-05-14"]
71
+
72
+ return await client.beta.messages.create(**request_data, betas=betas)
69
73
 
70
74
  @trace_method
71
75
  async def send_llm_batch_request_async(
@@ -122,19 +126,17 @@ class AnthropicClient(LLMClientBase):
122
126
  def _get_anthropic_client(
123
127
  self, llm_config: LLMConfig, async_client: bool = False
124
128
  ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
125
- override_key = None
126
- if llm_config.provider_category == ProviderCategory.byok:
127
- override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
129
+ api_key, _, _ = self.get_byok_overrides(llm_config)
128
130
 
129
131
  if async_client:
130
132
  return (
131
- anthropic.AsyncAnthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
132
- if override_key
133
+ anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
134
+ if api_key
133
135
  else anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries)
134
136
  )
135
137
  return (
136
- anthropic.Anthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
137
- if override_key
138
+ anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
139
+ if api_key
138
140
  else anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries)
139
141
  )
140
142
 
@@ -142,19 +144,17 @@ class AnthropicClient(LLMClientBase):
142
144
  async def _get_anthropic_client_async(
143
145
  self, llm_config: LLMConfig, async_client: bool = False
144
146
  ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
145
- override_key = None
146
- if llm_config.provider_category == ProviderCategory.byok:
147
- override_key = await ProviderManager().get_override_key_async(llm_config.provider_name, actor=self.actor)
147
+ api_key, _, _ = await self.get_byok_overrides_async(llm_config)
148
148
 
149
149
  if async_client:
150
150
  return (
151
- anthropic.AsyncAnthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
152
- if override_key
151
+ anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
152
+ if api_key
153
153
  else anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries)
154
154
  )
155
155
  return (
156
- anthropic.Anthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
157
- if override_key
156
+ anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries)
157
+ if api_key
158
158
  else anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries)
159
159
  )
160
160
 
@@ -183,9 +183,14 @@ class AnthropicClient(LLMClientBase):
183
183
 
184
184
  # Extended Thinking
185
185
  if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
186
+ thinking_budget = max(llm_config.max_reasoning_tokens, 1024)
187
+ if thinking_budget != llm_config.max_reasoning_tokens:
188
+ logger.warning(
189
+ f"Max reasoning tokens must be at least 1024 for Claude. Setting max_reasoning_tokens to 1024 for model {llm_config.model}."
190
+ )
186
191
  data["thinking"] = {
187
192
  "type": "enabled",
188
- "budget_tokens": llm_config.max_reasoning_tokens,
193
+ "budget_tokens": thinking_budget,
189
194
  }
190
195
  # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
191
196
  data["temperature"] = 1.0
@@ -0,0 +1,73 @@
1
+ import os
2
+ from typing import List, Optional, Tuple
3
+
4
+ from openai import AsyncAzureOpenAI, AzureOpenAI
5
+ from openai.types.chat.chat_completion import ChatCompletion
6
+
7
+ from letta.llm_api.openai_client import OpenAIClient
8
+ from letta.otel.tracing import trace_method
9
+ from letta.schemas.embedding_config import EmbeddingConfig
10
+ from letta.schemas.enums import ProviderCategory
11
+ from letta.schemas.llm_config import LLMConfig
12
+ from letta.settings import model_settings
13
+
14
+
15
class AzureClient(OpenAIClient):
    """Azure OpenAI chat/embedding client.

    Reuses the OpenAI request/response handling from OpenAIClient but constructs
    Azure-specific SDK clients. Credentials resolve from BYOK provider overrides
    first, then fall back as a unit to model settings / environment variables
    (AZURE_API_KEY, AZURE_BASE_URL, AZURE_API_VERSION).
    """

    @staticmethod
    def _env_credentials() -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return (api_key, base_url, api_version) from model settings or the environment."""
        api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
        base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
        api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
        return api_key, base_url, api_version

    def get_byok_overrides(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return BYOK (api_key, base_url, api_version) for the config's provider, or Nones."""
        if llm_config.provider_category == ProviderCategory.byok:
            # Imported lazily to avoid a circular import with the service layer.
            from letta.services.provider_manager import ProviderManager

            return ProviderManager().get_azure_credentials(llm_config.provider_name, actor=self.actor)

        return None, None, None

    async def get_byok_overrides_async(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Async variant of get_byok_overrides."""
        if llm_config.provider_category == ProviderCategory.byok:
            from letta.services.provider_manager import ProviderManager

            return await ProviderManager().get_azure_credentials_async(llm_config.provider_name, actor=self.actor)

        return None, None, None

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying synchronous request to the Azure OpenAI API and returns raw response dict.
        """
        api_key, base_url, api_version = self.get_byok_overrides(llm_config)
        if not api_key or not base_url or not api_version:
            # Partial BYOK credentials are ignored: fall back to env/settings as a unit.
            api_key, base_url, api_version = self._env_credentials()

        client = AzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Performs underlying asynchronous request to the Azure OpenAI API and returns raw response dict.
        """
        api_key, base_url, api_version = await self.get_byok_overrides_async(llm_config)
        if not api_key or not base_url or not api_version:
            # Partial BYOK credentials are ignored: fall back to env/settings as a unit.
            api_key, base_url, api_version = self._env_credentials()

        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_embeddings(self, inputs: List[str], embedding_config: EmbeddingConfig) -> List[List[float]]:
        """Request embeddings given texts and embedding config"""
        # Embeddings do not consult BYOK overrides (matches prior behavior).
        api_key, base_url, api_version = self._env_credentials()
        client = AsyncAzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)
        response = await client.embeddings.create(model=embedding_config.embedding_model, input=inputs)

        # TODO: add total usage
        return [r.embedding for r in response.data]
@@ -17,10 +17,7 @@ logger = get_logger(__name__)
17
17
 
18
18
  class BedrockClient(AnthropicClient):
19
19
 
20
- @trace_method
21
- async def _get_anthropic_client_async(
22
- self, llm_config: LLMConfig, async_client: bool = False
23
- ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicBedrock]:
20
+ async def get_byok_overrides_async(self, llm_config: LLMConfig) -> tuple[str, str, str]:
24
21
  override_access_key_id, override_secret_access_key, override_default_region = None, None, None
25
22
  if llm_config.provider_category == ProviderCategory.byok:
26
23
  (
@@ -31,6 +28,13 @@ class BedrockClient(AnthropicClient):
31
28
  llm_config.provider_name,
32
29
  actor=self.actor,
33
30
  )
31
+ return override_access_key_id, override_secret_access_key, override_default_regions
32
+
33
+ @trace_method
34
+ async def _get_anthropic_client_async(
35
+ self, llm_config: LLMConfig, async_client: bool = False
36
+ ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicBedrock]:
37
+ override_access_key_id, override_secret_access_key, override_default_region = await self.get_byok_overrides_async(llm_config)
34
38
 
35
39
  session = Session()
36
40
  async with session.client(