letta-nightly 0.8.17.dev20250722104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -5,16 +5,18 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.8.17"
+    __version__ = "0.9.0"

 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]

-
 # import clients
 from letta.client.client import RESTClient

-# imports for easier access
+# Import sqlite_functions early to ensure event handlers are registered
+from letta.orm import sqlite_functions
+
+# # imports for easier access
 from letta.schemas.agent import AgentState
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
letta/agent.py CHANGED
@@ -36,6 +36,7 @@ from letta.interface import AgentInterface
 from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
 from letta.llm_api.llm_client import LLMClient
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.memory import summarize_messages
@@ -548,8 +549,8 @@ class Agent(BaseAgent):
                 return messages, False, True  # force a heartbeat to allow agent to handle error

             # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
-            if "inner_thoughts" in function_args:
-                response_message.content = function_args.pop("inner_thoughts")
+            if INNER_THOUGHTS_KWARG in function_args:
+                response_message.content = function_args.pop(INNER_THOUGHTS_KWARG)
             # The content if then internal monologue, not chat
             if response_message.content and not nonnull_content:
                 self.interface.internal_monologue(response_message.content, msg_obj=messages[-1], chunk_index=chunk_index)
letta/agents/base_agent.py CHANGED
@@ -122,7 +122,9 @@ class BaseAgent(ABC):
         curr_dynamic_section = extract_dynamic_section(curr_system_message_text)

         # generate just the memory string with current state for comparison
-        curr_memory_str = agent_state.memory.compile(tool_usage_rules=tool_constraint_block, sources=agent_state.sources)
+        curr_memory_str = agent_state.memory.compile(
+            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+        )
         new_dynamic_section = extract_dynamic_section(curr_memory_str)

         # compare just the dynamic sections (memory blocks, tool rules, directories)
@@ -149,6 +151,7 @@ class BaseAgent(ABC):
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
             sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
         )

         diff = united_diff(curr_system_message_text, new_system_message_str)
letta/agents/voice_agent.py CHANGED
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
             previous_message_count=self.num_messages,
             archival_memory_size=self.num_archival_memories,
             sources=agent_state.sources,
+            max_files_open=agent_state.max_files_open,
         )
         letta_message_db_queue = create_input_messages(
             input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
letta/constants.py CHANGED
@@ -326,7 +326,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
 CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
 CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
 CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
-CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
+
 # Function return limits
 FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000  # very high (we rely on implementation)
@@ -361,7 +361,9 @@ REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
361
361
  REDIS_RUN_ID_PREFIX = "agent:send_message:run_id"
362
362
 
363
363
  # TODO: This is temporary, eventually use token-based eviction
364
- MAX_FILES_OPEN = 5
364
+ # File based controls
365
+ DEFAULT_MAX_FILES_OPEN = 5
366
+ DEFAULT_CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
365
367
 
366
368
  GET_PROVIDERS_TIMEOUT_SECONDS = 10
367
369
 
letta/functions/schema_generator.py CHANGED
@@ -412,12 +412,13 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     # Validate that the function has a Google Python style docstring
     try:
         validate_google_style_docstring(function)
-    except ValueError:
+    except ValueError as e:
         logger.warning(
             f"Function `{function.__name__}` in module `{function.__module__}` "
             f"{'(tool_id=' + tool_id + ') ' if tool_id else ''}"
             f"is not in Google style docstring format. "
             f"Docstring received:\n{repr(function.__doc__[:200]) if function.__doc__ else 'None'}"
+            f"\nError: {str(e)}"
         )

     # Get the signature of the function
letta/groups/dynamic_multi_agent.py CHANGED
@@ -94,6 +94,7 @@ class DynamicMultiAgent(Agent):
             for name, agent_id in [(agents[agent_id].agent_state.name, agent_id) for agent_id in agent_id_options]:
                 if name.lower() in assistant_message.content.lower():
                     speaker_id = agent_id
+            assert speaker_id is not None, f"No names found in {assistant_message.content}"

             # Sum usage
             total_usage.prompt_tokens += usage_stats.prompt_tokens
@@ -1,4 +1,3 @@
1
- import base64
2
1
  from typing import Any, Dict, List, Optional, Union
3
2
 
4
3
  import numpy as np
@@ -43,7 +42,10 @@ from letta.schemas.tool_rule import (
     TerminalToolRule,
     ToolRule,
 )
+from letta.settings import DatabaseChoice, settings

+if settings.database_engine == DatabaseChoice.SQLITE:
+    import sqlite_vec
 # --------------------------
 # LLMConfig Serialization
 # --------------------------
@@ -272,22 +274,28 @@ def deserialize_message_content(data: Optional[List[Dict]]) -> List[MessageConte
272
274
 
273
275
 
274
276
  def serialize_vector(vector: Optional[Union[List[float], np.ndarray]]) -> Optional[bytes]:
275
- """Convert a NumPy array or list into a base64-encoded byte string."""
277
+ """Convert a NumPy array or list into serialized format using sqlite-vec."""
276
278
  if vector is None:
277
279
  return None
278
280
  if isinstance(vector, list):
279
281
  vector = np.array(vector, dtype=np.float32)
282
+ else:
283
+ vector = vector.astype(np.float32)
280
284
 
281
- return base64.b64encode(vector.tobytes())
285
+ return sqlite_vec.serialize_float32(vector.tolist())
282
286
 
283
287
 
284
288
  def deserialize_vector(data: Optional[bytes], dialect: Dialect) -> Optional[np.ndarray]:
285
- """Convert a base64-encoded byte string back into a NumPy array."""
289
+ """Convert serialized data back into a NumPy array using sqlite-vec format."""
286
290
  if not data:
287
291
  return None
288
292
 
289
293
  if dialect.name == "sqlite":
290
- data = base64.b64decode(data)
294
+ # Use sqlite-vec format
295
+ if len(data) % 4 == 0: # Must be divisible by 4 for float32
296
+ return np.frombuffer(data, dtype=np.float32)
297
+ else:
298
+ raise ValueError(f"Invalid sqlite-vec binary data length: {len(data)}")
291
299
 
292
300
  return np.frombuffer(data, dtype=np.float32)
293
301
 
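Note: the SQLite vector codec switches from base64 text to sqlite-vec's packed float32 blobs. A minimal round-trip sketch of the new encoding (assumes the `sqlite-vec` and `numpy` packages; `serialize_float32` is the same helper the diff calls, and the `% 4` check mirrors `deserialize_vector`):

```python
import numpy as np
import sqlite_vec

vec = [0.25, -1.0, 3.5]
blob = sqlite_vec.serialize_float32(vec)  # packs each float32 as 4 raw bytes
assert len(blob) % 4 == 0                 # the validity check deserialize_vector performs
restored = np.frombuffer(blob, dtype=np.float32)  # the decode path used for the sqlite dialect
assert np.allclose(restored, np.array(vec, dtype=np.float32))
```

One caveat worth flagging: base64 output is also 4-byte aligned, so blobs written by the old codec would pass the length check and decode to garbage floats rather than raise; presumably a fresh database or a migration is assumed.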
letta/helpers/json_helpers.py CHANGED
@@ -1,3 +1,4 @@
+import base64
 import json
 from datetime import datetime

@@ -11,7 +12,11 @@ def json_dumps(data, indent=2):
         if isinstance(obj, datetime):
             return obj.isoformat()
         if isinstance(obj, bytes):
-            return obj.decode("utf-8")
+            try:
+                return obj.decode("utf-8")
+            except Exception:
+                print(f"Error decoding bytes as utf-8: {obj}")
+                return base64.b64encode(obj).decode("utf-8")
         raise TypeError(f"Type {type(obj)} not serializable")

     return json.dumps(data, indent=indent, default=safe_serializer, ensure_ascii=False)
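Note: a sketch of the new fallback behavior (hypothetical inputs; `json_dumps` is the helper patched above):

```python
from letta.helpers.json_helpers import json_dumps

print(json_dumps({"ok": b"hello"}))      # valid UTF-8 bytes decode as before: "hello"
print(json_dumps({"raw": b"\x80\x81"}))  # invalid UTF-8 no longer raises; base64 fallback yields "gIE="
```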
letta/llm_api/anthropic.py CHANGED
@@ -729,7 +729,7 @@ def _prepare_anthropic_request(
         data["temperature"] = 1.0

     if "functions" in data:
-        raise ValueError(f"'functions' unexpected in Anthropic API payload")
+        raise ValueError("'functions' unexpected in Anthropic API payload")

     # Handle tools
     if "tools" in data and data["tools"] is None:
@@ -1162,7 +1162,7 @@ def anthropic_chat_completions_process_stream(
                     accum_message.tool_calls[tool_call_delta.index].function.arguments += tool_call_delta.function.arguments

             if message_delta.function_call is not None:
-                raise NotImplementedError(f"Old function_call style not support with stream=True")
+                raise NotImplementedError("Old function_call style not support with stream=True")

         # overwrite response fields based on latest chunk
         if not create_message_id:
letta/llm_api/aws_bedrock.py CHANGED
@@ -1,17 +1,30 @@
+"""
+Note that this formally only supports Anthropic Bedrock.
+TODO (cliandy): determine what other providers are supported and what is needed to add support.
+"""
+
 import os
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional

 from anthropic import AnthropicBedrock

+from letta.log import get_logger
 from letta.settings import model_settings

+logger = get_logger(__name__)
+

 def has_valid_aws_credentials() -> bool:
     """
     Check if AWS credentials are properly configured.
     """
-    valid_aws_credentials = os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY") and os.getenv("AWS_DEFAULT_REGION")
-    return valid_aws_credentials
+    return all(
+        (
+            os.getenv("AWS_ACCESS_KEY_ID"),
+            os.getenv("AWS_SECRET_ACCESS_KEY"),
+            os.getenv("AWS_DEFAULT_REGION"),
+        )
+    )


 def get_bedrock_client(
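Note: beyond style, the `all()` rewrite fixes the return type. The old chained `and` returned the last `os.getenv` value, a string (or `None`), despite the `-> bool` annotation; `all()` returns an actual bool. A sketch with placeholder env values:

```python
import os

# placeholder credentials, for illustration only
os.environ["AWS_ACCESS_KEY_ID"] = "AKIA_PLACEHOLDER"
os.environ["AWS_SECRET_ACCESS_KEY"] = "secret"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"

keys = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION")
old_style = os.getenv(keys[0]) and os.getenv(keys[1]) and os.getenv(keys[2])
new_style = all(os.getenv(k) for k in keys)
assert old_style == "us-east-1"  # truthy, but a str rather than a bool
assert new_style is True
```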
@@ -41,48 +54,11 @@ def get_bedrock_client(
     return bedrock


-def bedrock_get_model_list(
-    region_name: str,
-    access_key_id: Optional[str] = None,
-    secret_access_key: Optional[str] = None,
-) -> List[dict]:
-    """
-    Get list of available models from Bedrock.
-
-    Args:
-        region_name: AWS region name
-        access_key_id: Optional AWS access key ID
-        secret_access_key: Optional AWS secret access key
-
-    TODO: Implement model_provider and output_modality filtering
-        model_provider: Optional provider name to filter models. If None, returns all models.
-        output_modality: Output modality to filter models. Defaults to "text".
-
-    Returns:
-        List of model summaries
-
-    """
-    import boto3
-
-    try:
-        bedrock = boto3.client(
-            "bedrock",
-            region_name=region_name,
-            aws_access_key_id=access_key_id,
-            aws_secret_access_key=secret_access_key,
-        )
-        response = bedrock.list_inference_profiles()
-        return response["inferenceProfileSummaries"]
-    except Exception as e:
-        print(f"Error getting model list: {str(e)}")
-        raise e
-
-
 async def bedrock_get_model_list_async(
     access_key_id: Optional[str] = None,
     secret_access_key: Optional[str] = None,
     default_region: Optional[str] = None,
-) -> List[dict]:
+) -> list[dict]:
     from aioboto3.session import Session

     try:
@@ -96,11 +72,11 @@ async def bedrock_get_model_list_async(
         response = await bedrock.list_inference_profiles()
         return response["inferenceProfileSummaries"]
     except Exception as e:
-        print(f"Error getting model list: {str(e)}")
+        logger.error(f"Error getting model list for bedrock: %s", e)
        raise e


-def bedrock_get_model_details(region_name: str, model_id: str) -> Dict[str, Any]:
+def bedrock_get_model_details(region_name: str, model_id: str) -> dict[str, Any]:
     """
     Get details for a specific model from Bedrock.
     """
@@ -121,54 +97,8 @@ def bedrock_get_model_context_window(model_id: str) -> int:
121
97
  Get context window size for a specific model.
122
98
  """
123
99
  # Bedrock doesn't provide this via API, so we maintain a mapping
124
- context_windows = {
125
- "anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,
126
- "anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
127
- "anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
128
- "anthropic.claude-3-haiku-20240307-v1:0": 200000,
129
- "anthropic.claude-3-opus-20240229-v1:0": 200000,
130
- "anthropic.claude-3-sonnet-20240229-v1:0": 200000,
131
- }
132
- return context_windows.get(model_id, 200000) # default to 100k if unknown
133
-
134
-
135
- """
136
- {
137
- "id": "msg_123",
138
- "type": "message",
139
- "role": "assistant",
140
- "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
141
- "content": [
142
- {
143
- "type": "text",
144
- "text": "I see the Firefox icon. Let me click on it and then navigate to a weather website."
145
- },
146
- {
147
- "type": "tool_use",
148
- "id": "toolu_123",
149
- "name": "computer",
150
- "input": {
151
- "action": "mouse_move",
152
- "coordinate": [
153
- 708,
154
- 736
155
- ]
156
- }
157
- },
158
- {
159
- "type": "tool_use",
160
- "id": "toolu_234",
161
- "name": "computer",
162
- "input": {
163
- "action": "left_click"
164
- }
165
- }
166
- ],
167
- "stop_reason": "tool_use",
168
- "stop_sequence": null,
169
- "usage": {
170
- "input_tokens": 3391,
171
- "output_tokens": 132
172
- }
173
- }
174
- """
100
+ # 200k for anthropic: https://aws.amazon.com/bedrock/anthropic/
101
+ if model_id.startswith("anthropic"):
102
+ return 200_000
103
+ else:
104
+ return 100_000 # default to 100k if unknown
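Note: with the synchronous `bedrock_get_model_list` removed, callers go through the async path. A usage sketch (assumes `aioboto3` is installed and AWS credentials are configured; the field names come from Bedrock's `list_inference_profiles` response and are hedged with `.get`):

```python
import asyncio

from letta.llm_api.aws_bedrock import bedrock_get_model_list_async

async def main():
    # each entry is an inference profile summary dict from the Bedrock API
    profiles = await bedrock_get_model_list_async(default_region="us-east-1")
    for profile in profiles:
        print(profile.get("inferenceProfileId"), profile.get("inferenceProfileName"))

asyncio.run(main())
```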
letta/llm_api/deepseek.py CHANGED
@@ -120,7 +120,7 @@ def build_deepseek_chat_completions_request(

     def add_functions_to_system_message(system_message: ChatMessage):
         system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
-        system_message.content += f'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+        system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

     if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
         add_functions_to_system_message(
letta/llm_api/google_ai_client.py CHANGED
@@ -66,44 +66,6 @@ def google_ai_check_valid_api_key(api_key: str):
         raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)


-def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
-    """Synchronous version to get model list from Google AI API using httpx."""
-    import httpx
-
-    from letta.utils import printd
-
-    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
-
-    try:
-        with httpx.Client() as client:
-            response = client.get(url, headers=headers)
-            response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
-            response_data = response.json()  # convert to dict from string
-
-            # Grab the models out
-            model_list = response_data["models"]
-            return model_list
-
-    except httpx.HTTPStatusError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
-        raise http_err
-
-    except httpx.RequestError as req_err:
-        # Handle other httpx-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
 async def google_ai_get_model_list_async(
     base_url: str, api_key: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
 ) -> List[dict]:
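Note: the synchronous httpx helper is gone with no replacement; only the async variant remains. A call sketch (base URL and key are assumed values; the signature matches the diff):

```python
import asyncio

from letta.llm_api.google_ai_client import google_ai_get_model_list_async

async def main():
    models = await google_ai_get_model_list_async(
        base_url="https://generativelanguage.googleapis.com",  # assumed Gemini API base URL
        api_key="YOUR_GEMINI_API_KEY",
    )
    print([m.get("name") for m in models])  # entries come from the response's "models" list

asyncio.run(main())
```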
letta/llm_api/google_constants.py CHANGED
@@ -1,7 +1,12 @@
 GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
+    "gemini-2.5-pro": 1048576,
+    "gemini-2.5-flash": 1048576,
+    "gemini-live-2.5-flash": 1048576,
+    "gemini-2.0-flash-001": 1048576,
+    "gemini-2.0-flash-lite-001": 1048576,
+    # The following are either deprecated or discontinued.
     "gemini-2.5-pro-exp-03-25": 1048576,
     "gemini-2.5-flash-preview-04-17": 1048576,
-    "gemini-2.0-flash-001": 1048576,
     "gemini-2.0-pro-exp-02-05": 2097152,
     "gemini-2.0-flash-lite-preview-02-05": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 1048576,
@@ -11,8 +16,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
     "gemini-1.0-pro-vision": 16384,
 }

-GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
-
 GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}

 GOOGLE_MODEL_FOR_API_KEY_CHECK = "gemini-2.0-flash-lite"
letta/llm_api/helpers.py CHANGED
@@ -252,7 +252,7 @@ def unpack_all_inner_thoughts_from_kwargs(
 ) -> ChatCompletionResponse:
     """Strip the inner thoughts out of the tool call and put it in the message content"""
     if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+        raise ValueError("Unpacking inner thoughts from empty response not supported")

     new_choices = []
     for choice in response.choices:
letta/llm_api/llm_api_tools.py CHANGED
@@ -67,7 +67,6 @@ def retry_with_exponential_backoff(
             # Stop retrying if user hits Ctrl-C
             raise KeyboardInterrupt("User intentionally stopped thread. Stopping...")
         except requests.exceptions.HTTPError as http_err:
-
             if not hasattr(http_err, "response") or not http_err.response:
                 raise

@@ -175,7 +174,6 @@ def create(

     # openai
     if llm_config.model_endpoint_type == "openai":
-
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
@@ -256,7 +254,6 @@ def create(
         return response

     elif llm_config.model_endpoint_type == "xai":
-
         api_key = model_settings.xai_api_key

         if function_call is None and functions is not None and len(functions) > 0:
@@ -464,7 +461,7 @@ def create(
     # )
     elif llm_config.model_endpoint_type == "groq":
         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for Groq.")
+            raise NotImplementedError("Streaming not yet implemented for Groq.")

         if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
             raise LettaConfigurationError(message="Groq key is missing from letta config file", missing_fields=["groq_api_key"])
@@ -517,7 +514,7 @@ def create(
         """TogetherAI endpoint that goes via /completions instead of /chat/completions"""

         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for TogetherAI (via the /completions endpoint).")
+            raise NotImplementedError("Streaming not yet implemented for TogetherAI (via the /completions endpoint).")

         if model_settings.together_api_key is None and (
             llm_config.model_endpoint == "https://api.together.ai/v1/completions"
@@ -547,7 +544,7 @@ def create(
         """Anthropic endpoint that goes via /embeddings instead of /chat/completions"""

         if stream:
-            raise NotImplementedError(f"Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
+            raise NotImplementedError("Streaming not yet implemented for Anthropic (via the /embeddings endpoint).")
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

@@ -631,7 +628,7 @@ def create(
         messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
         messages[0].content[
             0
-        ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
+        ].text += 'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
         return get_chat_completion(
             model=llm_config.model,
             messages=messages,
letta/llm_api/mistral.py CHANGED
@@ -1,47 +1,22 @@
-import requests
+import aiohttp

-from letta.utils import printd, smart_urljoin
+from letta.log import get_logger
+from letta.utils import smart_urljoin

+logger = get_logger(__name__)

-def mistral_get_model_list(url: str, api_key: str) -> dict:
+
+async def mistral_get_model_list_async(url: str, api_key: str) -> dict:
     url = smart_urljoin(url, "models")

     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"

-    printd(f"Sending request to {url}")
-    response = None
-    try:
+    logger.debug(f"Sending request to %s", url)
+
+    async with aiohttp.ClientSession() as session:
         # TODO add query param "tool" to be true
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response_json = response.json()  # convert to dict from string
-        return response_json
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got HTTPError, exception={http_err}, response={response}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got RequestException, exception={req_err}, response={response}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        try:
-            if response:
-                response = response.json()
-        except:
-            pass
-        printd(f"Got unknown Exception, exception={e}, response={response}")
-        raise e
+        async with session.get(url, headers=headers) as response:
+            response.raise_for_status()
+            return await response.json()
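Note: a usage sketch for the new coroutine (assumed endpoint URL and key; Mistral's `/models` route returns an OpenAI-style `{"object": "list", "data": [...]}` payload):

```python
import asyncio

from letta.llm_api.mistral import mistral_get_model_list_async

async def main():
    response = await mistral_get_model_list_async("https://api.mistral.ai/v1", api_key="YOUR_MISTRAL_API_KEY")
    print([model.get("id") for model in response.get("data", [])])

asyncio.run(main())
```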