letta-nightly 0.8.3.dev20250611104259__py3-none-any.whl → 0.8.4.dev20250613104250__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +11 -1
  3. letta/agents/base_agent.py +11 -4
  4. letta/agents/ephemeral_summary_agent.py +3 -2
  5. letta/agents/letta_agent.py +109 -78
  6. letta/agents/letta_agent_batch.py +4 -3
  7. letta/agents/voice_agent.py +3 -3
  8. letta/agents/voice_sleeptime_agent.py +3 -2
  9. letta/client/client.py +6 -3
  10. letta/constants.py +6 -0
  11. letta/data_sources/connectors.py +3 -5
  12. letta/functions/async_composio_toolset.py +4 -1
  13. letta/functions/function_sets/files.py +4 -3
  14. letta/functions/schema_generator.py +5 -2
  15. letta/groups/sleeptime_multi_agent_v2.py +4 -3
  16. letta/helpers/converters.py +7 -1
  17. letta/helpers/message_helper.py +31 -11
  18. letta/helpers/tool_rule_solver.py +69 -4
  19. letta/interfaces/anthropic_streaming_interface.py +8 -1
  20. letta/interfaces/openai_streaming_interface.py +4 -1
  21. letta/llm_api/anthropic_client.py +4 -4
  22. letta/llm_api/openai_client.py +56 -11
  23. letta/local_llm/utils.py +3 -20
  24. letta/orm/sqlalchemy_base.py +7 -1
  25. letta/otel/metric_registry.py +26 -0
  26. letta/otel/metrics.py +78 -14
  27. letta/schemas/letta_message_content.py +64 -3
  28. letta/schemas/letta_request.py +5 -1
  29. letta/schemas/message.py +61 -14
  30. letta/schemas/openai/chat_completion_request.py +1 -1
  31. letta/schemas/providers.py +41 -14
  32. letta/schemas/tool_rule.py +67 -0
  33. letta/schemas/user.py +2 -2
  34. letta/server/rest_api/routers/v1/agents.py +22 -12
  35. letta/server/rest_api/routers/v1/sources.py +13 -25
  36. letta/server/server.py +10 -5
  37. letta/services/agent_manager.py +5 -1
  38. letta/services/file_manager.py +219 -0
  39. letta/services/file_processor/chunker/line_chunker.py +119 -14
  40. letta/services/file_processor/file_processor.py +8 -8
  41. letta/services/file_processor/file_types.py +303 -0
  42. letta/services/file_processor/parser/mistral_parser.py +2 -11
  43. letta/services/helpers/agent_manager_helper.py +6 -0
  44. letta/services/message_manager.py +32 -0
  45. letta/services/organization_manager.py +4 -6
  46. letta/services/passage_manager.py +1 -0
  47. letta/services/source_manager.py +0 -208
  48. letta/services/tool_executor/composio_tool_executor.py +5 -1
  49. letta/services/tool_executor/files_tool_executor.py +291 -15
  50. letta/services/user_manager.py +8 -8
  51. letta/system.py +3 -1
  52. letta/utils.py +7 -13
  53. {letta_nightly-0.8.3.dev20250611104259.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/METADATA +2 -2
  54. {letta_nightly-0.8.3.dev20250611104259.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/RECORD +57 -55
  55. {letta_nightly-0.8.3.dev20250611104259.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/LICENSE +0 -0
  56. {letta_nightly-0.8.3.dev20250611104259.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/WHEEL +0 -0
  57. {letta_nightly-0.8.3.dev20250611104259.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/entry_points.txt +0 -0
letta/client/client.py CHANGED
@@ -1,6 +1,6 @@
  import sys
  import time
- from typing import Callable, Dict, Generator, List, Optional, Union
+ from typing import Callable, Dict, List, Optional, Union
 
  import requests
 
@@ -18,7 +18,7 @@ from letta.schemas.file import FileMetadata
  from letta.schemas.job import Job
  from letta.schemas.letta_message import LettaMessage, LettaMessageUnion
  from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
- from letta.schemas.letta_response import LettaResponse, LettaStreamingResponse
+ from letta.schemas.letta_response import LettaResponse
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import ArchivalMemorySummary, ChatMemory, CreateArchivalMemory, Memory, RecallMemorySummary
  from letta.schemas.message import Message, MessageCreate
@@ -136,6 +136,7 @@ class AbstractClient(object):
          stream: Optional[bool] = False,
          stream_steps: bool = False,
          stream_tokens: bool = False,
+         max_steps: Optional[int] = None,
      ) -> LettaResponse:
          raise NotImplementedError
 
@@ -977,7 +978,8 @@ class RESTClient(AbstractClient):
          stream: Optional[bool] = False,
          stream_steps: bool = False,
          stream_tokens: bool = False,
-     ) -> Union[LettaResponse, Generator[LettaStreamingResponse, None, None]]:
+         max_steps: Optional[int] = 10,
+     ) -> LettaResponse:
          """
          Send a message to an agent
 
@@ -988,6 +990,7 @@
              name(str): Name of the sender
              stream (bool): Stream the response (default: `False`)
              stream_tokens (bool): Stream tokens (default: `False`)
+             max_steps (int): Maximum number of steps the agent should take (default: 10)
 
          Returns:
              response (LettaResponse): Response from the agent
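
A hedged usage sketch of the new max_steps parameter (the base URL, token, and agent id are placeholders, and the RESTClient constructor arguments shown are assumptions, not confirmed by this diff):

    from letta.client.client import RESTClient

    client = RESTClient(base_url="http://localhost:8283", token="sk-...")  # hypothetical connection details
    response = client.send_message(
        agent_id="agent-xxxx",  # placeholder agent id
        role="user",
        message="Summarize my unread documents",
        max_steps=10,  # cap the agent loop at 10 steps for this request
    )
    print(response.messages)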
letta/constants.py CHANGED
@@ -31,6 +31,9 @@ LETTA_TOOL_MODULE_NAMES = [
      LETTA_FILES_TOOL_MODULE_NAME,
  ]
 
+ DEFAULT_ORG_ID = "org-00000000-0000-4000-8000-000000000000"
+ DEFAULT_ORG_NAME = "default_org"
+
 
  # String in the error message for when the context window is too large
  # Example full message:
@@ -43,6 +46,9 @@ IN_CONTEXT_MEMORY_KEYWORD = "CORE_MEMORY"
  # OpenAI error message: Invalid 'messages[1].tool_calls[0].id': string too long. Expected a string with maximum length 29, but got a string with length 36 instead.
  TOOL_CALL_ID_MAX_LEN = 29
 
+ # Max steps for agent loop
+ DEFAULT_MAX_STEPS = 50
+
  # minimum context window size
  MIN_CONTEXT_WINDOW = 4096
 
letta/data_sources/connectors.py CHANGED
@@ -8,8 +8,8 @@ from letta.embeddings import embedding_model
  from letta.schemas.file import FileMetadata
  from letta.schemas.passage import Passage
  from letta.schemas.source import Source
+ from letta.services.file_manager import FileManager
  from letta.services.passage_manager import PassageManager
- from letta.services.source_manager import SourceManager
 
 
  class DataConnector:
@@ -38,9 +38,7 @@ class DataConnector:
      """
 
 
- async def load_data(
-     connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User"
- ):
+ async def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, file_manager: FileManager, actor: "User"):
      from letta.llm_api.llm_client import LLMClient
      from letta.schemas.embedding_config import EmbeddingConfig
 
@@ -94,7 +92,7 @@ async def load_data(
 
      for file_metadata in connector.find_files(source):
          file_count += 1
-         await source_manager.create_file(file_metadata, actor)
+         await file_manager.create_file(file_metadata, actor)
 
          # generate passages
         for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
letta/functions/async_composio_toolset.py CHANGED
@@ -84,7 +84,10 @@ class AsyncComposioToolSet(BaseComposioToolSet, runtime="letta", description_cha
          # Handle specific error codes from Composio API
          if error_code == 10401 or "API_KEY_NOT_FOUND" in error_message:
              raise ApiKeyNotProvidedError()
-         if "connected account not found" in error_message.lower():
+         if (
+             "connected account not found" in error_message.lower()
+             or "no connected account found" in error_message.lower()
+         ):
              raise ConnectedAccountNotFoundError(f"Connected account not found: {error_message}")
          if "enum metadata not found" in error_message.lower():
              raise EnumMetadataNotFound(f"Enum metadata not found: {error_message}")
letta/functions/function_sets/files.py CHANGED
@@ -32,12 +32,13 @@ async def close_file(agent_state: "AgentState", file_name: str) -> str:
      raise NotImplementedError("Tool not implemented. Please contact the Letta team.")
 
 
- async def grep(agent_state: "AgentState", pattern: str) -> str:
+ async def grep(agent_state: "AgentState", pattern: str, include: Optional[str] = None) -> str:
      """
-     Grep tool to search files across data sources with keywords.
+     Grep tool to search files across data sources with a keyword or regex pattern.
 
      Args:
-         pattern (str): Keyword or regex pattern to search.
+         pattern (str): Keyword or regex pattern to search within file contents.
+         include (Optional[str]): Optional keyword or regex pattern to filter filenames to include in the search.
 
      Returns:
          str: Matching lines or summary output.
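
A minimal sketch of the filename filtering the new include argument implies (the real implementation lives server-side in letta/services/tool_executor/files_tool_executor.py; this standalone helper is illustrative only):

    import re
    from typing import List, Optional

    def filter_filenames(filenames: List[str], include: Optional[str]) -> List[str]:
        # No include pattern means every file is searched
        if include is None:
            return filenames
        # Interpret include as a regex matched anywhere in the filename
        matcher = re.compile(include)
        return [name for name in filenames if matcher.search(name)]

    filter_filenames(["notes.md", "main.py", "test_main.py"], include=r"\.py$")
    # -> ["main.py", "test_main.py"]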
letta/functions/schema_generator.py CHANGED
@@ -547,8 +547,11 @@ def generate_tool_schema_for_composio(
              property_schema["enum"] = field_props["enum"]
 
          # Handle array item types
-         if field_props["type"] == "array" and "items" in field_props:
-             property_schema["items"] = field_props["items"]
+         if field_props["type"] == "array":
+             if "items" in field_props:
+                 property_schema["items"] = field_props["items"]
+             elif "anyOf" in field_props:
+                 property_schema["items"] = [t for t in field_props["anyOf"] if "items" in t][0]["items"]
 
          # Add the property to the schema
          properties_json[field_name] = property_schema
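
For illustration, a field schema shaped like the hypothetical example below would now have its items pulled from the first anyOf variant that declares them (a sketch of the data shape, not Composio's exact output):

    field_props = {
        "type": "array",
        "anyOf": [
            {"type": "null"},
            {"type": "array", "items": {"type": "string"}},
        ],
    }
    # Mirrors the new fallback branch above
    items = [t for t in field_props["anyOf"] if "items" in t][0]["items"]
    assert items == {"type": "string"}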
letta/groups/sleeptime_multi_agent_v2.py CHANGED
@@ -4,6 +4,7 @@ from typing import AsyncGenerator, List, Optional
 
  from letta.agents.base_agent import BaseAgent
  from letta.agents.letta_agent import LettaAgent
+ from letta.constants import DEFAULT_MAX_STEPS
  from letta.groups.helpers import stringify_message
  from letta.otel.tracing import trace_method
  from letta.schemas.enums import JobStatus
@@ -61,7 +62,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
      async def step(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
@@ -131,7 +132,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
      async def step_stream_no_tokens(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
@@ -149,7 +150,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
      async def step_stream(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
letta/helpers/converters.py CHANGED
@@ -12,6 +12,8 @@ from letta.schemas.agent import AgentStepState
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderType, ToolRuleType
  from letta.schemas.letta_message_content import (
+     ImageContent,
+     ImageSourceType,
      MessageContent,
      MessageContentType,
      OmittedReasoningContent,
@@ -216,12 +218,13 @@ def serialize_message_content(message_content: Optional[List[Union[MessageConten
      serialized_message_content = []
      for content in message_content:
          if isinstance(content, MessageContent):
+             if content.type == MessageContentType.image:
+                 assert content.source.type == ImageSourceType.letta, f"Invalid image source type: {content.source.type}"
              serialized_message_content.append(content.model_dump(mode="json"))
          elif isinstance(content, dict):
              serialized_message_content.append(content)  # Already a dictionary, leave it as-is
          else:
              raise TypeError(f"Unexpected message content type: {type(content)}")
-
      return serialized_message_content
 
 
@@ -238,6 +241,9 @@ def deserialize_message_content(data: Optional[List[Dict]]) -> List[MessageConte
          content_type = item.get("type")
          if content_type == MessageContentType.text:
              content = TextContent(**item)
+         elif content_type == MessageContentType.image:
+             assert item["source"]["type"] == ImageSourceType.letta, f'Invalid image source type: {item["source"]["type"]}'
+             content = ImageContent(**item)
          elif content_type == MessageContentType.tool_call:
              content = ToolCallContent(**item)
          elif content_type == MessageContentType.tool_return:
letta/helpers/message_helper.py CHANGED
@@ -1,6 +1,11 @@
+ import base64
+ import mimetypes
+
+ import httpx
+
  from letta import system
  from letta.schemas.enums import MessageRole
- from letta.schemas.letta_message_content import TextContent
+ from letta.schemas.letta_message_content import Base64Image, ImageContent, ImageSourceType, TextContent
  from letta.schemas.message import Message, MessageCreate
 
 
@@ -33,24 +38,39 @@ def _convert_message_create_to_message(
 
      # Extract message content
      if isinstance(message_create.content, str):
+         assert message_create.content != "", "Message content must not be empty"
+         message_content = [TextContent(text=message_create.content)]
+     elif isinstance(message_create.content, list) and len(message_create.content) > 0:
          message_content = message_create.content
-     elif message_create.content and len(message_create.content) > 0 and isinstance(message_create.content[0], TextContent):
-         message_content = message_create.content[0].text
      else:
          raise ValueError("Message content is empty or invalid")
 
-     # Apply wrapping if needed
-     if message_create.role not in {MessageRole.user, MessageRole.system}:
-         raise ValueError(f"Invalid message role: {message_create.role}")
-     elif message_create.role == MessageRole.user and wrap_user_message:
-         message_content = system.package_user_message(user_message=message_content)
-     elif message_create.role == MessageRole.system and wrap_system_message:
-         message_content = system.package_system_message(system_message=message_content)
+     assert message_create.role in {MessageRole.user, MessageRole.system}, f"Invalid message role: {message_create.role}"
+     for content in message_content:
+         if isinstance(content, TextContent):
+             # Apply wrapping if needed
+             if message_create.role == MessageRole.user and wrap_user_message:
+                 content.text = system.package_user_message(user_message=content.text)
+             elif message_create.role == MessageRole.system and wrap_system_message:
+                 content.text = system.package_system_message(system_message=content.text)
+         elif isinstance(content, ImageContent):
+             if content.source.type == ImageSourceType.url:
+                 # Convert URL image to Base64Image if needed
+                 image_response = httpx.get(content.source.url)
+                 image_response.raise_for_status()
+                 image_media_type = image_response.headers.get("content-type")
+                 if not image_media_type:
+                     image_media_type, _ = mimetypes.guess_type(content.source.url)
+                 image_data = base64.standard_b64encode(image_response.content).decode("utf-8")
+                 content.source = Base64Image(media_type=image_media_type, data=image_data)
+             if content.source.type == ImageSourceType.letta and not content.source.data:
+                 # TODO: hydrate letta image with data from db
+                 pass
 
      return Message(
          agent_id=agent_id,
          role=message_create.role,
-         content=[TextContent(text=message_content)] if message_content else [],
+         content=message_content,
          name=message_create.name,
          model=None,  # assigned later?
          tool_calls=None,  # irrelevant
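
The URL-to-base64 conversion above can be exercised in isolation; a standalone restatement of the same fetch-and-encode steps (the helper name is hypothetical, not part of the Letta API):

    import base64
    import mimetypes

    import httpx

    def url_image_to_base64(url: str) -> tuple:
        # Fetch the image, then return (media_type, base64-encoded payload)
        response = httpx.get(url)
        response.raise_for_status()
        media_type = response.headers.get("content-type") or mimetypes.guess_type(url)[0]
        data = base64.standard_b64encode(response.content).decode("utf-8")
        return media_type, data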
letta/helpers/tool_rule_solver.py CHANGED
@@ -2,6 +2,7 @@ from typing import List, Optional, Set, Union
 
  from pydantic import BaseModel, Field
 
+ from letta.schemas.block import Block
  from letta.schemas.enums import ToolRuleType
  from letta.schemas.tool_rule import (
      BaseToolRule,
@@ -116,10 +117,10 @@ class ToolRulesSolver(BaseModel):
              return list(available_tools)
          else:
              # Collect valid tools from all child-based rules
-             valid_tool_sets = [
-                 rule.get_valid_tools(self.tool_call_history, available_tools, last_function_response)
-                 for rule in self.child_based_tool_rules + self.parent_tool_rules
-             ]
+             valid_tool_sets = []
+             for rule in self.child_based_tool_rules + self.parent_tool_rules:
+                 tools = rule.get_valid_tools(self.tool_call_history, available_tools, last_function_response)
+                 valid_tool_sets.append(tools)
 
              # Compute intersection of all valid tool sets
              final_allowed_tools = set.intersection(*valid_tool_sets) if valid_tool_sets else available_tools
@@ -141,6 +142,70 @@ class ToolRulesSolver(BaseModel):
          """Check if the tool is defined as a continue tool in the tool rules."""
          return any(rule.tool_name == tool_name for rule in self.continue_tool_rules)
 
+     def compile_tool_rule_prompts(self) -> Optional[Block]:
+         """
+         Compile prompt templates from all tool rules into an ephemeral Block.
+
+         Returns:
+             Optional[Block]: Compiled prompt block with tool rule constraints, or None if no templates exist.
+         """
+         compiled_prompts = []
+
+         all_rules = (
+             self.init_tool_rules
+             + self.continue_tool_rules
+             + self.child_based_tool_rules
+             + self.parent_tool_rules
+             + self.terminal_tool_rules
+         )
+
+         for rule in all_rules:
+             rendered = rule.render_prompt()
+             if rendered:
+                 compiled_prompts.append(rendered)
+
+         if compiled_prompts:
+             return Block(
+                 label="tool_usage_rules",
+                 value="\n".join(compiled_prompts),
+                 description="The following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow.",
+             )
+         return None
+
+     def guess_rule_violation(self, tool_name: str) -> List[str]:
+         """
+         Check if the given tool name or the previous tool in history matches any tool rule,
+         and return rendered prompt templates for matching rules.
+
+         Args:
+             tool_name: The name of the tool to check for rule violations
+
+         Returns:
+             List of rendered prompt templates from matching tool rules
+         """
+         violated_rules = []
+
+         # Get the previous tool from history if it exists
+         previous_tool = self.tool_call_history[-1] if self.tool_call_history else None
+
+         # Check all tool rules for matches
+         all_rules = (
+             self.init_tool_rules
+             + self.continue_tool_rules
+             + self.child_based_tool_rules
+             + self.parent_tool_rules
+             + self.terminal_tool_rules
+         )
+
+         for rule in all_rules:
+             # Check if the current tool name or previous tool matches this rule's tool_name
+             if rule.tool_name == tool_name or (previous_tool and rule.tool_name == previous_tool):
+                 rendered_prompt = rule.render_prompt()
+                 if rendered_prompt:
+                     violated_rules.append(rendered_prompt)
+
+         return violated_rules
+
      @staticmethod
      def validate_conditional_tool(rule: ConditionalToolRule):
          """
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -91,7 +91,14 @@ class AnthropicStreamingInterface:
      def get_tool_call_object(self) -> ToolCall:
          """Useful for agent loop"""
          # hack for tool rules
-         tool_input = json.loads(self.accumulated_tool_call_args)
+         try:
+             tool_input = json.loads(self.accumulated_tool_call_args)
+         except json.JSONDecodeError as e:
+             logger.warning(
+                 f"Failed to decode tool call arguments for tool_call_id={self.tool_call_id}, "
+                 f"name={self.tool_call_name}. Raw input: {self.accumulated_tool_call_args!r}. Error: {e}"
+             )
+             raise
          if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
              arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
          else:
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -60,8 +60,11 @@ class OpenAIStreamingInterface:
      def get_tool_call_object(self) -> ToolCall:
          """Useful for agent loop"""
          function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
+         tool_call_id = self.last_flushed_function_id if self.last_flushed_function_id else self.function_id_buffer
+         if not tool_call_id:
+             raise ValueError("No tool call ID available")
          return ToolCall(
-             id=self.last_flushed_function_id,
+             id=tool_call_id,
              function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
          )
 
letta/llm_api/anthropic_client.py CHANGED
@@ -1,4 +1,5 @@
  import json
+ import logging
  import re
  from typing import Dict, List, Optional, Union
 
@@ -201,7 +202,7 @@ class AnthropicClient(LLMClientBase):
              tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
              tools_for_request = [OpenAITool(function=f) for f in tools]
          elif force_tool_call is not None:
-             tool_choice = {"type": "tool", "name": force_tool_call}
+             tool_choice = {"type": "tool", "name": force_tool_call, "disable_parallel_tool_use": True}
              tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
 
          # need to have this setting to be able to put inner thoughts in kwargs
@@ -271,6 +272,8 @@ class AnthropicClient(LLMClientBase):
          return data
 
      async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[OpenAITool] = None) -> int:
+         logging.getLogger("httpx").setLevel(logging.WARNING)
+
          client = anthropic.AsyncAnthropic()
          if messages and len(messages) == 0:
              messages = None
@@ -286,9 +289,6 @@
                  tools=anthropic_tools or [],
              )
          except:
-             import ipdb
-
-             ipdb.set_trace()
              raise
 
          token_count = result.input_tokens
letta/llm_api/openai_client.py CHANGED
@@ -26,6 +26,7 @@ from letta.log import get_logger
  from letta.otel.tracing import trace_method
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.letta_message_content import MessageContentType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -93,20 +94,13 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:
  # TODO move into LLMConfig as a field?
  def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
      """Certain providers require the tool choice to be set to 'auto'."""
-
      if "nebius.com" in llm_config.model_endpoint:
          return True
      if "together.ai" in llm_config.model_endpoint or "together.xyz" in llm_config.model_endpoint:
          return True
-     # proxy also has this issue (FIXME check)
-     elif llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
-         return True
-     # same with vLLM (FIXME check)
-     elif llm_config.handle and "vllm" in llm_config.handle:
+     if llm_config.handle and "vllm" in llm_config.handle:
          return True
-     else:
-         # will use "required" instead of "auto"
-         return False
+     return False
 
 
  class OpenAIClient(LLMClientBase):
@@ -203,7 +197,7 @@ class OpenAIClient(LLMClientBase):
          # TODO: This vllm checking is very brittle and is a patch at most
          tool_choice = None
          if requires_auto_tool_choice(llm_config):
-             tool_choice = "auto"  # TODO change to "required" once proxy supports it
+             tool_choice = "auto"
          elif tools:
              # only set if tools is non-Null
              tool_choice = "required"
@@ -213,7 +207,7 @@
 
          data = ChatCompletionRequest(
              model=model,
-             messages=openai_message_list,
+             messages=fill_image_content_in_messages(openai_message_list, messages),
              tools=[OpenAITool(type="function", function=f) for f in tools] if tools else None,
              tool_choice=tool_choice,
              user=str(),
@@ -221,6 +215,9 @@
              # NOTE: the reasoners that don't support temperature require 1.0, not None
              temperature=llm_config.temperature if supports_temperature_param(model) else 1.0,
          )
+         if tools and supports_parallel_tool_calling(model):
+             data.parallel_tool_calls = False
+
          # always set user id for openai requests
          if self.actor:
              data.user = self.actor.id
@@ -402,3 +399,51 @@
 
          # Fallback for unexpected errors
          return super().handle_llm_error(e)
+
+
+ def fill_image_content_in_messages(openai_message_list: List[dict], pydantic_message_list: List[PydanticMessage]) -> List[dict]:
+     """
+     Converts image content to openai format.
+     """
+
+     if len(openai_message_list) != len(pydantic_message_list):
+         return openai_message_list
+
+     new_message_list = []
+     for idx in range(len(openai_message_list)):
+         openai_message, pydantic_message = openai_message_list[idx], pydantic_message_list[idx]
+         if pydantic_message.role != "user":
+             new_message_list.append(openai_message)
+             continue
+
+         if not isinstance(pydantic_message.content, list) or (
+             len(pydantic_message.content) == 1 and pydantic_message.content[0].type == MessageContentType.text
+         ):
+             new_message_list.append(openai_message)
+             continue
+
+         message_content = []
+         for content in pydantic_message.content:
+             if content.type == MessageContentType.text:
+                 message_content.append(
+                     {
+                         "type": "text",
+                         "text": content.text,
+                     }
+                 )
+             elif content.type == MessageContentType.image:
+                 message_content.append(
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:{content.source.media_type};base64,{content.source.data}",
+                             "detail": content.source.detail or "auto",
+                         },
+                     }
+                 )
+             else:
+                 raise ValueError(f"Unsupported content type {content.type}")
+
+         new_message_list.append({"role": "user", "content": message_content})
+
+     return new_message_list
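
For reference, a user message carrying one text part and one image part comes out in the standard OpenAI multimodal shape (values below are illustrative):

    expected = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0...", "detail": "auto"},
            },
        ],
    }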
letta/local_llm/utils.py CHANGED
@@ -44,24 +44,6 @@ def post_json_auth_request(uri, json_payload, auth_type, auth_key):
      return response
 
 
- # deprecated for Box
- class DotDict(dict):
-     """Allow dot access on properties similar to OpenAI response object"""
-
-     def __getattr__(self, attr):
-         return self.get(attr)
-
-     def __setattr__(self, key, value):
-         self[key] = value
-
-     # following methods necessary for pickling
-     def __getstate__(self):
-         return vars(self)
-
-     def __setstate__(self, state):
-         vars(self).update(state)
-
-
  def load_grammar_file(grammar):
      # Set grammar
      grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammars", f"{grammar}.gbnf")
@@ -79,8 +61,9 @@ def load_grammar_file(grammar):
 
  # TODO: support tokenizers/tokenizer apis available in local models
  def count_tokens(s: str, model: str = "gpt-4") -> int:
-     encoding = tiktoken.encoding_for_model(model)
-     return len(encoding.encode(s))
+     from letta.utils import count_tokens
+
+     return count_tokens(s, model)
 
 
  def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
letta/orm/sqlalchemy_base.py CHANGED
@@ -494,6 +494,8 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
 
          identifiers = [] if identifier is None else [identifier]
          query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
+         if query is None:
+             raise NoResultFound(f"{cls.__name__} not found with identifier {identifier}")
 
          if settings.letta_pg_uri_no_default:
              await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
@@ -533,6 +535,8 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
              NoResultFound: if the object is not found
          """
          query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
+         if query is None:
+             return []
          results = db_session.execute(query).scalars().all()
          return cls._read_multiple_postprocess(results, identifiers, query_conditions)
 
@@ -553,6 +557,8 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
          The primary accessor for ORM record(s)
          """
          query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
+         if query is None:
+             return []
          results = await db_session.execute(query)
          return cls._read_multiple_postprocess(results.scalars().all(), identifiers, query_conditions)
 
@@ -582,7 +588,7 @@
              query_conditions.append(f"id='{identifiers}'")
          elif not kwargs:
              logger.debug(f"No identifiers provided for {cls.__name__}, returning empty list")
-             return []
+             return None, query_conditions
 
          if kwargs:
              query = query.filter_by(**kwargs)
letta/otel/metric_registry.py CHANGED
@@ -120,3 +120,29 @@ class MetricRegistry:
              unit="1",
          ),
      )
+
+     # (includes endpoint_path, method, status_code)
+     @property
+     def endpoint_e2e_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_endpoint_e2e_ms",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_endpoint_e2e_ms",
+                 description="Histogram for endpoint e2e time (ms)",
+                 unit="ms",
+             ),
+         )
+
+     # (includes endpoint_path, method, status_code)
+     @property
+     def endpoint_request_counter(self) -> Counter:
+         return self._get_or_create_metric(
+             "count_endpoint_requests",
+             partial(
+                 self._meter.create_counter,
+                 name="count_endpoint_requests",
+                 description="Counts the number of endpoint requests",
+                 unit="1",
+             ),
+         )
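
A hedged sketch of how request middleware might record these two metrics (the attribute keys follow the comments above; plain MetricRegistry() instantiation is an assumption, while add/record are the standard OpenTelemetry instrument methods):

    from letta.otel.metric_registry import MetricRegistry

    registry = MetricRegistry()  # assumed accessor; the real code may use a shared instance
    attrs = {"endpoint_path": "/v1/agents/{agent_id}/messages", "method": "POST", "status_code": 200}

    # One request observed, plus its end-to-end latency in milliseconds
    registry.endpoint_request_counter.add(1, attributes=attrs)
    registry.endpoint_e2e_ms_histogram.record(123.4, attributes=attrs)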