letta-nightly 0.7.20.dev20250521104258__py3-none-any.whl → 0.7.21.dev20250522104246__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +290 -3
  3. letta/agents/base_agent.py +0 -55
  4. letta/agents/helpers.py +5 -0
  5. letta/agents/letta_agent.py +314 -64
  6. letta/agents/letta_agent_batch.py +102 -55
  7. letta/agents/voice_agent.py +5 -5
  8. letta/client/client.py +9 -18
  9. letta/constants.py +55 -1
  10. letta/functions/function_sets/builtin.py +27 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  12. letta/interfaces/anthropic_streaming_interface.py +10 -1
  13. letta/interfaces/openai_streaming_interface.py +9 -2
  14. letta/llm_api/anthropic.py +21 -2
  15. letta/llm_api/anthropic_client.py +33 -6
  16. letta/llm_api/google_ai_client.py +136 -423
  17. letta/llm_api/google_vertex_client.py +173 -22
  18. letta/llm_api/llm_api_tools.py +27 -0
  19. letta/llm_api/llm_client.py +1 -1
  20. letta/llm_api/llm_client_base.py +32 -21
  21. letta/llm_api/openai.py +57 -0
  22. letta/llm_api/openai_client.py +7 -11
  23. letta/memory.py +0 -1
  24. letta/orm/__init__.py +1 -0
  25. letta/orm/enums.py +1 -0
  26. letta/orm/provider_trace.py +26 -0
  27. letta/orm/step.py +1 -0
  28. letta/schemas/provider_trace.py +43 -0
  29. letta/schemas/providers.py +210 -65
  30. letta/schemas/step.py +1 -0
  31. letta/schemas/tool.py +4 -0
  32. letta/server/db.py +37 -19
  33. letta/server/rest_api/routers/v1/__init__.py +2 -0
  34. letta/server/rest_api/routers/v1/agents.py +57 -34
  35. letta/server/rest_api/routers/v1/blocks.py +3 -3
  36. letta/server/rest_api/routers/v1/identities.py +24 -26
  37. letta/server/rest_api/routers/v1/jobs.py +3 -3
  38. letta/server/rest_api/routers/v1/llms.py +13 -8
  39. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
  40. letta/server/rest_api/routers/v1/tags.py +3 -3
  41. letta/server/rest_api/routers/v1/telemetry.py +18 -0
  42. letta/server/rest_api/routers/v1/tools.py +6 -6
  43. letta/server/rest_api/streaming_response.py +105 -0
  44. letta/server/rest_api/utils.py +4 -0
  45. letta/server/server.py +140 -1
  46. letta/services/agent_manager.py +251 -18
  47. letta/services/block_manager.py +52 -37
  48. letta/services/helpers/noop_helper.py +10 -0
  49. letta/services/identity_manager.py +43 -38
  50. letta/services/job_manager.py +29 -0
  51. letta/services/message_manager.py +111 -0
  52. letta/services/sandbox_config_manager.py +36 -0
  53. letta/services/step_manager.py +146 -0
  54. letta/services/telemetry_manager.py +58 -0
  55. letta/services/tool_executor/tool_execution_manager.py +49 -5
  56. letta/services/tool_executor/tool_execution_sandbox.py +47 -0
  57. letta/services/tool_executor/tool_executor.py +236 -7
  58. letta/services/tool_manager.py +160 -1
  59. letta/services/tool_sandbox/e2b_sandbox.py +65 -3
  60. letta/settings.py +10 -2
  61. letta/tracing.py +5 -5
  62. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/METADATA +3 -2
  63. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/RECORD +66 -59
  64. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py CHANGED
@@ -5,14 +5,16 @@ from typing import List, Optional
  from google import genai
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
+ from letta.constants import NON_USER_MSG_PREFIX
  from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps, json_loads
- from letta.llm_api.google_ai_client import GoogleAIClient
+ from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
  from letta.local_llm.utils import count_tokens
  from letta.log import get_logger
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_request import Tool
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
  from letta.settings import model_settings, settings
  from letta.utils import get_tool_call_id
@@ -20,18 +22,21 @@ from letta.utils import get_tool_call_id
  logger = get_logger(__name__)
 
 
- class GoogleVertexClient(GoogleAIClient):
+ class GoogleVertexClient(LLMClientBase):
 
- def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
- """
- Performs underlying request to llm and returns raw response.
- """
- client = genai.Client(
+ def _get_client(self):
+ return genai.Client(
  vertexai=True,
  project=model_settings.google_cloud_project,
  location=model_settings.google_cloud_location,
  http_options={"api_version": "v1"},
  )
+
+ def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+ """
+ Performs underlying request to llm and returns raw response.
+ """
+ client = self._get_client()
  response = client.models.generate_content(
  model=llm_config.model,
  contents=request_data["contents"],
@@ -43,12 +48,7 @@ class GoogleVertexClient(GoogleAIClient):
  """
  Performs underlying request to llm and returns raw response.
  """
- client = genai.Client(
- vertexai=True,
- project=model_settings.google_cloud_project,
- location=model_settings.google_cloud_location,
- http_options={"api_version": "v1"},
- )
+ client = self._get_client()
  response = await client.aio.models.generate_content(
  model=llm_config.model,
  contents=request_data["contents"],
@@ -56,6 +56,139 @@ class GoogleVertexClient(GoogleAIClient):
  )
  return response.model_dump()
 
+ def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+ """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+ In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+ so there is no natural follow-up 'model' role message.
+
+ To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+ with role == 'model' that is placed in-between the function output
+ (role == 'tool') and the user message (role == 'user').
+ """
+ dummy_yield_message = {
+ "role": "model",
+ "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+ }
+ messages_with_padding = []
+ for i, message in enumerate(messages):
+ messages_with_padding.append(message)
+ # Check if the current message role is 'tool' and the next message role is 'user'
+ if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+ messages_with_padding.append(dummy_yield_message)
+
+ return messages_with_padding
+
+ def _clean_google_ai_schema_properties(self, schema_part: dict):
+ """Recursively clean schema parts to remove unsupported Google AI keywords."""
+ if not isinstance(schema_part, dict):
+ return
+
+ # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
+ # * Only a subset of the OpenAPI schema is supported.
+ # * Supported parameter types in Python are limited.
+ unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
+ keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
+ for key_to_remove in keys_to_remove_at_this_level:
+ logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
+ del schema_part[key_to_remove]
+
+ if schema_part.get("type") == "string" and "format" in schema_part:
+ allowed_formats = ["enum", "date-time"]
+ if schema_part["format"] not in allowed_formats:
+ logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
+ del schema_part["format"]
+
+ # Check properties within the current level
+ if "properties" in schema_part and isinstance(schema_part["properties"], dict):
+ for prop_name, prop_schema in schema_part["properties"].items():
+ self._clean_google_ai_schema_properties(prop_schema)
+
+ # Check items within arrays
+ if "items" in schema_part and isinstance(schema_part["items"], dict):
+ self._clean_google_ai_schema_properties(schema_part["items"])
+
+ # Check within anyOf, allOf, oneOf lists
+ for key in ["anyOf", "allOf", "oneOf"]:
+ if key in schema_part and isinstance(schema_part[key], list):
+ for item_schema in schema_part[key]:
+ self._clean_google_ai_schema_properties(item_schema)
+
+ def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
+ """
+ OpenAI style:
+ "tools": [{
+ "type": "function",
+ "function": {
+ "name": "find_movies",
+ "description": "find ....",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ PARAM: {
+ "type": PARAM_TYPE, # eg "string"
+ "description": PARAM_DESCRIPTION,
+ },
+ ...
+ },
+ "required": List[str],
+ }
+ }
+ }
+ ]
+
+ Google AI style:
+ "tools": [{
+ "functionDeclarations": [{
+ "name": "find_movies",
+ "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+ "parameters": {
+ "type": "OBJECT",
+ "properties": {
+ "location": {
+ "type": "STRING",
+ "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+ },
+ "description": {
+ "type": "STRING",
+ "description": "Any kind of description including category or genre, title words, attributes, etc."
+ }
+ },
+ "required": ["description"]
+ }
+ }, {
+ "name": "find_theaters",
+ ...
+ """
+ function_list = [
+ dict(
+ name=t.function.name,
+ description=t.function.description,
+ parameters=t.function.parameters, # TODO need to unpack
+ )
+ for t in tools
+ ]
+
+ # Add inner thoughts if needed
+ for func in function_list:
+ # Note: Google AI API used to have weird casing requirements, but not any more
+
+ # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
+ if "parameters" in func and isinstance(func["parameters"], dict):
+ self._clean_google_ai_schema_properties(func["parameters"])
+
+ # Add inner thoughts
+ if llm_config.put_inner_thoughts_in_kwargs:
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
+
+ func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
+ "type": "string",
+ "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+ }
+ func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
+
+ return [{"functionDeclarations": function_list}]
+
  def build_request_data(
  self,
  messages: List[PydanticMessage],
@@ -66,11 +199,29 @@ class GoogleVertexClient(GoogleAIClient):
  """
  Constructs a request object in the expected data format for this client.
  """
- request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
- request_data["config"] = request_data.pop("generation_config")
- request_data["config"]["tools"] = request_data.pop("tools")
 
- tool_names = [t["name"] for t in tools] if tools else []
+ if tools:
+ tool_objs = [Tool(type="function", function=t) for t in tools]
+ tool_names = [t.function.name for t in tool_objs]
+ # Convert to the exact payload style Google expects
+ formatted_tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
+ else:
+ formatted_tools = []
+ tool_names = []
+
+ contents = self.add_dummy_model_messages(
+ [m.to_google_ai_dict() for m in messages],
+ )
+
+ request_data = {
+ "contents": contents,
+ "config": {
+ "temperature": llm_config.temperature,
+ "max_output_tokens": llm_config.max_tokens,
+ "tools": formatted_tools,
+ },
+ }
+
  if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
  request_data["config"]["response_mime_type"] = "application/json"
  request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
@@ -89,11 +240,11 @@ class GoogleVertexClient(GoogleAIClient):
  # Add thinking_config
  # If enable_reasoner is False, set thinking_budget to 0
  # Otherwise, use the value from max_reasoning_tokens
- thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens
- thinking_config = ThinkingConfig(
- thinking_budget=thinking_budget,
- )
- request_data["config"]["thinking_config"] = thinking_config.model_dump()
+ if llm_config.enable_reasoner:
+ thinking_config = ThinkingConfig(
+ thinking_budget=llm_config.max_reasoning_tokens,
+ )
+ request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
  return request_data
 
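For orientation, a minimal sketch (not part of the diff) of what the new add_dummy_model_messages helper does to a Gemini-style message list; the message contents and the instantiated client below are illustrative assumptions, not code from the package:

    # Hypothetical usage; `client` is assumed to be a configured GoogleVertexClient.
    messages = [
        {"role": "user", "parts": [{"text": "What's playing nearby?"}]},
        {"role": "model", "parts": [{"text": "Calling find_movies"}]},
        {"role": "tool", "parts": [{"text": '{"movies": ["Dune"]}'}]},
        {"role": "user", "parts": [{"text": "Any evening showings?"}]},
    ]
    padded = client.add_dummy_model_messages(messages)
    # A dummy 'model' turn is inserted between the 'tool' output and the next 'user'
    # message, satisfying the Google AI ordering requirement described in the docstring.
    assert padded[3]["role"] == "model"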
letta/llm_api/llm_api_tools.py CHANGED
@@ -20,15 +20,19 @@ from letta.llm_api.openai import (
  build_openai_chat_completions_request,
  openai_chat_completions_process_stream,
  openai_chat_completions_request,
+ prepare_openai_payload,
  )
  from letta.local_llm.chat_completion_proxy import get_chat_completion
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+ from letta.orm.user import User
  from letta.schemas.enums import ProviderCategory
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.schemas.provider_trace import ProviderTraceCreate
+ from letta.services.telemetry_manager import TelemetryManager
  from letta.settings import ModelSettings
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
  from letta.tracing import log_event, trace_method
@@ -142,6 +146,9 @@ def create(
  model_settings: Optional[dict] = None, # TODO: eventually pass from server
  put_inner_thoughts_first: bool = True,
  name: Optional[str] = None,
+ telemetry_manager: Optional[TelemetryManager] = None,
+ step_id: Optional[str] = None,
+ actor: Optional[User] = None,
  ) -> ChatCompletionResponse:
  """Return response to chat completion with backoff"""
  from letta.utils import printd
@@ -233,6 +240,16 @@ def create(
  if isinstance(stream_interface, AgentChunkStreamingInterface):
  stream_interface.stream_end()
 
+ telemetry_manager.create_provider_trace(
+ actor=actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=prepare_openai_payload(data),
+ response_json=response.model_json_schema(),
+ step_id=step_id,
+ organization_id=actor.organization_id,
+ ),
+ )
+
  if llm_config.put_inner_thoughts_in_kwargs:
  response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
@@ -407,6 +424,16 @@ def create(
  if llm_config.put_inner_thoughts_in_kwargs:
  response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
+ telemetry_manager.create_provider_trace(
+ actor=actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=chat_completion_request.model_json_schema(),
+ response_json=response.model_json_schema(),
+ step_id=step_id,
+ organization_id=actor.organization_id,
+ ),
+ )
+
  return response
 
  # elif llm_config.model_endpoint_type == "cohere":
letta/llm_api/llm_client.py CHANGED
@@ -51,7 +51,7 @@ class LLMClient:
  put_inner_thoughts_first=put_inner_thoughts_first,
  actor=actor,
  )
- case ProviderType.openai:
+ case ProviderType.openai | ProviderType.together:
  from letta.llm_api.openai_client import OpenAIClient
 
  return OpenAIClient(
letta/llm_api/llm_client_base.py CHANGED
@@ -9,7 +9,9 @@ from letta.errors import LLMError
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
- from letta.tracing import log_event
+ from letta.schemas.provider_trace import ProviderTraceCreate
+ from letta.services.telemetry_manager import TelemetryManager
+ from letta.tracing import log_event, trace_method
 
  if TYPE_CHECKING:
  from letta.orm import User
@@ -31,13 +33,15 @@ class LLMClientBase:
  self.put_inner_thoughts_first = put_inner_thoughts_first
  self.use_tool_naming = use_tool_naming
 
+ @trace_method
  def send_llm_request(
  self,
  messages: List[Message],
  llm_config: LLMConfig,
  tools: Optional[List[dict]] = None, # TODO: change to Tool object
- stream: bool = False,
  force_tool_call: Optional[str] = None,
+ telemetry_manager: Optional["TelemetryManager"] = None,
+ step_id: Optional[str] = None,
  ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
  """
  Issues a request to the downstream model endpoint and parses response.
@@ -48,37 +52,51 @@ class LLMClientBase:
 
  try:
  log_event(name="llm_request_sent", attributes=request_data)
- if stream:
- return self.stream(request_data, llm_config)
- else:
- response_data = self.request(request_data, llm_config)
+ response_data = self.request(request_data, llm_config)
+ if step_id and telemetry_manager:
+ telemetry_manager.create_provider_trace(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
  log_event(name="llm_response_received", attributes=response_data)
  except Exception as e:
  raise self.handle_llm_error(e)
 
  return self.convert_response_to_chat_completion(response_data, messages, llm_config)
 
+ @trace_method
  async def send_llm_request_async(
  self,
+ request_data: dict,
  messages: List[Message],
  llm_config: LLMConfig,
- tools: Optional[List[dict]] = None, # TODO: change to Tool object
- stream: bool = False,
- force_tool_call: Optional[str] = None,
+ telemetry_manager: "TelemetryManager | None" = None,
+ step_id: str | None = None,
  ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
  """
  Issues a request to the downstream model endpoint.
  If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
  Otherwise returns a ChatCompletionResponse.
  """
- request_data = self.build_request_data(messages, llm_config, tools, force_tool_call)
 
  try:
  log_event(name="llm_request_sent", attributes=request_data)
- if stream:
- return await self.stream_async(request_data, llm_config)
- else:
- response_data = await self.request_async(request_data, llm_config)
+ response_data = await self.request_async(request_data, llm_config)
+ await telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+
  log_event(name="llm_response_received", attributes=response_data)
  except Exception as e:
  raise self.handle_llm_error(e)
@@ -133,13 +151,6 @@ class LLMClientBase:
  """
  raise NotImplementedError
 
- @abstractmethod
- def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
- """
- Performs underlying streaming request to llm and returns raw response.
- """
- raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")
-
  @abstractmethod
  async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
  """
letta/llm_api/openai.py CHANGED
@@ -1,6 +1,7 @@
  import warnings
  from typing import Generator, List, Optional, Union
 
+ import httpx
  import requests
  from openai import OpenAI
 
@@ -110,6 +111,62 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
  raise e
 
 
+ async def openai_get_model_list_async(
+ url: str,
+ api_key: Optional[str] = None,
+ fix_url: bool = False,
+ extra_params: Optional[dict] = None,
+ client: Optional["httpx.AsyncClient"] = None,
+ ) -> dict:
+ """https://platform.openai.com/docs/api-reference/models/list"""
+ from letta.utils import printd
+
+ # In some cases we may want to double-check the URL and do basic correction
+ if fix_url and not url.endswith("/v1"):
+ url = smart_urljoin(url, "v1")
+
+ url = smart_urljoin(url, "models")
+
+ headers = {"Content-Type": "application/json"}
+ if api_key is not None:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ printd(f"Sending request to {url}")
+
+ # Use provided client or create a new one
+ close_client = False
+ if client is None:
+ client = httpx.AsyncClient()
+ close_client = True
+
+ try:
+ response = await client.get(url, headers=headers, params=extra_params)
+ response.raise_for_status()
+ result = response.json()
+ printd(f"response = {result}")
+ return result
+ except httpx.HTTPStatusError as http_err:
+ # Handle HTTP errors (e.g., response 4XX, 5XX)
+ error_response = None
+ try:
+ error_response = http_err.response.json()
+ except:
+ error_response = {"status_code": http_err.response.status_code, "text": http_err.response.text}
+ printd(f"Got HTTPError, exception={http_err}, response={error_response}")
+ raise http_err
+ except httpx.RequestError as req_err:
+ # Handle other httpx-related errors (e.g., connection error)
+ printd(f"Got RequestException, exception={req_err}")
+ raise req_err
+ except Exception as e:
+ # Handle other potential errors
+ printd(f"Got unknown Exception, exception={e}")
+ raise e
+ finally:
+ if close_client:
+ await client.aclose()
+
+
  def build_openai_chat_completions_request(
  llm_config: LLMConfig,
  messages: List[_Message],
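A short usage sketch (not part of the diff) for the new openai_get_model_list_async helper, reusing a caller-provided httpx.AsyncClient so the helper does not open and close a connection per call; the endpoint URL and API key below are placeholders:

    import asyncio

    import httpx

    from letta.llm_api.openai import openai_get_model_list_async

    async def main():
        async with httpx.AsyncClient() as client:
            result = await openai_get_model_list_async(
                "https://api.openai.com/v1",
                api_key="<OPENAI_API_KEY>",
                client=client,  # caller-owned client is not closed by the helper
            )
        # An OpenAI-compatible /models endpoint returns {"data": [{"id": ...}, ...]}
        print([m["id"] for m in result.get("data", [])])

    asyncio.run(main())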
letta/llm_api/openai_client.py CHANGED
@@ -2,7 +2,7 @@ import os
  from typing import List, Optional
 
  import openai
- from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+ from openai import AsyncOpenAI, AsyncStream, OpenAI
  from openai.types.chat.chat_completion import ChatCompletion
  from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.log import get_logger
- from letta.schemas.enums import ProviderCategory
+ from letta.schemas.enums import ProviderCategory, ProviderType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase):
  from letta.services.provider_manager import ProviderManager
 
  api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
+ if llm_config.model_endpoint_type == ProviderType.together:
+ api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY")
 
  if not api_key:
  api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -254,20 +256,14 @@ class OpenAIClient(LLMClientBase):
 
  return chat_completion_response
 
- def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
- """
- Performs underlying streaming request to OpenAI and returns the stream iterator.
- """
- client = OpenAI(**self._prepare_client_kwargs(llm_config))
- response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
- return response_stream
-
  async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
  """
  Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
  """
  client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
- response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+ response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+ **request_data, stream=True, stream_options={"include_usage": True}
+ )
  return response_stream
 
  def handle_llm_error(self, e: Exception) -> Exception:
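One consequence of passing stream_options={"include_usage": True} above: the OpenAI streaming API appends a final chunk whose choices list is empty and whose usage field is populated. A hedged sketch (not part of the diff) of consuming that trailing chunk; openai_client, request_data, and llm_config are assumed to exist:

    usage = None
    stream = await openai_client.stream_async(request_data, llm_config)
    async for chunk in stream:
        if chunk.usage is not None:
            usage = chunk.usage  # only set on the trailing usage-only chunk
        for choice in chunk.choices:
            ...  # handle content / tool-call deltas as before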
letta/memory.py CHANGED
@@ -93,7 +93,6 @@ def summarize_messages(
  response = llm_client.send_llm_request(
  messages=message_sequence,
  llm_config=llm_config_no_inner_thoughts,
- stream=False,
  )
  else:
  response = create(
letta/orm/__init__.py CHANGED
@@ -19,6 +19,7 @@ from letta.orm.message import Message
  from letta.orm.organization import Organization
  from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
  from letta.orm.provider import Provider
+ from letta.orm.provider_trace import ProviderTrace
  from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
  from letta.orm.source import Source
  from letta.orm.sources_agents import SourcesAgents
letta/orm/enums.py CHANGED
@@ -8,6 +8,7 @@ class ToolType(str, Enum):
  LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"
  LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
  LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
+ LETTA_BUILTIN = "letta_builtin"
  EXTERNAL_COMPOSIO = "external_composio"
  EXTERNAL_LANGCHAIN = "external_langchain"
  # TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?
letta/orm/provider_trace.py ADDED
@@ -0,0 +1,26 @@
+ import uuid
+
+ from sqlalchemy import JSON, Index, String
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+ from letta.orm.mixins import OrganizationMixin
+ from letta.orm.sqlalchemy_base import SqlalchemyBase
+ from letta.schemas.provider_trace import ProviderTrace as PydanticProviderTrace
+
+
+ class ProviderTrace(SqlalchemyBase, OrganizationMixin):
+ """Defines data model for storing provider trace information"""
+
+ __tablename__ = "provider_traces"
+ __pydantic_model__ = PydanticProviderTrace
+ __table_args__ = (Index("ix_step_id", "step_id"),)
+
+ id: Mapped[str] = mapped_column(
+ primary_key=True, doc="Unique provider trace identifier", default=lambda: f"provider_trace-{uuid.uuid4()}"
+ )
+ request_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider request")
+ response_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider response")
+ step_id: Mapped[str] = mapped_column(String, nullable=True, doc="ID of the step that this trace is associated with")
+
+ # Relationships
+ organization: Mapped["Organization"] = relationship("Organization", lazy="selectin")
letta/orm/step.py CHANGED
@@ -35,6 +35,7 @@ class Step(SqlalchemyBase):
  )
  agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
  provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
+ provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
  model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
  model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
  context_window_limit: Mapped[Optional[int]] = mapped_column(
letta/schemas/provider_trace.py ADDED
@@ -0,0 +1,43 @@
+ from __future__ import annotations
+
+ from datetime import datetime
+ from typing import Any, Dict, Optional
+
+ from pydantic import BaseModel, Field
+
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.schemas.letta_base import OrmMetadataBase
+
+
+ class BaseProviderTrace(OrmMetadataBase):
+ __id_prefix__ = "provider_trace"
+
+
+ class ProviderTraceCreate(BaseModel):
+ """Request to create a provider trace"""
+
+ request_json: dict[str, Any] = Field(..., description="JSON content of the provider request")
+ response_json: dict[str, Any] = Field(..., description="JSON content of the provider response")
+ step_id: str = Field(None, description="ID of the step that this trace is associated with")
+ organization_id: str = Field(..., description="The unique identifier of the organization.")
+
+
+ class ProviderTrace(BaseProviderTrace):
+ """
+ Letta's internal representation of a provider trace.
+
+ Attributes:
+ id (str): The unique identifier of the provider trace.
+ request_json (Dict[str, Any]): JSON content of the provider request.
+ response_json (Dict[str, Any]): JSON content of the provider response.
+ step_id (str): ID of the step that this trace is associated with.
+ organization_id (str): The unique identifier of the organization.
+ created_at (datetime): The timestamp when the object was created.
+ """
+
+ id: str = BaseProviderTrace.generate_id_field()
+ request_json: Dict[str, Any] = Field(..., description="JSON content of the provider request")
+ response_json: Dict[str, Any] = Field(..., description="JSON content of the provider response")
+ step_id: Optional[str] = Field(None, description="ID of the step that this trace is associated with")
+ organization_id: str = Field(..., description="The unique identifier of the organization.")
+ created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
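To close the loop on the telemetry additions, a minimal sketch (not part of the diff) of recording one provider round-trip with the new schema; telemetry_manager, actor, step, and the request/response dicts are assumed to already exist:

    from letta.schemas.provider_trace import ProviderTraceCreate

    trace = ProviderTraceCreate(
        request_json=request_data,    # raw JSON payload sent to the provider
        response_json=response_data,  # raw JSON payload returned by the provider
        step_id=step.id,
        organization_id=actor.organization_id,
    )
    telemetry_manager.create_provider_trace(actor=actor, provider_trace_create=trace)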