letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +8 -12
  3. letta/agents/exceptions.py +6 -0
  4. letta/agents/letta_agent.py +48 -35
  5. letta/agents/letta_agent_batch.py +6 -2
  6. letta/agents/voice_agent.py +10 -7
  7. letta/constants.py +5 -1
  8. letta/functions/composio_helpers.py +100 -0
  9. letta/functions/functions.py +4 -2
  10. letta/functions/helpers.py +19 -99
  11. letta/groups/helpers.py +1 -0
  12. letta/groups/sleeptime_multi_agent.py +5 -1
  13. letta/helpers/message_helper.py +21 -4
  14. letta/helpers/tool_execution_helper.py +1 -1
  15. letta/interfaces/anthropic_streaming_interface.py +165 -158
  16. letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
  17. letta/llm_api/anthropic.py +15 -10
  18. letta/llm_api/anthropic_client.py +5 -1
  19. letta/llm_api/google_vertex_client.py +1 -1
  20. letta/llm_api/llm_api_tools.py +7 -0
  21. letta/llm_api/llm_client.py +12 -2
  22. letta/llm_api/llm_client_base.py +4 -0
  23. letta/llm_api/openai.py +9 -3
  24. letta/llm_api/openai_client.py +18 -4
  25. letta/memory.py +3 -1
  26. letta/orm/group.py +2 -0
  27. letta/orm/provider.py +10 -0
  28. letta/schemas/agent.py +0 -1
  29. letta/schemas/enums.py +11 -0
  30. letta/schemas/group.py +24 -0
  31. letta/schemas/llm_config.py +1 -0
  32. letta/schemas/llm_config_overrides.py +2 -2
  33. letta/schemas/providers.py +75 -20
  34. letta/schemas/tool.py +3 -8
  35. letta/server/rest_api/app.py +12 -0
  36. letta/server/rest_api/chat_completions_interface.py +1 -1
  37. letta/server/rest_api/interface.py +8 -10
  38. letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
  39. letta/server/rest_api/routers/v1/agents.py +1 -1
  40. letta/server/rest_api/routers/v1/llms.py +4 -3
  41. letta/server/rest_api/routers/v1/providers.py +4 -1
  42. letta/server/rest_api/routers/v1/voice.py +0 -2
  43. letta/server/rest_api/utils.py +8 -19
  44. letta/server/server.py +25 -11
  45. letta/services/group_manager.py +58 -0
  46. letta/services/provider_manager.py +25 -14
  47. letta/services/summarizer/summarizer.py +15 -7
  48. letta/services/tool_executor/tool_execution_manager.py +1 -1
  49. letta/services/tool_executor/tool_executor.py +3 -3
  50. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
  51. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
  52. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
  53. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
  54. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0
@@ -35,7 +35,7 @@ from letta.schemas.letta_message import (
35
35
  from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
36
36
  from letta.schemas.message import Message
37
37
  from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
38
- from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
38
+ from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
39
39
 
40
40
  logger = get_logger(__name__)
41
41
 
@@ -56,7 +56,7 @@ class AnthropicStreamingInterface:
56
56
  """
57
57
 
58
58
  def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
59
- self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
59
+ self.json_parser: JSONParser = PydanticJSONParser()
60
60
  self.use_assistant_message = use_assistant_message
61
61
 
62
62
  # Premake IDs for database writes
@@ -68,7 +68,7 @@ class AnthropicStreamingInterface:
68
68
  self.accumulated_inner_thoughts = []
69
69
  self.tool_call_id = None
70
70
  self.tool_call_name = None
71
- self.accumulated_tool_call_args = []
71
+ self.accumulated_tool_call_args = ""
72
72
  self.previous_parse = {}
73
73
 
74
74
  # usage trackers
@@ -85,193 +85,200 @@ class AnthropicStreamingInterface:
85
85
 
86
86
  def get_tool_call_object(self) -> ToolCall:
87
87
  """Useful for agent loop"""
88
- return ToolCall(
89
- id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
90
- )
88
+ return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=self.accumulated_tool_call_args, name=self.tool_call_name))
91
89
 
92
90
  def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
93
91
  """
94
92
  Check if inner thoughts are complete in the current tool call arguments
95
93
  by looking for a closing quote after the inner_thoughts field
96
94
  """
97
- if not self.put_inner_thoughts_in_kwarg:
98
- # None of the things should have inner thoughts in kwargs
99
- return True
100
- else:
101
- parsed = self.optimistic_json_parser.parse(combined_args)
102
- # TODO: This will break on tools with 0 input
103
- return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
95
+ try:
96
+ if not self.put_inner_thoughts_in_kwarg:
97
+ # None of the things should have inner thoughts in kwargs
98
+ return True
99
+ else:
100
+ parsed = self.json_parser.parse(combined_args)
101
+ # TODO: This will break on tools with 0 input
102
+ return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
103
+ except Exception as e:
104
+ logger.error("Error checking inner thoughts: %s", e)
105
+ raise
104
106
 
105
107
  async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
106
- async with stream:
107
- async for event in stream:
108
- # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
109
- if isinstance(event, BetaRawContentBlockStartEvent):
110
- content = event.content_block
111
-
112
- if isinstance(content, BetaTextBlock):
113
- self.anthropic_mode = EventMode.TEXT
114
- # TODO: Can capture citations, etc.
115
- elif isinstance(content, BetaToolUseBlock):
116
- self.anthropic_mode = EventMode.TOOL_USE
117
- self.tool_call_id = content.id
118
- self.tool_call_name = content.name
119
- self.inner_thoughts_complete = False
120
-
121
- if not self.use_assistant_message:
122
- # Buffer the initial tool call message instead of yielding immediately
123
- tool_call_msg = ToolCallMessage(
124
- id=self.letta_tool_message_id,
125
- tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
108
+ try:
109
+ async with stream:
110
+ async for event in stream:
111
+ # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
112
+ if isinstance(event, BetaRawContentBlockStartEvent):
113
+ content = event.content_block
114
+
115
+ if isinstance(content, BetaTextBlock):
116
+ self.anthropic_mode = EventMode.TEXT
117
+ # TODO: Can capture citations, etc.
118
+ elif isinstance(content, BetaToolUseBlock):
119
+ self.anthropic_mode = EventMode.TOOL_USE
120
+ self.tool_call_id = content.id
121
+ self.tool_call_name = content.name
122
+ self.inner_thoughts_complete = False
123
+
124
+ if not self.use_assistant_message:
125
+ # Buffer the initial tool call message instead of yielding immediately
126
+ tool_call_msg = ToolCallMessage(
127
+ id=self.letta_tool_message_id,
128
+ tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
129
+ date=datetime.now(timezone.utc).isoformat(),
130
+ )
131
+ self.tool_call_buffer.append(tool_call_msg)
132
+ elif isinstance(content, BetaThinkingBlock):
133
+ self.anthropic_mode = EventMode.THINKING
134
+ # TODO: Can capture signature, etc.
135
+ elif isinstance(content, BetaRedactedThinkingBlock):
136
+ self.anthropic_mode = EventMode.REDACTED_THINKING
137
+
138
+ hidden_reasoning_message = HiddenReasoningMessage(
139
+ id=self.letta_assistant_message_id,
140
+ state="redacted",
141
+ hidden_reasoning=content.data,
126
142
  date=datetime.now(timezone.utc).isoformat(),
127
143
  )
128
- self.tool_call_buffer.append(tool_call_msg)
129
- elif isinstance(content, BetaThinkingBlock):
130
- self.anthropic_mode = EventMode.THINKING
131
- # TODO: Can capture signature, etc.
132
- elif isinstance(content, BetaRedactedThinkingBlock):
133
- self.anthropic_mode = EventMode.REDACTED_THINKING
134
-
135
- hidden_reasoning_message = HiddenReasoningMessage(
136
- id=self.letta_assistant_message_id,
137
- state="redacted",
138
- hidden_reasoning=content.data,
139
- date=datetime.now(timezone.utc).isoformat(),
140
- )
141
- self.reasoning_messages.append(hidden_reasoning_message)
142
- yield hidden_reasoning_message
143
-
144
- elif isinstance(event, BetaRawContentBlockDeltaEvent):
145
- delta = event.delta
146
-
147
- if isinstance(delta, BetaTextDelta):
148
- # Safety check
149
- if not self.anthropic_mode == EventMode.TEXT:
150
- raise RuntimeError(
151
- f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
152
- )
144
+ self.reasoning_messages.append(hidden_reasoning_message)
145
+ yield hidden_reasoning_message
153
146
 
154
- # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
155
- delta.text = delta.text.replace("</thinking>", "")
156
- self.accumulated_inner_thoughts.append(delta.text)
157
-
158
- reasoning_message = ReasoningMessage(
159
- id=self.letta_assistant_message_id,
160
- reasoning=self.accumulated_inner_thoughts[-1],
161
- date=datetime.now(timezone.utc).isoformat(),
162
- )
163
- self.reasoning_messages.append(reasoning_message)
164
- yield reasoning_message
165
-
166
- elif isinstance(delta, BetaInputJSONDelta):
167
- if not self.anthropic_mode == EventMode.TOOL_USE:
168
- raise RuntimeError(
169
- f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
170
- )
147
+ elif isinstance(event, BetaRawContentBlockDeltaEvent):
148
+ delta = event.delta
171
149
 
172
- self.accumulated_tool_call_args.append(delta.partial_json)
173
- combined_args = "".join(self.accumulated_tool_call_args)
174
- current_parsed = self.optimistic_json_parser.parse(combined_args)
150
+ if isinstance(delta, BetaTextDelta):
151
+ # Safety check
152
+ if not self.anthropic_mode == EventMode.TEXT:
153
+ raise RuntimeError(
154
+ f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
155
+ )
175
156
 
176
- # Start detecting a difference in inner thoughts
177
- previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
178
- current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
179
- inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
157
+ # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
158
+ delta.text = delta.text.replace("</thinking>", "")
159
+ self.accumulated_inner_thoughts.append(delta.text)
180
160
 
181
- if inner_thoughts_diff:
182
161
  reasoning_message = ReasoningMessage(
183
162
  id=self.letta_assistant_message_id,
184
- reasoning=inner_thoughts_diff,
163
+ reasoning=self.accumulated_inner_thoughts[-1],
185
164
  date=datetime.now(timezone.utc).isoformat(),
186
165
  )
187
166
  self.reasoning_messages.append(reasoning_message)
188
167
  yield reasoning_message
189
168
 
190
- # Check if inner thoughts are complete - if so, flush the buffer
191
- if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(combined_args):
192
- self.inner_thoughts_complete = True
193
- # Flush all buffered tool call messages
194
- for buffered_msg in self.tool_call_buffer:
195
- yield buffered_msg
196
- self.tool_call_buffer = []
169
+ elif isinstance(delta, BetaInputJSONDelta):
170
+ if not self.anthropic_mode == EventMode.TOOL_USE:
171
+ raise RuntimeError(
172
+ f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
173
+ )
174
+
175
+ self.accumulated_tool_call_args += delta.partial_json
176
+ current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
197
177
 
198
- # Start detecting special case of "send_message"
199
- if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
200
- previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
201
- current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
202
- send_message_diff = current_send_message[len(previous_send_message) :]
178
+ # Start detecting a difference in inner thoughts
179
+ previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
180
+ current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
181
+ inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
203
182
 
204
- # Only stream out if it's not an empty string
205
- if send_message_diff:
206
- yield AssistantMessage(
183
+ if inner_thoughts_diff:
184
+ reasoning_message = ReasoningMessage(
207
185
  id=self.letta_assistant_message_id,
208
- content=[TextContent(text=send_message_diff)],
186
+ reasoning=inner_thoughts_diff,
209
187
  date=datetime.now(timezone.utc).isoformat(),
210
188
  )
211
- else:
212
- # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
213
- tool_call_msg = ToolCallMessage(
214
- id=self.letta_tool_message_id,
215
- tool_call=ToolCallDelta(arguments=delta.partial_json),
216
- date=datetime.now(timezone.utc).isoformat(),
217
- )
218
-
219
- if self.inner_thoughts_complete:
220
- yield tool_call_msg
189
+ self.reasoning_messages.append(reasoning_message)
190
+ yield reasoning_message
191
+
192
+ # Check if inner thoughts are complete - if so, flush the buffer
193
+ if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
194
+ self.inner_thoughts_complete = True
195
+ # Flush all buffered tool call messages
196
+ for buffered_msg in self.tool_call_buffer:
197
+ yield buffered_msg
198
+ self.tool_call_buffer = []
199
+
200
+ # Start detecting special case of "send_message"
201
+ if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
202
+ previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
203
+ current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
204
+ send_message_diff = current_send_message[len(previous_send_message) :]
205
+
206
+ # Only stream out if it's not an empty string
207
+ if send_message_diff:
208
+ yield AssistantMessage(
209
+ id=self.letta_assistant_message_id,
210
+ content=[TextContent(text=send_message_diff)],
211
+ date=datetime.now(timezone.utc).isoformat(),
212
+ )
221
213
  else:
222
- self.tool_call_buffer.append(tool_call_msg)
214
+ # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
215
+ tool_call_msg = ToolCallMessage(
216
+ id=self.letta_tool_message_id,
217
+ tool_call=ToolCallDelta(arguments=delta.partial_json),
218
+ date=datetime.now(timezone.utc).isoformat(),
219
+ )
223
220
 
224
- # Set previous parse
225
- self.previous_parse = current_parsed
226
- elif isinstance(delta, BetaThinkingDelta):
227
- # Safety check
228
- if not self.anthropic_mode == EventMode.THINKING:
229
- raise RuntimeError(
230
- f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
221
+ if self.inner_thoughts_complete:
222
+ yield tool_call_msg
223
+ else:
224
+ self.tool_call_buffer.append(tool_call_msg)
225
+
226
+ # Set previous parse
227
+ self.previous_parse = current_parsed
228
+ elif isinstance(delta, BetaThinkingDelta):
229
+ # Safety check
230
+ if not self.anthropic_mode == EventMode.THINKING:
231
+ raise RuntimeError(
232
+ f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
233
+ )
234
+
235
+ reasoning_message = ReasoningMessage(
236
+ id=self.letta_assistant_message_id,
237
+ source="reasoner_model",
238
+ reasoning=delta.thinking,
239
+ date=datetime.now(timezone.utc).isoformat(),
231
240
  )
241
+ self.reasoning_messages.append(reasoning_message)
242
+ yield reasoning_message
243
+ elif isinstance(delta, BetaSignatureDelta):
244
+ # Safety check
245
+ if not self.anthropic_mode == EventMode.THINKING:
246
+ raise RuntimeError(
247
+ f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
248
+ )
232
249
 
233
- reasoning_message = ReasoningMessage(
234
- id=self.letta_assistant_message_id,
235
- source="reasoner_model",
236
- reasoning=delta.thinking,
237
- date=datetime.now(timezone.utc).isoformat(),
238
- )
239
- self.reasoning_messages.append(reasoning_message)
240
- yield reasoning_message
241
- elif isinstance(delta, BetaSignatureDelta):
242
- # Safety check
243
- if not self.anthropic_mode == EventMode.THINKING:
244
- raise RuntimeError(
245
- f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
250
+ reasoning_message = ReasoningMessage(
251
+ id=self.letta_assistant_message_id,
252
+ source="reasoner_model",
253
+ reasoning="",
254
+ date=datetime.now(timezone.utc).isoformat(),
255
+ signature=delta.signature,
246
256
  )
257
+ self.reasoning_messages.append(reasoning_message)
258
+ yield reasoning_message
259
+ elif isinstance(event, BetaRawMessageStartEvent):
260
+ self.message_id = event.message.id
261
+ self.input_tokens += event.message.usage.input_tokens
262
+ self.output_tokens += event.message.usage.output_tokens
263
+ elif isinstance(event, BetaRawMessageDeltaEvent):
264
+ self.output_tokens += event.usage.output_tokens
265
+ elif isinstance(event, BetaRawMessageStopEvent):
266
+ # Don't do anything here! We don't want to stop the stream.
267
+ pass
268
+ elif isinstance(event, BetaRawContentBlockStopEvent):
269
+ # If we're exiting a tool use block and there are still buffered messages,
270
+ # we should flush them now
271
+ if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
272
+ for buffered_msg in self.tool_call_buffer:
273
+ yield buffered_msg
274
+ self.tool_call_buffer = []
247
275
 
248
- reasoning_message = ReasoningMessage(
249
- id=self.letta_assistant_message_id,
250
- source="reasoner_model",
251
- reasoning="",
252
- date=datetime.now(timezone.utc).isoformat(),
253
- signature=delta.signature,
254
- )
255
- self.reasoning_messages.append(reasoning_message)
256
- yield reasoning_message
257
- elif isinstance(event, BetaRawMessageStartEvent):
258
- self.message_id = event.message.id
259
- self.input_tokens += event.message.usage.input_tokens
260
- self.output_tokens += event.message.usage.output_tokens
261
- elif isinstance(event, BetaRawMessageDeltaEvent):
262
- self.output_tokens += event.usage.output_tokens
263
- elif isinstance(event, BetaRawMessageStopEvent):
264
- # Don't do anything here! We don't want to stop the stream.
265
- pass
266
- elif isinstance(event, BetaRawContentBlockStopEvent):
267
- # If we're exiting a tool use block and there are still buffered messages,
268
- # we should flush them now
269
- if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
270
- for buffered_msg in self.tool_call_buffer:
271
- yield buffered_msg
272
- self.tool_call_buffer = []
273
-
274
- self.anthropic_mode = None
276
+ self.anthropic_mode = None
277
+ except Exception as e:
278
+ logger.error("Error processing stream: %s", e)
279
+ raise
280
+ finally:
281
+ logger.info("AnthropicStreamingInterface: Stream processing complete.")
275
282
 
276
283
  def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
277
284
  def _process_group(
@@ -5,7 +5,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice,
5
5
 
6
6
  from letta.constants import PRE_EXECUTION_MESSAGE_ARG
7
7
  from letta.interfaces.utils import _format_sse_chunk
8
- from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
8
+ from letta.server.rest_api.json_parser import OptimisticJSONParser
9
9
 
10
10
 
11
11
  class OpenAIChatCompletionsStreamingInterface:
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
26
26
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
27
27
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
28
28
  from letta.log import get_logger
29
+ from letta.schemas.enums import ProviderType
29
30
  from letta.schemas.message import Message as _Message
30
31
  from letta.schemas.message import MessageRole as _MessageRole
31
32
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
@@ -128,11 +129,12 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
128
129
  # NOTE: currently there is no GET /models, so we need to hardcode
129
130
  # return MODEL_LIST
130
131
 
131
- anthropic_override_key = ProviderManager().get_anthropic_override_key()
132
- if anthropic_override_key:
133
- anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
132
+ if api_key:
133
+ anthropic_client = anthropic.Anthropic(api_key=api_key)
134
134
  elif model_settings.anthropic_api_key:
135
135
  anthropic_client = anthropic.Anthropic()
136
+ else:
137
+ raise ValueError("No API key provided")
136
138
 
137
139
  models = anthropic_client.models.list()
138
140
  models_json = models.model_dump()
@@ -738,13 +740,14 @@ def anthropic_chat_completions_request(
738
740
  put_inner_thoughts_in_kwargs: bool = False,
739
741
  extended_thinking: bool = False,
740
742
  max_reasoning_tokens: Optional[int] = None,
743
+ provider_name: Optional[str] = None,
741
744
  betas: List[str] = ["tools-2024-04-04"],
742
745
  ) -> ChatCompletionResponse:
743
746
  """https://docs.anthropic.com/claude/docs/tool-use"""
744
747
  anthropic_client = None
745
- anthropic_override_key = ProviderManager().get_anthropic_override_key()
746
- if anthropic_override_key:
747
- anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
748
+ if provider_name and provider_name != ProviderType.anthropic.value:
749
+ api_key = ProviderManager().get_override_key(provider_name)
750
+ anthropic_client = anthropic.Anthropic(api_key=api_key)
748
751
  elif model_settings.anthropic_api_key:
749
752
  anthropic_client = anthropic.Anthropic()
750
753
  else:
@@ -796,6 +799,7 @@ def anthropic_chat_completions_request_stream(
796
799
  put_inner_thoughts_in_kwargs: bool = False,
797
800
  extended_thinking: bool = False,
798
801
  max_reasoning_tokens: Optional[int] = None,
802
+ provider_name: Optional[str] = None,
799
803
  betas: List[str] = ["tools-2024-04-04"],
800
804
  ) -> Generator[ChatCompletionChunkResponse, None, None]:
801
805
  """Stream chat completions from Anthropic API.
@@ -810,10 +814,9 @@ def anthropic_chat_completions_request_stream(
810
814
  extended_thinking=extended_thinking,
811
815
  max_reasoning_tokens=max_reasoning_tokens,
812
816
  )
813
-
814
- anthropic_override_key = ProviderManager().get_anthropic_override_key()
815
- if anthropic_override_key:
816
- anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
817
+ if provider_name and provider_name != ProviderType.anthropic.value:
818
+ api_key = ProviderManager().get_override_key(provider_name)
819
+ anthropic_client = anthropic.Anthropic(api_key=api_key)
817
820
  elif model_settings.anthropic_api_key:
818
821
  anthropic_client = anthropic.Anthropic()
819
822
 
@@ -860,6 +863,7 @@ def anthropic_chat_completions_process_stream(
860
863
  put_inner_thoughts_in_kwargs: bool = False,
861
864
  extended_thinking: bool = False,
862
865
  max_reasoning_tokens: Optional[int] = None,
866
+ provider_name: Optional[str] = None,
863
867
  create_message_id: bool = True,
864
868
  create_message_datetime: bool = True,
865
869
  betas: List[str] = ["tools-2024-04-04"],
@@ -944,6 +948,7 @@ def anthropic_chat_completions_process_stream(
944
948
  put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
945
949
  extended_thinking=extended_thinking,
946
950
  max_reasoning_tokens=max_reasoning_tokens,
951
+ provider_name=provider_name,
947
952
  betas=betas,
948
953
  )
949
954
  ):
@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
27
27
  from letta.llm_api.llm_client_base import LLMClientBase
28
28
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
29
29
  from letta.log import get_logger
30
+ from letta.schemas.enums import ProviderType
30
31
  from letta.schemas.llm_config import LLMConfig
31
32
  from letta.schemas.message import Message as PydanticMessage
32
33
  from letta.schemas.openai.chat_completion_request import Tool
@@ -112,7 +113,10 @@ class AnthropicClient(LLMClientBase):
112
113
 
113
114
  @trace_method
114
115
  def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
115
- override_key = ProviderManager().get_anthropic_override_key()
116
+ override_key = None
117
+ if self.provider_name and self.provider_name != ProviderType.anthropic.value:
118
+ override_key = ProviderManager().get_override_key(self.provider_name)
119
+
116
120
  if async_client:
117
121
  return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
118
122
  return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
@@ -63,7 +63,7 @@ class GoogleVertexClient(GoogleAIClient):
63
63
  # Add thinking_config
64
64
  # If enable_reasoner is False, set thinking_budget to 0
65
65
  # Otherwise, use the value from max_reasoning_tokens
66
- thinking_budget = 0 if not self.llm_config.enable_reasoner else self.llm_config.max_reasoning_tokens
66
+ thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens
67
67
  thinking_config = ThinkingConfig(
68
68
  thinking_budget=thinking_budget,
69
69
  )
@@ -24,6 +24,7 @@ from letta.llm_api.openai import (
24
24
  from letta.local_llm.chat_completion_proxy import get_chat_completion
25
25
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
26
26
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
27
+ from letta.schemas.enums import ProviderType
27
28
  from letta.schemas.llm_config import LLMConfig
28
29
  from letta.schemas.message import Message
29
30
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
@@ -171,6 +172,10 @@ def create(
171
172
  if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
172
173
  # only is a problem if we are *not* using an openai proxy
173
174
  raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
175
+ elif llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
176
+ from letta.services.provider_manager import ProviderManager
177
+
178
+ api_key = ProviderManager().get_override_key(llm_config.provider_name)
174
179
  elif model_settings.openai_api_key is None:
175
180
  # the openai python client requires a dummy API key
176
181
  api_key = "DUMMY_API_KEY"
@@ -373,6 +378,7 @@ def create(
373
378
  stream_interface=stream_interface,
374
379
  extended_thinking=llm_config.enable_reasoner,
375
380
  max_reasoning_tokens=llm_config.max_reasoning_tokens,
381
+ provider_name=llm_config.provider_name,
376
382
  name=name,
377
383
  )
378
384
 
@@ -383,6 +389,7 @@ def create(
383
389
  put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
384
390
  extended_thinking=llm_config.enable_reasoner,
385
391
  max_reasoning_tokens=llm_config.max_reasoning_tokens,
392
+ provider_name=llm_config.provider_name,
386
393
  )
387
394
 
388
395
  if llm_config.put_inner_thoughts_in_kwargs:
@@ -9,8 +9,10 @@ class LLMClient:
9
9
 
10
10
  @staticmethod
11
11
  def create(
12
- provider: ProviderType,
12
+ provider_type: ProviderType,
13
+ provider_name: Optional[str] = None,
13
14
  put_inner_thoughts_first: bool = True,
15
+ actor_id: Optional[str] = None,
14
16
  ) -> Optional[LLMClientBase]:
15
17
  """
16
18
  Create an LLM client based on the model endpoint type.
@@ -25,30 +27,38 @@ class LLMClient:
25
27
  Raises:
26
28
  ValueError: If the model endpoint type is not supported
27
29
  """
28
- match provider:
30
+ match provider_type:
29
31
  case ProviderType.google_ai:
30
32
  from letta.llm_api.google_ai_client import GoogleAIClient
31
33
 
32
34
  return GoogleAIClient(
35
+ provider_name=provider_name,
33
36
  put_inner_thoughts_first=put_inner_thoughts_first,
37
+ actor_id=actor_id,
34
38
  )
35
39
  case ProviderType.google_vertex:
36
40
  from letta.llm_api.google_vertex_client import GoogleVertexClient
37
41
 
38
42
  return GoogleVertexClient(
43
+ provider_name=provider_name,
39
44
  put_inner_thoughts_first=put_inner_thoughts_first,
45
+ actor_id=actor_id,
40
46
  )
41
47
  case ProviderType.anthropic:
42
48
  from letta.llm_api.anthropic_client import AnthropicClient
43
49
 
44
50
  return AnthropicClient(
51
+ provider_name=provider_name,
45
52
  put_inner_thoughts_first=put_inner_thoughts_first,
53
+ actor_id=actor_id,
46
54
  )
47
55
  case ProviderType.openai:
48
56
  from letta.llm_api.openai_client import OpenAIClient
49
57
 
50
58
  return OpenAIClient(
59
+ provider_name=provider_name,
51
60
  put_inner_thoughts_first=put_inner_thoughts_first,
61
+ actor_id=actor_id,
52
62
  )
53
63
  case _:
54
64
  return None
@@ -20,9 +20,13 @@ class LLMClientBase:
20
20
 
21
21
  def __init__(
22
22
  self,
23
+ provider_name: Optional[str] = None,
23
24
  put_inner_thoughts_first: Optional[bool] = True,
24
25
  use_tool_naming: bool = True,
26
+ actor_id: Optional[str] = None,
25
27
  ):
28
+ self.actor_id = actor_id
29
+ self.provider_name = provider_name
26
30
  self.put_inner_thoughts_first = put_inner_thoughts_first
27
31
  self.use_tool_naming = use_tool_naming
28
32
 
letta/llm_api/openai.py CHANGED
@@ -157,11 +157,17 @@ def build_openai_chat_completions_request(
157
157
  # if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
158
158
  # data.response_format = {"type": "json_object"}
159
159
 
160
+ # always set user id for openai requests
161
+ if user_id:
162
+ data.user = str(user_id)
163
+
160
164
  if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
161
- # override user id for inference.memgpt.ai
162
- import uuid
165
+ if not user_id:
166
+ # override user id for inference.letta.com
167
+ import uuid
168
+
169
+ data.user = str(uuid.UUID(int=0))
163
170
 
164
- data.user = str(uuid.UUID(int=0))
165
171
  data.model = "memgpt-openai"
166
172
 
167
173
  if use_structured_output and data.tools is not None and len(data.tools) > 0: