letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +8 -12
- letta/agents/exceptions.py +6 -0
- letta/agents/letta_agent.py +48 -35
- letta/agents/letta_agent_batch.py +6 -2
- letta/agents/voice_agent.py +10 -7
- letta/constants.py +5 -1
- letta/functions/composio_helpers.py +100 -0
- letta/functions/functions.py +4 -2
- letta/functions/helpers.py +19 -99
- letta/groups/helpers.py +1 -0
- letta/groups/sleeptime_multi_agent.py +5 -1
- letta/helpers/message_helper.py +21 -4
- letta/helpers/tool_execution_helper.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +165 -158
- letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
- letta/llm_api/anthropic.py +15 -10
- letta/llm_api/anthropic_client.py +5 -1
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/llm_api_tools.py +7 -0
- letta/llm_api/llm_client.py +12 -2
- letta/llm_api/llm_client_base.py +4 -0
- letta/llm_api/openai.py +9 -3
- letta/llm_api/openai_client.py +18 -4
- letta/memory.py +3 -1
- letta/orm/group.py +2 -0
- letta/orm/provider.py +10 -0
- letta/schemas/agent.py +0 -1
- letta/schemas/enums.py +11 -0
- letta/schemas/group.py +24 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/llm_config_overrides.py +2 -2
- letta/schemas/providers.py +75 -20
- letta/schemas/tool.py +3 -8
- letta/server/rest_api/app.py +12 -0
- letta/server/rest_api/chat_completions_interface.py +1 -1
- letta/server/rest_api/interface.py +8 -10
- letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
- letta/server/rest_api/routers/v1/agents.py +1 -1
- letta/server/rest_api/routers/v1/llms.py +4 -3
- letta/server/rest_api/routers/v1/providers.py +4 -1
- letta/server/rest_api/routers/v1/voice.py +0 -2
- letta/server/rest_api/utils.py +8 -19
- letta/server/server.py +25 -11
- letta/services/group_manager.py +58 -0
- letta/services/provider_manager.py +25 -14
- letta/services/summarizer/summarizer.py +15 -7
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -3
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0
@@ -35,7 +35,7 @@ from letta.schemas.letta_message import (
|
|
35
35
|
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
36
36
|
from letta.schemas.message import Message
|
37
37
|
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
38
|
-
from letta.server.rest_api.
|
38
|
+
from letta.server.rest_api.json_parser import JSONParser, PydanticJSONParser
|
39
39
|
|
40
40
|
logger = get_logger(__name__)
|
41
41
|
|
@@ -56,7 +56,7 @@ class AnthropicStreamingInterface:
|
|
56
56
|
"""
|
57
57
|
|
58
58
|
def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
|
59
|
-
self.
|
59
|
+
self.json_parser: JSONParser = PydanticJSONParser()
|
60
60
|
self.use_assistant_message = use_assistant_message
|
61
61
|
|
62
62
|
# Premake IDs for database writes
|
@@ -68,7 +68,7 @@ class AnthropicStreamingInterface:
|
|
68
68
|
self.accumulated_inner_thoughts = []
|
69
69
|
self.tool_call_id = None
|
70
70
|
self.tool_call_name = None
|
71
|
-
self.accumulated_tool_call_args =
|
71
|
+
self.accumulated_tool_call_args = ""
|
72
72
|
self.previous_parse = {}
|
73
73
|
|
74
74
|
# usage trackers
|
@@ -85,193 +85,200 @@ class AnthropicStreamingInterface:
|
|
85
85
|
|
86
86
|
def get_tool_call_object(self) -> ToolCall:
|
87
87
|
"""Useful for agent loop"""
|
88
|
-
return ToolCall(
|
89
|
-
id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
|
90
|
-
)
|
88
|
+
return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=self.accumulated_tool_call_args, name=self.tool_call_name))
|
91
89
|
|
92
90
|
def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
|
93
91
|
"""
|
94
92
|
Check if inner thoughts are complete in the current tool call arguments
|
95
93
|
by looking for a closing quote after the inner_thoughts field
|
96
94
|
"""
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
95
|
+
try:
|
96
|
+
if not self.put_inner_thoughts_in_kwarg:
|
97
|
+
# None of the things should have inner thoughts in kwargs
|
98
|
+
return True
|
99
|
+
else:
|
100
|
+
parsed = self.json_parser.parse(combined_args)
|
101
|
+
# TODO: This will break on tools with 0 input
|
102
|
+
return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
|
103
|
+
except Exception as e:
|
104
|
+
logger.error("Error checking inner thoughts: %s", e)
|
105
|
+
raise
|
104
106
|
|
105
107
|
async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
|
106
|
-
|
107
|
-
async
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
108
|
+
try:
|
109
|
+
async with stream:
|
110
|
+
async for event in stream:
|
111
|
+
# TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
|
112
|
+
if isinstance(event, BetaRawContentBlockStartEvent):
|
113
|
+
content = event.content_block
|
114
|
+
|
115
|
+
if isinstance(content, BetaTextBlock):
|
116
|
+
self.anthropic_mode = EventMode.TEXT
|
117
|
+
# TODO: Can capture citations, etc.
|
118
|
+
elif isinstance(content, BetaToolUseBlock):
|
119
|
+
self.anthropic_mode = EventMode.TOOL_USE
|
120
|
+
self.tool_call_id = content.id
|
121
|
+
self.tool_call_name = content.name
|
122
|
+
self.inner_thoughts_complete = False
|
123
|
+
|
124
|
+
if not self.use_assistant_message:
|
125
|
+
# Buffer the initial tool call message instead of yielding immediately
|
126
|
+
tool_call_msg = ToolCallMessage(
|
127
|
+
id=self.letta_tool_message_id,
|
128
|
+
tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
|
129
|
+
date=datetime.now(timezone.utc).isoformat(),
|
130
|
+
)
|
131
|
+
self.tool_call_buffer.append(tool_call_msg)
|
132
|
+
elif isinstance(content, BetaThinkingBlock):
|
133
|
+
self.anthropic_mode = EventMode.THINKING
|
134
|
+
# TODO: Can capture signature, etc.
|
135
|
+
elif isinstance(content, BetaRedactedThinkingBlock):
|
136
|
+
self.anthropic_mode = EventMode.REDACTED_THINKING
|
137
|
+
|
138
|
+
hidden_reasoning_message = HiddenReasoningMessage(
|
139
|
+
id=self.letta_assistant_message_id,
|
140
|
+
state="redacted",
|
141
|
+
hidden_reasoning=content.data,
|
126
142
|
date=datetime.now(timezone.utc).isoformat(),
|
127
143
|
)
|
128
|
-
self.
|
129
|
-
|
130
|
-
self.anthropic_mode = EventMode.THINKING
|
131
|
-
# TODO: Can capture signature, etc.
|
132
|
-
elif isinstance(content, BetaRedactedThinkingBlock):
|
133
|
-
self.anthropic_mode = EventMode.REDACTED_THINKING
|
134
|
-
|
135
|
-
hidden_reasoning_message = HiddenReasoningMessage(
|
136
|
-
id=self.letta_assistant_message_id,
|
137
|
-
state="redacted",
|
138
|
-
hidden_reasoning=content.data,
|
139
|
-
date=datetime.now(timezone.utc).isoformat(),
|
140
|
-
)
|
141
|
-
self.reasoning_messages.append(hidden_reasoning_message)
|
142
|
-
yield hidden_reasoning_message
|
143
|
-
|
144
|
-
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
145
|
-
delta = event.delta
|
146
|
-
|
147
|
-
if isinstance(delta, BetaTextDelta):
|
148
|
-
# Safety check
|
149
|
-
if not self.anthropic_mode == EventMode.TEXT:
|
150
|
-
raise RuntimeError(
|
151
|
-
f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
|
152
|
-
)
|
144
|
+
self.reasoning_messages.append(hidden_reasoning_message)
|
145
|
+
yield hidden_reasoning_message
|
153
146
|
|
154
|
-
|
155
|
-
delta
|
156
|
-
self.accumulated_inner_thoughts.append(delta.text)
|
157
|
-
|
158
|
-
reasoning_message = ReasoningMessage(
|
159
|
-
id=self.letta_assistant_message_id,
|
160
|
-
reasoning=self.accumulated_inner_thoughts[-1],
|
161
|
-
date=datetime.now(timezone.utc).isoformat(),
|
162
|
-
)
|
163
|
-
self.reasoning_messages.append(reasoning_message)
|
164
|
-
yield reasoning_message
|
165
|
-
|
166
|
-
elif isinstance(delta, BetaInputJSONDelta):
|
167
|
-
if not self.anthropic_mode == EventMode.TOOL_USE:
|
168
|
-
raise RuntimeError(
|
169
|
-
f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
|
170
|
-
)
|
147
|
+
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
148
|
+
delta = event.delta
|
171
149
|
|
172
|
-
|
173
|
-
|
174
|
-
|
150
|
+
if isinstance(delta, BetaTextDelta):
|
151
|
+
# Safety check
|
152
|
+
if not self.anthropic_mode == EventMode.TEXT:
|
153
|
+
raise RuntimeError(
|
154
|
+
f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
|
155
|
+
)
|
175
156
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
157
|
+
# TODO: Strip out </thinking> more robustly, this is pretty hacky lol
|
158
|
+
delta.text = delta.text.replace("</thinking>", "")
|
159
|
+
self.accumulated_inner_thoughts.append(delta.text)
|
180
160
|
|
181
|
-
if inner_thoughts_diff:
|
182
161
|
reasoning_message = ReasoningMessage(
|
183
162
|
id=self.letta_assistant_message_id,
|
184
|
-
reasoning=
|
163
|
+
reasoning=self.accumulated_inner_thoughts[-1],
|
185
164
|
date=datetime.now(timezone.utc).isoformat(),
|
186
165
|
)
|
187
166
|
self.reasoning_messages.append(reasoning_message)
|
188
167
|
yield reasoning_message
|
189
168
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
self.
|
169
|
+
elif isinstance(delta, BetaInputJSONDelta):
|
170
|
+
if not self.anthropic_mode == EventMode.TOOL_USE:
|
171
|
+
raise RuntimeError(
|
172
|
+
f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
|
173
|
+
)
|
174
|
+
|
175
|
+
self.accumulated_tool_call_args += delta.partial_json
|
176
|
+
current_parsed = self.json_parser.parse(self.accumulated_tool_call_args)
|
197
177
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
send_message_diff = current_send_message[len(previous_send_message) :]
|
178
|
+
# Start detecting a difference in inner thoughts
|
179
|
+
previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
|
180
|
+
current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
|
181
|
+
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
203
182
|
|
204
|
-
|
205
|
-
|
206
|
-
yield AssistantMessage(
|
183
|
+
if inner_thoughts_diff:
|
184
|
+
reasoning_message = ReasoningMessage(
|
207
185
|
id=self.letta_assistant_message_id,
|
208
|
-
|
186
|
+
reasoning=inner_thoughts_diff,
|
209
187
|
date=datetime.now(timezone.utc).isoformat(),
|
210
188
|
)
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
189
|
+
self.reasoning_messages.append(reasoning_message)
|
190
|
+
yield reasoning_message
|
191
|
+
|
192
|
+
# Check if inner thoughts are complete - if so, flush the buffer
|
193
|
+
if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
|
194
|
+
self.inner_thoughts_complete = True
|
195
|
+
# Flush all buffered tool call messages
|
196
|
+
for buffered_msg in self.tool_call_buffer:
|
197
|
+
yield buffered_msg
|
198
|
+
self.tool_call_buffer = []
|
199
|
+
|
200
|
+
# Start detecting special case of "send_message"
|
201
|
+
if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
|
202
|
+
previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
203
|
+
current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
|
204
|
+
send_message_diff = current_send_message[len(previous_send_message) :]
|
205
|
+
|
206
|
+
# Only stream out if it's not an empty string
|
207
|
+
if send_message_diff:
|
208
|
+
yield AssistantMessage(
|
209
|
+
id=self.letta_assistant_message_id,
|
210
|
+
content=[TextContent(text=send_message_diff)],
|
211
|
+
date=datetime.now(timezone.utc).isoformat(),
|
212
|
+
)
|
221
213
|
else:
|
222
|
-
|
214
|
+
# Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
|
215
|
+
tool_call_msg = ToolCallMessage(
|
216
|
+
id=self.letta_tool_message_id,
|
217
|
+
tool_call=ToolCallDelta(arguments=delta.partial_json),
|
218
|
+
date=datetime.now(timezone.utc).isoformat(),
|
219
|
+
)
|
223
220
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
221
|
+
if self.inner_thoughts_complete:
|
222
|
+
yield tool_call_msg
|
223
|
+
else:
|
224
|
+
self.tool_call_buffer.append(tool_call_msg)
|
225
|
+
|
226
|
+
# Set previous parse
|
227
|
+
self.previous_parse = current_parsed
|
228
|
+
elif isinstance(delta, BetaThinkingDelta):
|
229
|
+
# Safety check
|
230
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
231
|
+
raise RuntimeError(
|
232
|
+
f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
|
233
|
+
)
|
234
|
+
|
235
|
+
reasoning_message = ReasoningMessage(
|
236
|
+
id=self.letta_assistant_message_id,
|
237
|
+
source="reasoner_model",
|
238
|
+
reasoning=delta.thinking,
|
239
|
+
date=datetime.now(timezone.utc).isoformat(),
|
231
240
|
)
|
241
|
+
self.reasoning_messages.append(reasoning_message)
|
242
|
+
yield reasoning_message
|
243
|
+
elif isinstance(delta, BetaSignatureDelta):
|
244
|
+
# Safety check
|
245
|
+
if not self.anthropic_mode == EventMode.THINKING:
|
246
|
+
raise RuntimeError(
|
247
|
+
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
248
|
+
)
|
232
249
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
self.reasoning_messages.append(reasoning_message)
|
240
|
-
yield reasoning_message
|
241
|
-
elif isinstance(delta, BetaSignatureDelta):
|
242
|
-
# Safety check
|
243
|
-
if not self.anthropic_mode == EventMode.THINKING:
|
244
|
-
raise RuntimeError(
|
245
|
-
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
250
|
+
reasoning_message = ReasoningMessage(
|
251
|
+
id=self.letta_assistant_message_id,
|
252
|
+
source="reasoner_model",
|
253
|
+
reasoning="",
|
254
|
+
date=datetime.now(timezone.utc).isoformat(),
|
255
|
+
signature=delta.signature,
|
246
256
|
)
|
257
|
+
self.reasoning_messages.append(reasoning_message)
|
258
|
+
yield reasoning_message
|
259
|
+
elif isinstance(event, BetaRawMessageStartEvent):
|
260
|
+
self.message_id = event.message.id
|
261
|
+
self.input_tokens += event.message.usage.input_tokens
|
262
|
+
self.output_tokens += event.message.usage.output_tokens
|
263
|
+
elif isinstance(event, BetaRawMessageDeltaEvent):
|
264
|
+
self.output_tokens += event.usage.output_tokens
|
265
|
+
elif isinstance(event, BetaRawMessageStopEvent):
|
266
|
+
# Don't do anything here! We don't want to stop the stream.
|
267
|
+
pass
|
268
|
+
elif isinstance(event, BetaRawContentBlockStopEvent):
|
269
|
+
# If we're exiting a tool use block and there are still buffered messages,
|
270
|
+
# we should flush them now
|
271
|
+
if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
|
272
|
+
for buffered_msg in self.tool_call_buffer:
|
273
|
+
yield buffered_msg
|
274
|
+
self.tool_call_buffer = []
|
247
275
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
)
|
255
|
-
self.reasoning_messages.append(reasoning_message)
|
256
|
-
yield reasoning_message
|
257
|
-
elif isinstance(event, BetaRawMessageStartEvent):
|
258
|
-
self.message_id = event.message.id
|
259
|
-
self.input_tokens += event.message.usage.input_tokens
|
260
|
-
self.output_tokens += event.message.usage.output_tokens
|
261
|
-
elif isinstance(event, BetaRawMessageDeltaEvent):
|
262
|
-
self.output_tokens += event.usage.output_tokens
|
263
|
-
elif isinstance(event, BetaRawMessageStopEvent):
|
264
|
-
# Don't do anything here! We don't want to stop the stream.
|
265
|
-
pass
|
266
|
-
elif isinstance(event, BetaRawContentBlockStopEvent):
|
267
|
-
# If we're exiting a tool use block and there are still buffered messages,
|
268
|
-
# we should flush them now
|
269
|
-
if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
|
270
|
-
for buffered_msg in self.tool_call_buffer:
|
271
|
-
yield buffered_msg
|
272
|
-
self.tool_call_buffer = []
|
273
|
-
|
274
|
-
self.anthropic_mode = None
|
276
|
+
self.anthropic_mode = None
|
277
|
+
except Exception as e:
|
278
|
+
logger.error("Error processing stream: %s", e)
|
279
|
+
raise
|
280
|
+
finally:
|
281
|
+
logger.info("AnthropicStreamingInterface: Stream processing complete.")
|
275
282
|
|
276
283
|
def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
|
277
284
|
def _process_group(
|
@@ -5,7 +5,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice,
|
|
5
5
|
|
6
6
|
from letta.constants import PRE_EXECUTION_MESSAGE_ARG
|
7
7
|
from letta.interfaces.utils import _format_sse_chunk
|
8
|
-
from letta.server.rest_api.
|
8
|
+
from letta.server.rest_api.json_parser import OptimisticJSONParser
|
9
9
|
|
10
10
|
|
11
11
|
class OpenAIChatCompletionsStreamingInterface:
|
letta/llm_api/anthropic.py
CHANGED
@@ -26,6 +26,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions
|
|
26
26
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
27
27
|
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
28
28
|
from letta.log import get_logger
|
29
|
+
from letta.schemas.enums import ProviderType
|
29
30
|
from letta.schemas.message import Message as _Message
|
30
31
|
from letta.schemas.message import MessageRole as _MessageRole
|
31
32
|
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
@@ -128,11 +129,12 @@ def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
|
|
128
129
|
# NOTE: currently there is no GET /models, so we need to hardcode
|
129
130
|
# return MODEL_LIST
|
130
131
|
|
131
|
-
|
132
|
-
|
133
|
-
anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
|
132
|
+
if api_key:
|
133
|
+
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
134
134
|
elif model_settings.anthropic_api_key:
|
135
135
|
anthropic_client = anthropic.Anthropic()
|
136
|
+
else:
|
137
|
+
raise ValueError("No API key provided")
|
136
138
|
|
137
139
|
models = anthropic_client.models.list()
|
138
140
|
models_json = models.model_dump()
|
@@ -738,13 +740,14 @@ def anthropic_chat_completions_request(
|
|
738
740
|
put_inner_thoughts_in_kwargs: bool = False,
|
739
741
|
extended_thinking: bool = False,
|
740
742
|
max_reasoning_tokens: Optional[int] = None,
|
743
|
+
provider_name: Optional[str] = None,
|
741
744
|
betas: List[str] = ["tools-2024-04-04"],
|
742
745
|
) -> ChatCompletionResponse:
|
743
746
|
"""https://docs.anthropic.com/claude/docs/tool-use"""
|
744
747
|
anthropic_client = None
|
745
|
-
|
746
|
-
|
747
|
-
anthropic_client = anthropic.Anthropic(api_key=
|
748
|
+
if provider_name and provider_name != ProviderType.anthropic.value:
|
749
|
+
api_key = ProviderManager().get_override_key(provider_name)
|
750
|
+
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
748
751
|
elif model_settings.anthropic_api_key:
|
749
752
|
anthropic_client = anthropic.Anthropic()
|
750
753
|
else:
|
@@ -796,6 +799,7 @@ def anthropic_chat_completions_request_stream(
|
|
796
799
|
put_inner_thoughts_in_kwargs: bool = False,
|
797
800
|
extended_thinking: bool = False,
|
798
801
|
max_reasoning_tokens: Optional[int] = None,
|
802
|
+
provider_name: Optional[str] = None,
|
799
803
|
betas: List[str] = ["tools-2024-04-04"],
|
800
804
|
) -> Generator[ChatCompletionChunkResponse, None, None]:
|
801
805
|
"""Stream chat completions from Anthropic API.
|
@@ -810,10 +814,9 @@ def anthropic_chat_completions_request_stream(
|
|
810
814
|
extended_thinking=extended_thinking,
|
811
815
|
max_reasoning_tokens=max_reasoning_tokens,
|
812
816
|
)
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
|
817
|
+
if provider_name and provider_name != ProviderType.anthropic.value:
|
818
|
+
api_key = ProviderManager().get_override_key(provider_name)
|
819
|
+
anthropic_client = anthropic.Anthropic(api_key=api_key)
|
817
820
|
elif model_settings.anthropic_api_key:
|
818
821
|
anthropic_client = anthropic.Anthropic()
|
819
822
|
|
@@ -860,6 +863,7 @@ def anthropic_chat_completions_process_stream(
|
|
860
863
|
put_inner_thoughts_in_kwargs: bool = False,
|
861
864
|
extended_thinking: bool = False,
|
862
865
|
max_reasoning_tokens: Optional[int] = None,
|
866
|
+
provider_name: Optional[str] = None,
|
863
867
|
create_message_id: bool = True,
|
864
868
|
create_message_datetime: bool = True,
|
865
869
|
betas: List[str] = ["tools-2024-04-04"],
|
@@ -944,6 +948,7 @@ def anthropic_chat_completions_process_stream(
|
|
944
948
|
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
945
949
|
extended_thinking=extended_thinking,
|
946
950
|
max_reasoning_tokens=max_reasoning_tokens,
|
951
|
+
provider_name=provider_name,
|
947
952
|
betas=betas,
|
948
953
|
)
|
949
954
|
):
|
@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
|
|
27
27
|
from letta.llm_api.llm_client_base import LLMClientBase
|
28
28
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
29
29
|
from letta.log import get_logger
|
30
|
+
from letta.schemas.enums import ProviderType
|
30
31
|
from letta.schemas.llm_config import LLMConfig
|
31
32
|
from letta.schemas.message import Message as PydanticMessage
|
32
33
|
from letta.schemas.openai.chat_completion_request import Tool
|
@@ -112,7 +113,10 @@ class AnthropicClient(LLMClientBase):
|
|
112
113
|
|
113
114
|
@trace_method
|
114
115
|
def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
|
115
|
-
override_key =
|
116
|
+
override_key = None
|
117
|
+
if self.provider_name and self.provider_name != ProviderType.anthropic.value:
|
118
|
+
override_key = ProviderManager().get_override_key(self.provider_name)
|
119
|
+
|
116
120
|
if async_client:
|
117
121
|
return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
|
118
122
|
return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
|
@@ -63,7 +63,7 @@ class GoogleVertexClient(GoogleAIClient):
|
|
63
63
|
# Add thinking_config
|
64
64
|
# If enable_reasoner is False, set thinking_budget to 0
|
65
65
|
# Otherwise, use the value from max_reasoning_tokens
|
66
|
-
thinking_budget = 0 if not
|
66
|
+
thinking_budget = 0 if not llm_config.enable_reasoner else llm_config.max_reasoning_tokens
|
67
67
|
thinking_config = ThinkingConfig(
|
68
68
|
thinking_budget=thinking_budget,
|
69
69
|
)
|
letta/llm_api/llm_api_tools.py
CHANGED
@@ -24,6 +24,7 @@ from letta.llm_api.openai import (
|
|
24
24
|
from letta.local_llm.chat_completion_proxy import get_chat_completion
|
25
25
|
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
26
26
|
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
27
|
+
from letta.schemas.enums import ProviderType
|
27
28
|
from letta.schemas.llm_config import LLMConfig
|
28
29
|
from letta.schemas.message import Message
|
29
30
|
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
|
@@ -171,6 +172,10 @@ def create(
|
|
171
172
|
if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
|
172
173
|
# only is a problem if we are *not* using an openai proxy
|
173
174
|
raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
|
175
|
+
elif llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
|
176
|
+
from letta.services.provider_manager import ProviderManager
|
177
|
+
|
178
|
+
api_key = ProviderManager().get_override_key(llm_config.provider_name)
|
174
179
|
elif model_settings.openai_api_key is None:
|
175
180
|
# the openai python client requires a dummy API key
|
176
181
|
api_key = "DUMMY_API_KEY"
|
@@ -373,6 +378,7 @@ def create(
|
|
373
378
|
stream_interface=stream_interface,
|
374
379
|
extended_thinking=llm_config.enable_reasoner,
|
375
380
|
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
381
|
+
provider_name=llm_config.provider_name,
|
376
382
|
name=name,
|
377
383
|
)
|
378
384
|
|
@@ -383,6 +389,7 @@ def create(
|
|
383
389
|
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
384
390
|
extended_thinking=llm_config.enable_reasoner,
|
385
391
|
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
392
|
+
provider_name=llm_config.provider_name,
|
386
393
|
)
|
387
394
|
|
388
395
|
if llm_config.put_inner_thoughts_in_kwargs:
|
letta/llm_api/llm_client.py
CHANGED
@@ -9,8 +9,10 @@ class LLMClient:
|
|
9
9
|
|
10
10
|
@staticmethod
|
11
11
|
def create(
|
12
|
-
|
12
|
+
provider_type: ProviderType,
|
13
|
+
provider_name: Optional[str] = None,
|
13
14
|
put_inner_thoughts_first: bool = True,
|
15
|
+
actor_id: Optional[str] = None,
|
14
16
|
) -> Optional[LLMClientBase]:
|
15
17
|
"""
|
16
18
|
Create an LLM client based on the model endpoint type.
|
@@ -25,30 +27,38 @@ class LLMClient:
|
|
25
27
|
Raises:
|
26
28
|
ValueError: If the model endpoint type is not supported
|
27
29
|
"""
|
28
|
-
match
|
30
|
+
match provider_type:
|
29
31
|
case ProviderType.google_ai:
|
30
32
|
from letta.llm_api.google_ai_client import GoogleAIClient
|
31
33
|
|
32
34
|
return GoogleAIClient(
|
35
|
+
provider_name=provider_name,
|
33
36
|
put_inner_thoughts_first=put_inner_thoughts_first,
|
37
|
+
actor_id=actor_id,
|
34
38
|
)
|
35
39
|
case ProviderType.google_vertex:
|
36
40
|
from letta.llm_api.google_vertex_client import GoogleVertexClient
|
37
41
|
|
38
42
|
return GoogleVertexClient(
|
43
|
+
provider_name=provider_name,
|
39
44
|
put_inner_thoughts_first=put_inner_thoughts_first,
|
45
|
+
actor_id=actor_id,
|
40
46
|
)
|
41
47
|
case ProviderType.anthropic:
|
42
48
|
from letta.llm_api.anthropic_client import AnthropicClient
|
43
49
|
|
44
50
|
return AnthropicClient(
|
51
|
+
provider_name=provider_name,
|
45
52
|
put_inner_thoughts_first=put_inner_thoughts_first,
|
53
|
+
actor_id=actor_id,
|
46
54
|
)
|
47
55
|
case ProviderType.openai:
|
48
56
|
from letta.llm_api.openai_client import OpenAIClient
|
49
57
|
|
50
58
|
return OpenAIClient(
|
59
|
+
provider_name=provider_name,
|
51
60
|
put_inner_thoughts_first=put_inner_thoughts_first,
|
61
|
+
actor_id=actor_id,
|
52
62
|
)
|
53
63
|
case _:
|
54
64
|
return None
|
letta/llm_api/llm_client_base.py
CHANGED
@@ -20,9 +20,13 @@ class LLMClientBase:
|
|
20
20
|
|
21
21
|
def __init__(
|
22
22
|
self,
|
23
|
+
provider_name: Optional[str] = None,
|
23
24
|
put_inner_thoughts_first: Optional[bool] = True,
|
24
25
|
use_tool_naming: bool = True,
|
26
|
+
actor_id: Optional[str] = None,
|
25
27
|
):
|
28
|
+
self.actor_id = actor_id
|
29
|
+
self.provider_name = provider_name
|
26
30
|
self.put_inner_thoughts_first = put_inner_thoughts_first
|
27
31
|
self.use_tool_naming = use_tool_naming
|
28
32
|
|
letta/llm_api/openai.py
CHANGED
@@ -157,11 +157,17 @@ def build_openai_chat_completions_request(
|
|
157
157
|
# if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
|
158
158
|
# data.response_format = {"type": "json_object"}
|
159
159
|
|
160
|
+
# always set user id for openai requests
|
161
|
+
if user_id:
|
162
|
+
data.user = str(user_id)
|
163
|
+
|
160
164
|
if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
|
161
|
-
|
162
|
-
|
165
|
+
if not user_id:
|
166
|
+
# override user id for inference.letta.com
|
167
|
+
import uuid
|
168
|
+
|
169
|
+
data.user = str(uuid.UUID(int=0))
|
163
170
|
|
164
|
-
data.user = str(uuid.UUID(int=0))
|
165
171
|
data.model = "memgpt-openai"
|
166
172
|
|
167
173
|
if use_structured_output and data.tools is not None and len(data.tools) > 0:
|