letta-nightly 0.6.44.dev20250325104221__py3-none-any.whl → 0.6.45.dev20250327035218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.6.44"
1
+ __version__ = "0.6.45"
2
2
 
3
3
  # import clients
4
4
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -760,6 +760,7 @@ class Agent(BaseAgent):
760
760
  break
761
761
 
762
762
  if self.agent_state.message_buffer_autoclear:
763
+ self.logger.info("Autoclearing message buffer")
763
764
  self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
764
765
 
765
766
  return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count, steps_messages=steps_messages)
@@ -934,8 +935,6 @@ class Agent(BaseAgent):
934
935
  )
935
936
  raise e
936
937
 
937
- summarize_attempt_count += 1
938
-
939
938
  if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
940
939
  logger.warning(
941
940
  f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
@@ -352,7 +352,7 @@ def convert_anthropic_response_to_chatcompletion(
352
352
  redacted_reasoning_content = None
353
353
  tool_calls = None
354
354
 
355
- if len(response.content) > 1:
355
+ if len(response.content) > 0:
356
356
  for content_part in response.content:
357
357
  if content_part.type == "text":
358
358
  content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
@@ -743,6 +743,8 @@ def anthropic_chat_completions_request(
743
743
  anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
744
744
  elif model_settings.anthropic_api_key:
745
745
  anthropic_client = anthropic.Anthropic()
746
+ else:
747
+ raise ValueError("No available Anthropic API key")
746
748
  data = _prepare_anthropic_request(
747
749
  data=data,
748
750
  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
@@ -0,0 +1,487 @@
1
+ import json
2
+ import re
3
+ from typing import List, Optional, Union
4
+
5
+ import anthropic
6
+ from anthropic.types import Message as AnthropicMessage
7
+
8
+ from letta.helpers.datetime_helpers import get_utc_time
9
+ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
10
+ from letta.llm_api.llm_api_tools import cast_message_to_subtype
11
+ from letta.llm_api.llm_client_base import LLMClientBase
12
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
13
+ from letta.log import get_logger
14
+ from letta.schemas.message import Message as PydanticMessage
15
+ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
16
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
17
+ from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
18
+ from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
19
+ from letta.services.provider_manager import ProviderManager
20
+
21
# Placeholder user turn that `_prepare_anthropic_request` prepends when the first
# non-system message in a conversation does not have the "user" role.
DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."

# Module-level logger shared by the client and the helper functions in this file.
logger = get_logger(__name__)
24
+
25
+
26
class AnthropicClient(LLMClientBase):
    """LLM client that talks to the Anthropic Messages API.

    Builds Anthropic-format payloads from Letta messages, sends them through the
    `anthropic` SDK (sync or async), and converts the raw response dict back into
    an OpenAI-style `ChatCompletionResponse`.
    """

    def request(self, request_data: dict) -> dict:
        """Send `request_data` synchronously and return the response as a dict.

        The payload is expanded as keyword arguments to `client.beta.messages.create`
        together with the "tools-2024-04-04" beta flag. Any exception is passed to
        `_handle_anthropic_error`, which logs known SDK errors and re-raises.
        """
        try:
            client = self._get_anthropic_client(async_client=False)
            response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
            return response.model_dump()
        except Exception as e:
            self._handle_anthropic_error(e)

    async def request_async(self, request_data: dict) -> dict:
        """Async counterpart of `request`, using an `anthropic.AsyncAnthropic` client."""
        try:
            client = self._get_anthropic_client(async_client=True)
            response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
            return response.model_dump()
        except Exception as e:
            self._handle_anthropic_error(e)

    def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create a new SDK client, sync or async depending on `async_client`.

        If `ProviderManager().get_anthropic_override_key()` returns a truthy key it is
        passed as `api_key`; otherwise the client is constructed without arguments
        (presumably the SDK then falls back to its own key lookup -- confirm against
        the anthropic SDK docs).
        """
        override_key = ProviderManager().get_anthropic_override_key()
        if async_client:
            return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
        return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()

    def _handle_anthropic_error(self, e: Exception):
        """Log a warning for known Anthropic SDK errors, then re-raise `e`.

        The exception is always re-raised, whether or not it matched one of the
        logged categories.
        """
        if isinstance(e, anthropic.APIConnectionError):
            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
        elif isinstance(e, anthropic.RateLimitError):
            # Checked before APIStatusError so rate limits get their own message.
            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
        elif isinstance(e, anthropic.APIStatusError):
            logger.warning(f"[Anthropic] API status error: {e.status_code}, {e.response}")
        raise e

    def build_request_data(
        self,
        messages: List[PydanticMessage],
        tools: List[dict],
        tool_call: Optional[str],
        force_tool_call: Optional[str] = None,
    ) -> dict:
        """Build the Anthropic request payload for a list of Letta messages.

        Args:
            messages: Conversation messages; each is converted via `to_openai_dict()`.
            tools: Function schemas (dicts with at least a "name" key), or None for
                the tool-free summarization path.
            tool_call: Not read by this implementation.
            force_tool_call: If set, only the tool with this name is offered and the
                model is forced to call it.

        Returns:
            The dict produced by `_prepare_anthropic_request`.

        Raises:
            NotImplementedError: If `self.use_tool_naming` is falsy.

        Note:
            Forcing a tool call sets `self.llm_config.put_inner_thoughts_in_kwargs`
            to True on the shared config object as a side effect.
        """
        if not self.use_tool_naming:
            raise NotImplementedError("Only tool calling supported on Anthropic API requests")

        if tools is None:
            # Special case for summarization path
            available_tools = None
            tool_choice = None
        elif force_tool_call is not None:
            assert tools is not None
            tool_choice = {"type": "tool", "name": force_tool_call}
            available_tools = [{"type": "function", "function": f} for f in tools if f["name"] == force_tool_call]

            # need to have this setting to be able to put inner thoughts in kwargs
            self.llm_config.put_inner_thoughts_in_kwargs = True
        else:
            if self.llm_config.put_inner_thoughts_in_kwargs:
                # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
                tool_choice = {"type": "any", "disable_parallel_tool_use": True}
            else:
                tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
            available_tools = [{"type": "function", "function": f} for f in tools]

        # Build an OpenAI-style request first; the helper below converts it to Anthropic's format.
        chat_completion_request = ChatCompletionRequest(
            model=self.llm_config.model,
            messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
            tools=available_tools,
            tool_choice=tool_choice,
            max_tokens=self.llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
            temperature=self.llm_config.temperature,
        )

        return _prepare_anthropic_request(
            data=chat_completion_request,
            put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
            extended_thinking=self.llm_config.enable_reasoner,
            max_reasoning_tokens=self.llm_config.max_reasoning_tokens,
        )

    def convert_response_to_chat_completion(
        self,
        response_data: dict,
        input_messages: List[PydanticMessage],
    ) -> ChatCompletionResponse:
        """Convert a raw Anthropic response dict into a `ChatCompletionResponse`.

        Each content part is inspected by type ("text", "tool_use", "thinking",
        "redacted_thinking"). A later part of the same type overwrites the value taken
        from an earlier one. `input_messages` is not read by this implementation.

        Raises:
            RuntimeError: If the response has no content parts.
            AssertionError: If the response role is not "assistant".

        Example response from Claude 3:
        response.json = {
            'id': 'msg_01W1xg9hdRzbeN2CfZM7zD2w',
            'type': 'message',
            'role': 'assistant',
            'content': [
                {
                    'type': 'text',
                    'text': "<thinking>Analyzing user login event. This is Chad's first
                    interaction with me. I will adjust my personality and rapport accordingly.</thinking>"
                },
                {
                    'type':
                    'tool_use',
                    'id': 'toolu_01Ka4AuCmfvxiidnBZuNfP1u',
                    'name': 'core_memory_append',
                    'input': {
                        'name': 'human',
                        'content': 'Chad is logging in for the first time. I will aim to build a warm
                        and welcoming rapport.',
                        'request_heartbeat': True
                    }
                }
            ],
            'model': 'claude-3-haiku-20240307',
            'stop_reason': 'tool_use',
            'stop_sequence': None,
            'usage': {
                'input_tokens': 3305,
                'output_tokens': 141
            }
        }
        """
        response = AnthropicMessage(**response_data)
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens
        finish_reason = remap_finish_reason(response.stop_reason)

        content = None
        reasoning_content = None
        reasoning_content_signature = None
        redacted_reasoning_content = None
        tool_calls = None

        if len(response.content) > 0:
            for content_part in response.content:
                if content_part.type == "text":
                    # Remove the <thinking> wrapper tags but keep the text between them.
                    content = strip_xml_tags(string=content_part.text, tag="thinking")
                if content_part.type == "tool_use":
                    # The tool input is re-serialized to a JSON string for the OpenAI-style arguments field.
                    tool_calls = [
                        ToolCall(
                            id=content_part.id,
                            type="function",
                            function=FunctionCall(
                                name=content_part.name,
                                arguments=json.dumps(content_part.input, indent=2),
                            ),
                        )
                    ]
                if content_part.type == "thinking":
                    reasoning_content = content_part.thinking
                    reasoning_content_signature = content_part.signature
                if content_part.type == "redacted_thinking":
                    redacted_reasoning_content = content_part.data

        else:
            raise RuntimeError("Unexpected empty content in response")

        assert response.role == "assistant"
        choice = Choice(
            index=0,
            finish_reason=finish_reason,
            message=ChoiceMessage(
                role=response.role,
                content=content,
                reasoning_content=reasoning_content,
                reasoning_content_signature=reasoning_content_signature,
                redacted_reasoning_content=redacted_reasoning_content,
                tool_calls=tool_calls,
            ),
        )

        chat_completion_response = ChatCompletionResponse(
            id=response.id,
            choices=[choice],
            created=get_utc_time(),
            model=response.model,
            usage=UsageStatistics(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )
        # When inner thoughts travel inside tool-call kwargs, unpack them from the tool arguments.
        if self.llm_config.put_inner_thoughts_in_kwargs:
            chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
                response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
            )

        return chat_completion_response
209
+
210
+
211
def _prepare_anthropic_request(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    # if true, prefix fill the generation with the thinking tag
    prefix_fill: bool = True,
    # if true, put COT inside the tool calls instead of inside the content
    put_inner_thoughts_in_kwargs: bool = False,
    bedrock: bool = False,
    # extended thinking related fields
    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
    extended_thinking: bool = False,
    max_reasoning_tokens: Optional[int] = None,
) -> dict:
    """Prepare the request data for Anthropic API format.

    Converts an OpenAI-style `ChatCompletionRequest` into the dict payload sent to
    Anthropic: tools are reshaped, the leading system message is moved to the
    top-level "system" key, messages are converted and merged so user turns do not
    repeat, and OpenAI-only fields are dropped. The `data` object passed in is not
    modified.

    Args:
        data: The OpenAI-style request; `messages[0]` must have role "system".
        inner_thoughts_xml_tag: Tag used for inner thoughts in message content and
            for the assistant prefix fill.
        prefix_fill: If true, append an assistant message that opens the thinking tag.
            Ignored when `put_inner_thoughts_in_kwargs` is set, when the model name
            contains "opus", when `bedrock` is true, or when `extended_thinking` is on.
        put_inner_thoughts_in_kwargs: If true, add the inner-thoughts argument to
            every tool schema instead of using tagged content.
        bedrock: If true, never add the assistant prefix fill.
        extended_thinking: If true, enable Anthropic extended thinking with
            `max_reasoning_tokens` as the budget and force temperature to 1.0.
        max_reasoning_tokens: Thinking budget; required and must be lower than
            `data.max_tokens` when `extended_thinking` is true.

    Returns:
        The Anthropic request payload as a dict.

    Raises:
        AssertionError: If the extended-thinking preconditions fail, the first message
            is not a system message, or "max_tokens" is missing from the payload.
        ValueError: If the dumped request contains a "functions" key.
    """
    if extended_thinking:
        assert (
            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
        ), "max tokens must be greater than thinking budget"
        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
        # Silently disable prefix_fill for now (instead of rejecting the combination)
        prefix_fill = False

    # if needed, put inner thoughts as a kwarg for all tools
    # (work on a local so the caller's request object is left untouched)
    tools = data.tools
    if tools and put_inner_thoughts_in_kwargs:
        functions = add_inner_thoughts_to_functions(
            functions=[t.function.model_dump() for t in tools],
            inner_thoughts_key=INNER_THOUGHTS_KWARG,
            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
        )
        tools = [Tool(function=f) for f in functions]

    # convert the tools to Anthropic's payload format
    anthropic_tools = None if tools is None else convert_tools_to_anthropic_format(tools)

    # pydantic -> dict
    payload = data.model_dump(exclude_none=True)

    if extended_thinking:
        payload["thinking"] = {
            "type": "enabled",
            "budget_tokens": max_reasoning_tokens,
        }
        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
        payload["temperature"] = 1.0

    if "functions" in payload:
        raise ValueError("'functions' unexpected in Anthropic API payload")

    # Handle tools
    if "tools" in payload and payload["tools"] is None:
        payload.pop("tools")
        payload.pop("tool_choice", None)
    elif anthropic_tools is not None:
        # TODO eventually enable parallel tool use
        payload["tools"] = anthropic_tools

    # Move 'system' to the top level
    assert payload["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{payload['messages'][0]}"
    payload["system"] = payload["messages"][0]["content"]
    payload["messages"] = payload["messages"][1:]

    # Process messages: exclude_none dropped empty contents, restore the key for the converter
    for message in payload["messages"]:
        message.setdefault("content", None)

    # Convert to Anthropic format
    msg_objs = [
        PydanticMessage.dict_to_message(
            user_id=None,
            agent_id=None,
            openai_message_dict=m,
        )
        for m in payload["messages"]
    ]
    payload["messages"] = [
        m.to_anthropic_dict(
            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
        )
        for m in msg_objs
    ]

    # Ensure first message is user. This also covers a conversation that held only
    # the system message, which would otherwise fail with an IndexError here.
    if not payload["messages"] or payload["messages"][0]["role"] != "user":
        payload["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + payload["messages"]

    # Handle alternating messages
    payload["messages"] = merge_tool_results_into_user_messages(payload["messages"])

    # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
    # https://docs.anthropic.com/en/api/messages#body-messages
    # NOTE: cannot prefill with tools for opus:
    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in payload["model"]:
        if not bedrock:  # not support for bedrock
            payload["messages"].append(
                # Start the thinking process for the assistant
                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
            )

    # Validate max_tokens
    assert "max_tokens" in payload, payload

    # Remove OpenAI-specific fields
    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
        payload.pop(field, None)

    return payload
321
+
322
+
323
def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
    """Reshape OpenAI-style tool definitions into Anthropic tool definitions.

    See: https://docs.anthropic.com/claude/docs/tool-use

    OpenAI nests the schema under a "function" key and calls it "parameters":

        {"type": "function",
         "function": {"name": ..., "description": ..., "parameters": {...}}}

    Anthropic uses a flat entry and calls the schema "input_schema":

        {"name": ..., "description": ..., "input_schema": {...}}

    A tool whose `parameters` is empty or missing gets an empty object schema
    (`{"type": "object", "properties": {}, "required": []}`).

    Args:
        tools: Tool objects exposing `function.name`, `function.description` and
            `function.parameters`.

    Returns:
        One Anthropic-format dict per input tool, in the same order.
    """
    return [
        {
            "name": fn.name,
            "description": fn.description,
            # A fresh fallback schema is built per tool so entries never share one dict.
            "input_schema": fn.parameters or {"type": "object", "properties": {}, "required": []},
        }
        for fn in (tool.function for tool in tools)
    ]
379
+
380
+
381
def merge_tool_results_into_user_messages(messages: List[dict]) -> List[dict]:
    """Collapse back-to-back "user" messages into a single user message.

    Anthropic API doesn't allow role 'tool'->'user' sequences

    Example HTTP error:
    messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row

    From: https://docs.anthropic.com/claude/docs/tool-use
    You may be familiar with other APIs that return tool use as separate from the model's primary output,
    or which use a special-purpose tool or function message role.
    In contrast, Anthropic's models and API are built around alternating user and assistant messages,
    where each message is an array of rich content blocks: text, image, tool_use, and tool_result.

    When two consecutive messages both have role "user", their contents are
    concatenated as lists of content blocks; a non-list content is first wrapped as
    `{"type": "text", "text": content}`. Other messages are passed through as-is.

    Args:
        messages: Anthropic-format message dicts with "role" and "content" keys.

    Returns:
        A new list with no two adjacent "user" messages. The input list and its
        message dicts are not modified; merged messages are new dicts.
    """

    def _as_blocks(content):
        # Anthropic content is either a list of blocks or a bare value; normalize to a list.
        return content if isinstance(content, list) else [{"type": "text", "text": content}]

    merged_messages = []
    for message in messages:
        previous = merged_messages[-1] if merged_messages else None
        if previous is not None and previous["role"] == "user" and message["role"] == "user":
            # Replace the tail with a merged copy rather than mutating the caller's dict.
            merged_messages[-1] = {
                **previous,
                "content": _as_blocks(previous["content"]) + _as_blocks(message["content"]),
            }
        else:
            merged_messages.append(message)

    return merged_messages
431
+
432
+
433
def remap_finish_reason(stop_reason: str) -> str:
    """Translate an Anthropic `stop_reason` into an OpenAI `finish_reason`.

    OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
    see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api

    Anthropic (https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#stop-reason):
        "end_turn"      -> "stop"           (the turn ended naturally)
        "stop_sequence" -> "stop"           (a custom stop sequence was generated)
        "max_tokens"    -> "length"
        "tool_use"      -> "function_call"

    Raises:
        ValueError: If `stop_reason` is none of the values above.
    """
    known_reasons = (
        ("end_turn", "stop"),
        ("stop_sequence", "stop"),
        ("max_tokens", "length"),
        ("tool_use", "function_call"),
    )
    for anthropic_reason, openai_reason in known_reasons:
        if stop_reason == anthropic_reason:
            return openai_reason
    raise ValueError(f"Unexpected stop_reason: {stop_reason}")
457
+
458
+
459
def strip_xml_tags(string: str, tag: Optional[str]) -> str:
    """Remove the opening and closing `tag` markers from `string`, keeping the text between them.

    An opening marker is anything from `<tag` up to the next `>` on the same line
    (so attributes are stripped too); a closing marker is exactly `</tag>`.

    Args:
        string: Text that may contain the tags.
        tag: Tag name to strip, or None to return `string` unchanged.

    Returns:
        `string` with every matching opening/closing tag removed.
    """
    if tag is None:
        return string
    # Escape the tag so regex metacharacters in it (".", "+", "(" ...) match literally
    # instead of widening or breaking the pattern.
    escaped_tag = re.escape(tag)
    tag_pattern = f"<{escaped_tag}.*?>|</{escaped_tag}>"
    return re.sub(tag_pattern, "", string)
466
+
467
+
468
def strip_xml_tags_streaming(string: str, tag: Optional[str]) -> str:
    """Strip whole or partial `tag` markers from one streamed text chunk.

    A streamed chunk may contain only a fragment of a tag (for example `<thinking`
    without the closing bracket), so known fragments are removed by plain substring
    replacement. Every remaining `<` and `>` character is removed as well.

    Fragments are removed longest-first. If the bare `<` were removed first, the
    `<tag` and `</tag` fragments could never match and a chunk such as `<thinking`
    would leak the tag name into the output.

    Args:
        string: The streamed chunk.
        tag: Tag name to strip, or None to return `string` unchanged.

    Returns:
        The chunk with tag fragments and angle brackets removed.
    """
    if tag is None:
        return string

    # Handle common partial tag cases, most specific fragments first
    parts_to_remove = [
        f"</{tag}",  # Closing tag start
        f"<{tag}",  # Opening tag start
        f"/{tag}>",  # Closing tag end
        f"{tag}>",  # Opening tag end
        f"/{tag}",  # Partial closing tag without >
        "<",  # Leftover start bracket
        ">",  # Leftover end bracket
    ]

    result = string
    for part in parts_to_remove:
        result = result.replace(part, "")

    return result
@@ -15,7 +15,6 @@ from letta.llm_api.anthropic import (
15
15
  from letta.llm_api.aws_bedrock import has_valid_aws_credentials
16
16
  from letta.llm_api.azure_openai import azure_openai_chat_completions_request
17
17
  from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
18
- from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
19
18
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
20
19
  from letta.llm_api.openai import (
21
20
  build_openai_chat_completions_request,
@@ -27,7 +26,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG
27
26
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
28
27
  from letta.schemas.llm_config import LLMConfig
29
28
  from letta.schemas.message import Message
30
- from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool, cast_message_to_subtype
29
+ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
31
30
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
32
31
  from letta.settings import ModelSettings
33
32
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
@@ -314,65 +313,17 @@ def create(
314
313
 
315
314
  return response
316
315
 
317
- elif llm_config.model_endpoint_type == "google_ai":
318
- if stream:
319
- raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
320
- if not use_tool_naming:
321
- raise NotImplementedError("Only tool calling supported on Google AI API requests")
322
-
323
- if functions is not None:
324
- tools = [{"type": "function", "function": f} for f in functions]
325
- tools = [Tool(**t) for t in tools]
326
- tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
327
- else:
328
- tools = None
329
-
330
- return google_ai_chat_completions_request(
331
- base_url=llm_config.model_endpoint,
332
- model=llm_config.model,
333
- api_key=model_settings.gemini_api_key,
334
- # see structure of payload here: https://ai.google.dev/docs/function_calling
335
- data=dict(
336
- contents=[m.to_google_ai_dict() for m in messages],
337
- tools=tools,
338
- generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
339
- ),
340
- inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
341
- )
342
-
343
- elif llm_config.model_endpoint_type == "google_vertex":
344
- from letta.llm_api.google_vertex import google_vertex_chat_completions_request
345
-
346
- if stream:
347
- raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
348
- if not use_tool_naming:
349
- raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
350
-
351
- if functions is not None:
352
- tools = [{"type": "function", "function": f} for f in functions]
353
- tools = [Tool(**t) for t in tools]
354
- tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
355
- else:
356
- tools = None
357
-
358
- config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
359
-
360
- return google_vertex_chat_completions_request(
361
- model=llm_config.model,
362
- project_id=model_settings.google_cloud_project,
363
- region=model_settings.google_cloud_location,
364
- contents=[m.to_google_ai_dict() for m in messages],
365
- config=config,
366
- inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
367
- )
368
-
369
316
  elif llm_config.model_endpoint_type == "anthropic":
370
317
  if not use_tool_naming:
371
318
  raise NotImplementedError("Only tool calling supported on Anthropic API requests")
372
319
 
373
320
  # Force tool calling
374
321
  tool_call = None
375
- if force_tool_call is not None:
322
+ if functions is None:
323
+ # Special case for summarization path
324
+ tools = None
325
+ tool_choice = None
326
+ elif force_tool_call is not None:
376
327
  # tool_call = {"type": "function", "function": {"name": force_tool_call}}
377
328
  tool_choice = {"type": "tool", "name": force_tool_call}
378
329
  tools = [{"type": "function", "function": f} for f in functions if f["name"] == force_tool_call]
@@ -44,5 +44,11 @@ class LLMClient:
44
44
  return GoogleVertexClient(
45
45
  agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
46
46
  )
47
+ case "anthropic":
48
+ from letta.llm_api.anthropic_client import AnthropicClient
49
+
50
+ return AnthropicClient(
51
+ agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
52
+ )
47
53
  case _:
48
54
  return None
@@ -29,6 +29,7 @@ class LLMClientBase:
29
29
  self.llm_config = llm_config
30
30
  self.put_inner_thoughts_first = put_inner_thoughts_first
31
31
  self.actor_id = actor_id
32
+ self.use_tool_naming = use_tool_naming
32
33
 
33
34
  def send_llm_request(
34
35
  self,
@@ -82,6 +83,7 @@ class LLMClientBase:
82
83
  messages: List[Message],
83
84
  tools: List[dict],
84
85
  tool_call: Optional[str],
86
+ force_tool_call: Optional[str] = None,
85
87
  ) -> dict:
86
88
  """
87
89
  Constructs a request object in the expected data format for this client.
letta/schemas/agent.py CHANGED
@@ -244,6 +244,14 @@ class UpdateAgent(BaseModel):
244
244
  None,
245
245
  description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
246
246
  )
247
+ model: Optional[str] = Field(
248
+ None,
249
+ description="The LLM configuration handle used by the agent, specified in the format "
250
+ "provider/model-name, as an alternative to specifying llm_config.",
251
+ )
252
+ embedding: Optional[str] = Field(
253
+ None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
254
+ )
247
255
 
248
256
  class Config:
249
257
  extra = "ignore" # Ignores extra fields