letta-nightly 0.6.23.dev20250211104055__py3-none-any.whl → 0.6.24.dev20250212104045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

letta/__init__.py CHANGED
@@ -1,5 +1,4 @@
1
- __version__ = "0.6.23"
2
-
1
+ __version__ = "0.6.24"
3
2
 
4
3
  # import clients
5
4
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -260,6 +260,7 @@ class Agent(BaseAgent):
260
260
  error_msg: str,
261
261
  tool_call_id: str,
262
262
  function_name: str,
263
+ function_args: dict,
263
264
  function_response: str,
264
265
  messages: List[Message],
265
266
  include_function_failed_message: bool = False,
@@ -394,6 +395,7 @@ class Agent(BaseAgent):
394
395
 
395
396
  messages = [] # append these to the history when done
396
397
  function_name = None
398
+ function_args = {}
397
399
 
398
400
  # Step 2: check if LLM wanted to call a function
399
401
  if response_message.function_call or (response_message.tool_calls is not None and len(response_message.tool_calls) > 0):
@@ -445,8 +447,6 @@ class Agent(BaseAgent):
445
447
  function_call = (
446
448
  response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
447
449
  )
448
-
449
- # Get the name of the function
450
450
  function_name = function_call.name
451
451
  self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
452
452
 
@@ -459,7 +459,9 @@ class Agent(BaseAgent):
459
459
  if not target_letta_tool:
460
460
  error_msg = f"No function named {function_name}"
461
461
  function_response = "None" # more like "never ran?"
462
- messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_response, messages)
462
+ messages = self._handle_function_error_response(
463
+ error_msg, tool_call_id, function_name, function_args, function_response, messages
464
+ )
463
465
  return messages, False, True # force a heartbeat to allow agent to handle error
464
466
 
465
467
  # Failure case 2: function name is OK, but function args are bad JSON
@@ -469,7 +471,9 @@ class Agent(BaseAgent):
469
471
  except Exception:
470
472
  error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
471
473
  function_response = "None" # more like "never ran?"
472
- messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_response, messages)
474
+ messages = self._handle_function_error_response(
475
+ error_msg, tool_call_id, function_name, function_args, function_response, messages
476
+ )
473
477
  return messages, False, True # force a heartbeat to allow agent to handle error
474
478
 
475
479
  # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
@@ -506,7 +510,7 @@ class Agent(BaseAgent):
506
510
 
507
511
  if sandbox_run_result and sandbox_run_result.status == "error":
508
512
  messages = self._handle_function_error_response(
509
- function_response, tool_call_id, function_name, function_response, messages
513
+ function_response, tool_call_id, function_name, function_args, function_response, messages
510
514
  )
511
515
  return messages, False, True # force a heartbeat to allow agent to handle error
512
516
 
@@ -535,7 +539,7 @@ class Agent(BaseAgent):
535
539
  error_msg_user = f"{error_msg}\n{traceback.format_exc()}"
536
540
  self.logger.error(error_msg_user)
537
541
  messages = self._handle_function_error_response(
538
- error_msg, tool_call_id, function_name, function_response, messages, include_function_failed_message=True
542
+ error_msg, tool_call_id, function_name, function_args, function_response, messages, include_function_failed_message=True
539
543
  )
540
544
  return messages, False, True # force a heartbeat to allow agent to handle error
541
545
 
@@ -543,7 +547,7 @@ class Agent(BaseAgent):
543
547
  if function_response_string.startswith(ERROR_MESSAGE_PREFIX):
544
548
  error_msg = function_response_string
545
549
  messages = self._handle_function_error_response(
546
- error_msg, tool_call_id, function_name, function_response, messages, include_function_failed_message=True
550
+ error_msg, tool_call_id, function_name, function_args, function_response, messages, include_function_failed_message=True
547
551
  )
548
552
  return messages, False, True # force a heartbeat to allow agent to handle error
549
553
 
letta/cli/cli.py CHANGED
@@ -15,7 +15,6 @@ from letta.local_llm.constants import ASSISTANT_MESSAGE_CLI_SYMBOL
15
15
  from letta.log import get_logger
16
16
  from letta.schemas.enums import OptionState
17
17
  from letta.schemas.memory import ChatMemory, Memory
18
- from letta.server.server import logger as server_logger
19
18
 
20
19
  # from letta.interface import CLIInterface as interface # for printing to terminal
21
20
  from letta.streaming_interface import StreamingRefreshCLIInterface as interface # for printing to terminal
@@ -119,6 +118,8 @@ def run(
119
118
  utils.DEBUG = debug
120
119
  # TODO: add logging command line options for runtime log level
121
120
 
121
+ from letta.server.server import logger as server_logger
122
+
122
123
  if debug:
123
124
  logger.setLevel(logging.DEBUG)
124
125
  server_logger.setLevel(logging.DEBUG)
@@ -360,4 +361,4 @@ def delete_agent(
360
361
  def version() -> str:
361
362
  import letta
362
363
 
363
- return letta.__version__
364
+ print(letta.__version__)
letta/client/streaming.py CHANGED
@@ -17,48 +17,45 @@ logger = get_logger(__name__)
17
17
 
18
18
 
19
19
  def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]:
20
-
21
- with httpx.Client() as client:
20
+ """
21
+ Sends an SSE POST request and yields parsed response chunks.
22
+ """
23
+ # TODO: Please note this is a very generous timeout for e2b reasons
24
+ with httpx.Client(timeout=httpx.Timeout(5 * 60.0, read=5 * 60.0)) as client:
22
25
  with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
23
26
 
24
- # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
27
+ # Check for immediate HTTP errors before processing the SSE stream
25
28
  if not event_source.response.is_success:
26
- # handle errors
27
- pass
28
-
29
- logger.warning("Caught error before iterating SSE request:", vars(event_source.response))
30
- logger.warning(event_source.response.read().decode("utf-8"))
29
+ response_bytes = event_source.response.read()
30
+ logger.warning(f"SSE request error: {vars(event_source.response)}")
31
+ logger.warning(response_bytes.decode("utf-8"))
31
32
 
32
33
  try:
33
- response_bytes = event_source.response.read()
34
34
  response_dict = json.loads(response_bytes.decode("utf-8"))
35
- # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
36
- if (
37
- "error" in response_dict
38
- and "message" in response_dict["error"]
39
- and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"]
40
- ):
41
- logger.error(response_dict["error"]["message"])
42
- raise LLMError(response_dict["error"]["message"])
35
+ error_message = response_dict.get("error", {}).get("message", "")
36
+
37
+ if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
38
+ logger.error(error_message)
39
+ raise LLMError(error_message)
43
40
  except LLMError:
44
41
  raise
45
- except:
46
- logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
42
+ except Exception:
43
+ logger.error("Failed to parse SSE message, raising HTTP error")
47
44
  event_source.response.raise_for_status()
48
45
 
49
46
  try:
50
47
  for sse in event_source.iter_sse():
51
- # if sse.data == OPENAI_SSE_DONE:
52
- # print("finished")
53
- # break
54
- if sse.data in [status.value for status in MessageStreamStatus]:
55
- # break
48
+ if sse.data in {status.value for status in MessageStreamStatus}:
56
49
  yield MessageStreamStatus(sse.data)
50
+ if sse.data == MessageStreamStatus.done.value:
51
+ # We received the [DONE], so stop reading the stream.
52
+ break
57
53
  else:
58
54
  chunk_data = json.loads(sse.data)
55
+
59
56
  if "reasoning" in chunk_data:
60
57
  yield ReasoningMessage(**chunk_data)
61
- elif "message_type" in chunk_data and chunk_data["message_type"] == "assistant_message":
58
+ elif chunk_data.get("message_type") == "assistant_message":
62
59
  yield AssistantMessage(**chunk_data)
63
60
  elif "tool_call" in chunk_data:
64
61
  yield ToolCallMessage(**chunk_data)
@@ -67,33 +64,31 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStrea
67
64
  elif "step_count" in chunk_data:
68
65
  yield LettaUsageStatistics(**chunk_data)
69
66
  elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]:
70
- yield ChatCompletionChunk(**chunk_data) # Add your processing logic for chat chunks here
67
+ yield ChatCompletionChunk(**chunk_data)
71
68
  else:
72
69
  raise ValueError(f"Unknown message type in chunk_data: {chunk_data}")
73
70
 
74
71
  except SSEError as e:
75
- logger.error("Caught an error while iterating the SSE stream:", str(e))
76
- if "application/json" in str(e): # Check if the error is because of JSON response
77
- # TODO figure out a better way to catch the error other than re-trying with a POST
78
- response = client.post(url=url, json=data, headers=headers) # Make the request again to get the JSON response
79
- if response.headers["Content-Type"].startswith("application/json"):
80
- error_details = response.json() # Parse the JSON to get the error message
81
- logger.error("Request:", vars(response.request))
82
- logger.error("POST Error:", error_details)
83
- logger.error("Original SSE Error:", str(e))
72
+ logger.error(f"SSE stream error: {e}")
73
+
74
+ if "application/json" in str(e):
75
+ response = client.post(url=url, json=data, headers=headers)
76
+
77
+ if response.headers.get("Content-Type", "").startswith("application/json"):
78
+ error_details = response.json()
79
+ logger.error(f"POST Error: {error_details}")
84
80
  else:
85
81
  logger.error("Failed to retrieve JSON error message via retry.")
86
- else:
87
- logger.error("SSEError not related to 'application/json' content type.")
88
82
 
89
- # Optionally re-raise the exception if you need to propagate it
90
83
  raise e
91
84
 
92
85
  except Exception as e:
93
- if event_source.response.request is not None:
94
- logger.error("HTTP Request:", vars(event_source.response.request))
95
- if event_source.response is not None:
96
- logger.error("HTTP Status:", event_source.response.status_code)
97
- logger.error("HTTP Headers:", event_source.response.headers)
98
- logger.error("Exception message:", str(e))
86
+ logger.error(f"Unexpected exception: {e}")
87
+
88
+ if event_source.response.request:
89
+ logger.error(f"HTTP Request: {vars(event_source.response.request)}")
90
+ if event_source.response:
91
+ logger.error(f"HTTP Status: {event_source.response.status_code}")
92
+ logger.error(f"HTTP Headers: {event_source.response.headers}")
93
+
99
94
  raise e
letta/constants.py CHANGED
@@ -51,9 +51,6 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
51
51
  BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
52
52
  # Multi agent tools
53
53
  MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
54
- MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3
55
- MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60
56
- MULTI_AGENT_CONCURRENT_SENDS = 15
57
54
 
58
55
  # The name of the tool used to send message to the user
59
56
  # May not be relevant in cases where the agent has multiple ways to message to user (send_imessage, send_discord_mesasge, ...)
letta/embeddings.py CHANGED
@@ -167,6 +167,27 @@ class OllamaEmbeddings:
167
167
  return response_json["embedding"]
168
168
 
169
169
 
170
+ class GoogleEmbeddings:
171
+ def __init__(self, api_key: str, model: str, base_url: str):
172
+ self.api_key = api_key
173
+ self.model = model
174
+ self.base_url = base_url # Expected to be "https://generativelanguage.googleapis.com"
175
+
176
+ def get_text_embedding(self, text: str):
177
+ import httpx
178
+
179
+ headers = {"Content-Type": "application/json"}
180
+ # Build the URL based on the provided base_url, model, and API key.
181
+ url = f"{self.base_url}/v1beta/models/{self.model}:embedContent?key={self.api_key}"
182
+ payload = {"model": self.model, "content": {"parts": [{"text": text}]}}
183
+ with httpx.Client() as client:
184
+ response = client.post(url, headers=headers, json=payload)
185
+ # Raise an error for non-success HTTP status codes.
186
+ response.raise_for_status()
187
+ response_json = response.json()
188
+ return response_json["embedding"]["values"]
189
+
190
+
170
191
  def query_embedding(embedding_model, query_text: str):
171
192
  """Generate padded embedding for querying database"""
172
193
  query_vec = embedding_model.get_text_embedding(query_text)
@@ -237,5 +258,14 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
237
258
  )
238
259
  return model
239
260
 
261
+ elif endpoint_type == "google_ai":
262
+ assert all([model_settings.gemini_api_key is not None, model_settings.gemini_base_url is not None])
263
+ model = GoogleEmbeddings(
264
+ model=config.embedding_model,
265
+ api_key=model_settings.gemini_api_key,
266
+ base_url=model_settings.gemini_base_url,
267
+ )
268
+ return model
269
+
240
270
  else:
241
271
  raise ValueError(f"Unknown endpoint type {endpoint_type}")
@@ -19,6 +19,8 @@ from anthropic.types.beta import (
19
19
 
20
20
  from letta.errors import BedrockError, BedrockPermissionError
21
21
  from letta.llm_api.aws_bedrock import get_bedrock_client
22
+ from letta.llm_api.helpers import add_inner_thoughts_to_functions
23
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
22
24
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
23
25
  from letta.schemas.message import Message as _Message
24
26
  from letta.schemas.message import MessageRole as _MessageRole
@@ -513,9 +515,23 @@ def convert_anthropic_stream_event_to_chatcompletion(
513
515
  def _prepare_anthropic_request(
514
516
  data: ChatCompletionRequest,
515
517
  inner_thoughts_xml_tag: Optional[str] = "thinking",
518
+ # if true, prefix fill the generation with the thinking tag
519
+ prefix_fill: bool = True,
520
+ # if true, put COT inside the tool calls instead of inside the content
521
+ put_inner_thoughts_in_kwargs: bool = False,
516
522
  ) -> dict:
517
523
  """Prepare the request data for Anthropic API format."""
518
- # convert the tools
524
+
525
+ # if needed, put inner thoughts as a kwarg for all tools
526
+ if data.tools and put_inner_thoughts_in_kwargs:
527
+ functions = add_inner_thoughts_to_functions(
528
+ functions=[t.function.model_dump() for t in data.tools],
529
+ inner_thoughts_key=INNER_THOUGHTS_KWARG,
530
+ inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
531
+ )
532
+ data.tools = [Tool(function=f) for f in functions]
533
+
534
+ # convert the tools to Anthropic's payload format
519
535
  anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
520
536
 
521
537
  # pydantic -> dict
@@ -529,11 +545,25 @@ def _prepare_anthropic_request(
529
545
  data.pop("tools")
530
546
  data.pop("tool_choice", None)
531
547
  elif anthropic_tools is not None:
548
+ # TODO eventually enable parallel tool use
532
549
  data["tools"] = anthropic_tools
533
- if len(anthropic_tools) == 1:
550
+
551
+ # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
552
+ if put_inner_thoughts_in_kwargs:
553
+ if len(anthropic_tools) == 1:
554
+ data["tool_choice"] = {
555
+ "type": "tool",
556
+ "name": anthropic_tools[0]["name"],
557
+ "disable_parallel_tool_use": True,
558
+ }
559
+ else:
560
+ data["tool_choice"] = {
561
+ "type": "any",
562
+ "disable_parallel_tool_use": True,
563
+ }
564
+ else:
534
565
  data["tool_choice"] = {
535
- "type": "tool",
536
- "name": anthropic_tools[0]["name"],
566
+ "type": "auto",
537
567
  "disable_parallel_tool_use": True,
538
568
  }
539
569
 
@@ -548,8 +578,21 @@ def _prepare_anthropic_request(
548
578
  message["content"] = None
549
579
 
550
580
  # Convert to Anthropic format
551
- msg_objs = [_Message.dict_to_message(user_id=None, agent_id=None, openai_message_dict=m) for m in data["messages"]]
552
- data["messages"] = [m.to_anthropic_dict(inner_thoughts_xml_tag=inner_thoughts_xml_tag) for m in msg_objs]
581
+ msg_objs = [
582
+ _Message.dict_to_message(
583
+ user_id=None,
584
+ agent_id=None,
585
+ openai_message_dict=m,
586
+ )
587
+ for m in data["messages"]
588
+ ]
589
+ data["messages"] = [
590
+ m.to_anthropic_dict(
591
+ inner_thoughts_xml_tag=inner_thoughts_xml_tag,
592
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
593
+ )
594
+ for m in msg_objs
595
+ ]
553
596
 
554
597
  # Ensure first message is user
555
598
  if data["messages"][0]["role"] != "user":
@@ -558,6 +601,16 @@ def _prepare_anthropic_request(
558
601
  # Handle alternating messages
559
602
  data["messages"] = merge_tool_results_into_user_messages(data["messages"])
560
603
 
604
+ # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
605
+ # https://docs.anthropic.com/en/api/messages#body-messages
606
+ # NOTE: cannot prefill with tools for opus:
607
+ # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
608
+ if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
609
+ data["messages"].append(
610
+ # Start the thinking process for the assistant
611
+ {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
612
+ )
613
+
561
614
  # Validate max_tokens
562
615
  assert "max_tokens" in data, data
563
616
 
@@ -571,6 +624,7 @@ def _prepare_anthropic_request(
571
624
  def anthropic_chat_completions_request(
572
625
  data: ChatCompletionRequest,
573
626
  inner_thoughts_xml_tag: Optional[str] = "thinking",
627
+ put_inner_thoughts_in_kwargs: bool = False,
574
628
  betas: List[str] = ["tools-2024-04-04"],
575
629
  ) -> ChatCompletionResponse:
576
630
  """https://docs.anthropic.com/claude/docs/tool-use"""
@@ -580,7 +634,11 @@ def anthropic_chat_completions_request(
580
634
  anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
581
635
  elif model_settings.anthropic_api_key:
582
636
  anthropic_client = anthropic.Anthropic()
583
- data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
637
+ data = _prepare_anthropic_request(
638
+ data=data,
639
+ inner_thoughts_xml_tag=inner_thoughts_xml_tag,
640
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
641
+ )
584
642
  response = anthropic_client.beta.messages.create(
585
643
  **data,
586
644
  betas=betas,
@@ -611,6 +669,7 @@ def anthropic_bedrock_chat_completions_request(
611
669
  def anthropic_chat_completions_request_stream(
612
670
  data: ChatCompletionRequest,
613
671
  inner_thoughts_xml_tag: Optional[str] = "thinking",
672
+ put_inner_thoughts_in_kwargs: bool = False,
614
673
  betas: List[str] = ["tools-2024-04-04"],
615
674
  ) -> Generator[ChatCompletionChunkResponse, None, None]:
616
675
  """Stream chat completions from Anthropic API.
@@ -618,7 +677,11 @@ def anthropic_chat_completions_request_stream(
618
677
  Similar to OpenAI's streaming, but using Anthropic's native streaming support.
619
678
  See: https://docs.anthropic.com/claude/reference/messages-streaming
620
679
  """
621
- data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
680
+ data = _prepare_anthropic_request(
681
+ data=data,
682
+ inner_thoughts_xml_tag=inner_thoughts_xml_tag,
683
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
684
+ )
622
685
 
623
686
  anthropic_override_key = ProviderManager().get_anthropic_override_key()
624
687
  if anthropic_override_key:
@@ -666,6 +729,7 @@ def anthropic_chat_completions_process_stream(
666
729
  chat_completion_request: ChatCompletionRequest,
667
730
  stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
668
731
  inner_thoughts_xml_tag: Optional[str] = "thinking",
732
+ put_inner_thoughts_in_kwargs: bool = False,
669
733
  create_message_id: bool = True,
670
734
  create_message_datetime: bool = True,
671
735
  betas: List[str] = ["tools-2024-04-04"],
@@ -743,6 +807,7 @@ def anthropic_chat_completions_process_stream(
743
807
  anthropic_chat_completions_request_stream(
744
808
  data=chat_completion_request,
745
809
  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
810
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
746
811
  betas=betas,
747
812
  )
748
813
  ):
@@ -111,7 +111,6 @@ def create(
111
111
  # streaming?
112
112
  stream: bool = False,
113
113
  stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
114
- max_tokens: Optional[int] = None,
115
114
  model_settings: Optional[dict] = None, # TODO: eventually pass from server
116
115
  ) -> ChatCompletionResponse:
117
116
  """Return response to chat completion with backoff"""
@@ -157,7 +156,7 @@ def create(
157
156
  else:
158
157
  function_call = "required"
159
158
 
160
- data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)
159
+ data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
161
160
  if stream: # Client requested token streaming
162
161
  data.stream = True
163
162
  assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -212,7 +211,7 @@ def create(
212
211
  # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
213
212
  llm_config.model_endpoint = model_settings.azure_base_url
214
213
  chat_completion_request = build_openai_chat_completions_request(
215
- llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
214
+ llm_config, messages, user_id, functions, function_call, use_tool_naming
216
215
  )
217
216
 
218
217
  response = azure_openai_chat_completions_request(
@@ -248,7 +247,7 @@ def create(
248
247
  data=dict(
249
248
  contents=[m.to_google_ai_dict() for m in messages],
250
249
  tools=tools,
251
- generation_config={"temperature": llm_config.temperature},
250
+ generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
252
251
  ),
253
252
  inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
254
253
  )
@@ -268,7 +267,7 @@ def create(
268
267
  messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
269
268
  tools=([{"type": "function", "function": f} for f in functions] if functions else None),
270
269
  tool_choice=tool_call,
271
- max_tokens=1024, # TODO make dynamic
270
+ max_tokens=llm_config.max_tokens, # Note: max_tokens is required for Anthropic API
272
271
  temperature=llm_config.temperature,
273
272
  stream=stream,
274
273
  )
@@ -279,14 +278,21 @@ def create(
279
278
 
280
279
  response = anthropic_chat_completions_process_stream(
281
280
  chat_completion_request=chat_completion_request,
281
+ put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
282
282
  stream_interface=stream_interface,
283
283
  )
284
- return response
285
284
 
286
- # Client did not request token streaming (expect a blocking backend response)
287
- return anthropic_chat_completions_request(
288
- data=chat_completion_request,
289
- )
285
+ else:
286
+ # Client did not request token streaming (expect a blocking backend response)
287
+ response = anthropic_chat_completions_request(
288
+ data=chat_completion_request,
289
+ put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
290
+ )
291
+
292
+ if llm_config.put_inner_thoughts_in_kwargs:
293
+ response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
294
+
295
+ return response
290
296
 
291
297
  # elif llm_config.model_endpoint_type == "cohere":
292
298
  # if stream:
@@ -416,7 +422,7 @@ def create(
416
422
  tool_choice=tool_call,
417
423
  # user=str(user_id),
418
424
  # NOTE: max_tokens is required for Anthropic API
419
- max_tokens=1024, # TODO make dynamic
425
+ max_tokens=llm_config.max_tokens,
420
426
  ),
421
427
  )
422
428
 
letta/llm_api/openai.py CHANGED
@@ -7,6 +7,7 @@ from openai import OpenAI
7
7
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
8
8
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
9
9
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
10
+ from letta.log import get_logger
10
11
  from letta.schemas.llm_config import LLMConfig
11
12
  from letta.schemas.message import Message as _Message
12
13
  from letta.schemas.message import MessageRole as _MessageRole
@@ -26,7 +27,7 @@ from letta.schemas.openai.embedding_response import EmbeddingResponse
26
27
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
27
28
  from letta.utils import get_tool_call_id, smart_urljoin
28
29
 
29
- OPENAI_SSE_DONE = "[DONE]"
30
+ logger = get_logger(__name__)
30
31
 
31
32
 
32
33
  def openai_get_model_list(
@@ -93,7 +94,6 @@ def build_openai_chat_completions_request(
93
94
  functions: Optional[list],
94
95
  function_call: Optional[str],
95
96
  use_tool_naming: bool,
96
- max_tokens: Optional[int],
97
97
  ) -> ChatCompletionRequest:
98
98
  if functions and llm_config.put_inner_thoughts_in_kwargs:
99
99
  # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -130,7 +130,7 @@ def build_openai_chat_completions_request(
130
130
  tools=[Tool(type="function", function=f) for f in functions] if functions else None,
131
131
  tool_choice=tool_choice,
132
132
  user=str(user_id),
133
- max_completion_tokens=max_tokens,
133
+ max_completion_tokens=llm_config.max_tokens,
134
134
  temperature=llm_config.temperature,
135
135
  )
136
136
  else:
@@ -140,7 +140,7 @@ def build_openai_chat_completions_request(
140
140
  functions=functions,
141
141
  function_call=function_call,
142
142
  user=str(user_id),
143
- max_completion_tokens=max_tokens,
143
+ max_completion_tokens=llm_config.max_tokens,
144
144
  temperature=llm_config.temperature,
145
145
  )
146
146
  # https://platform.openai.com/docs/guides/text-generation/json-mode
@@ -354,9 +354,10 @@ def openai_chat_completions_process_stream(
354
354
  except Exception as e:
355
355
  if stream_interface:
356
356
  stream_interface.stream_end()
357
- print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
357
+ logger.error(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
358
358
  raise e
359
359
  finally:
360
+ logger.info(f"Finally ending streaming interface.")
360
361
  if stream_interface:
361
362
  stream_interface.stream_end()
362
363
 
@@ -15,6 +15,7 @@ class LLMConfig(BaseModel):
15
15
  context_window (int): The context window size for the model.
16
16
  put_inner_thoughts_in_kwargs (bool): Puts `inner_thoughts` as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
17
17
  temperature (float): The temperature to use when generating text with the model. A higher temperature will result in more random text.
18
+ max_tokens (int): The maximum number of tokens to generate.
18
19
  """
19
20
 
20
21
  # TODO: 🤮 don't default to a vendor! bug city!
@@ -51,6 +52,10 @@ class LLMConfig(BaseModel):
51
52
  0.7,
52
53
  description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
53
54
  )
55
+ max_tokens: Optional[int] = Field(
56
+ 1024,
57
+ description="The maximum number of tokens to generate. If not set, the model will use its default value.",
58
+ )
54
59
 
55
60
  # FIXME hack to silence pydantic protected namespace warning
56
61
  model_config = ConfigDict(protected_namespaces=())
letta/schemas/message.py CHANGED
@@ -542,7 +542,11 @@ class Message(BaseMessage):
542
542
 
543
543
  return openai_message
544
544
 
545
- def to_anthropic_dict(self, inner_thoughts_xml_tag="thinking") -> dict:
545
+ def to_anthropic_dict(
546
+ self,
547
+ inner_thoughts_xml_tag="thinking",
548
+ put_inner_thoughts_in_kwargs: bool = False,
549
+ ) -> dict:
546
550
  """
547
551
  Convert to an Anthropic message dictionary
548
552
 
@@ -586,26 +590,38 @@ class Message(BaseMessage):
586
590
  "role": self.role,
587
591
  }
588
592
  content = []
589
- if self.text is not None:
593
+ # COT / reasoning / thinking
594
+ if self.text is not None and not put_inner_thoughts_in_kwargs:
590
595
  content.append(
591
596
  {
592
597
  "type": "text",
593
598
  "text": add_xml_tag(string=self.text, xml_tag=inner_thoughts_xml_tag),
594
599
  }
595
600
  )
601
+ # Tool calling
596
602
  if self.tool_calls is not None:
597
603
  for tool_call in self.tool_calls:
604
+
605
+ if put_inner_thoughts_in_kwargs:
606
+ tool_call_input = add_inner_thoughts_to_tool_call(
607
+ tool_call,
608
+ inner_thoughts=self.text,
609
+ inner_thoughts_key=INNER_THOUGHTS_KWARG,
610
+ ).model_dump()
611
+ else:
612
+ tool_call_input = json.loads(tool_call.function.arguments)
613
+
598
614
  content.append(
599
615
  {
600
616
  "type": "tool_use",
601
617
  "id": tool_call.id,
602
618
  "name": tool_call.function.name,
603
- "input": json.loads(tool_call.function.arguments),
619
+ "input": tool_call_input,
604
620
  }
605
621
  )
606
622
 
607
623
  # If the only content was text, unpack it back into a singleton
608
- # TODO
624
+ # TODO support multi-modal
609
625
  anthropic_message["content"] = content
610
626
 
611
627
  # Optional fields, do not include if null
@@ -347,6 +347,15 @@ class AnthropicProvider(Provider):
347
347
 
348
348
  configs = []
349
349
  for model in models:
350
+
351
+ # We set this to false by default, because Anthropic can
352
+ # natively support <thinking> tags inside of content fields
353
+ # However, putting COT inside of tool calls can make it more
354
+ # reliable for tool calling (no chance of a non-tool call step)
355
+ # Since tool_choice_type 'any' doesn't work with in-content COT
356
+ # NOTE For Haiku, it can be flaky if we don't enable this by default
357
+ inner_thoughts_in_kwargs = True if "haiku" in model["name"] else False
358
+
350
359
  configs.append(
351
360
  LLMConfig(
352
361
  model=model["name"],
@@ -354,6 +363,7 @@ class AnthropicProvider(Provider):
354
363
  model_endpoint=self.base_url,
355
364
  context_window=model["context_window"],
356
365
  handle=self.get_handle(model["name"]),
366
+ put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
357
367
  )
358
368
  )
359
369
  return configs
@@ -41,7 +41,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
41
41
  def __init__(
42
42
  self,
43
43
  multi_step: bool = True,
44
- timeout: int = 150,
44
+ timeout: int = 3 * 60,
45
45
  # The following are placeholders for potential expansions; they
46
46
  # remain if you need to differentiate between actual "assistant messages"
47
47
  # vs. tool calls. By default, they are set for the "send_message" tool usage.
@@ -55,6 +55,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
55
55
  # Parsing state for incremental function-call data
56
56
  self.current_function_name = ""
57
57
  self.current_function_arguments = []
58
+ self.current_json_parse_result = {}
58
59
 
59
60
  # Internal chunk buffer and event for async notification
60
61
  self._chunks = deque()
@@ -85,6 +86,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
85
86
  try:
86
87
  await asyncio.wait_for(self._event.wait(), timeout=self.timeout)
87
88
  except asyncio.TimeoutError:
89
+ logger.warning("Chat completions interface timed out! Please check that this is intended.")
88
90
  break
89
91
 
90
92
  while self._chunks:
@@ -105,7 +107,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
105
107
  self,
106
108
  item: ChatCompletionChunk,
107
109
  ):
108
- """
110
+ """
109
111
  Add an item (a LettaMessage, status marker, or partial chunk)
110
112
  to the queue and signal waiting consumers.
111
113
  """
@@ -156,6 +158,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
156
158
  Called externally with a ChatCompletionChunkResponse. Transforms
157
159
  it if necessary, then enqueues partial messages for streaming back.
158
160
  """
161
+ # print("RECEIVED CHUNK...")
159
162
  processed_chunk = self._process_chunk_to_openai_style(chunk)
160
163
  if processed_chunk is not None:
161
164
  self._push_to_buffer(processed_chunk)
@@ -216,37 +219,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
216
219
  combined_args = "".join(self.current_function_arguments)
217
220
  parsed_args = OptimisticJSONParser().parse(combined_args)
218
221
 
219
- # If we can see a "message" field, return it as partial content
220
- if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
221
- return ChatCompletionChunk(
222
- id=chunk.id,
223
- object=chunk.object,
224
- created=chunk.created.timestamp(),
225
- model=chunk.model,
226
- choices=[
227
- Choice(
228
- index=choice.index,
229
- delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
230
- finish_reason=None,
231
- )
232
- ],
233
- )
222
+ # Only proceed if the parsed result differs from the previously stored parse result
223
+ # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
224
+ if parsed_args != self.current_json_parse_result:
225
+ self.current_json_parse_result = parsed_args
226
+ # If we can see a "message" field, return it as partial content
227
+ if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
228
+ return ChatCompletionChunk(
229
+ id=chunk.id,
230
+ object=chunk.object,
231
+ created=chunk.created.timestamp(),
232
+ model=chunk.model,
233
+ choices=[
234
+ Choice(
235
+ index=choice.index,
236
+ delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
237
+ finish_reason=None,
238
+ )
239
+ ],
240
+ )
234
241
 
235
242
  # If there's a finish reason, pass that along
236
243
  if choice.finish_reason is not None:
237
- return ChatCompletionChunk(
238
- id=chunk.id,
239
- object=chunk.object,
240
- created=chunk.created.timestamp(),
241
- model=chunk.model,
242
- choices=[
243
- Choice(
244
- index=choice.index,
245
- delta=ChoiceDelta(),
246
- finish_reason=self.FINISH_REASON_STR,
247
- )
248
- ],
249
- )
244
+ # only emit a final chunk if finish_reason == "stop"
245
+ if choice.finish_reason == "stop":
246
+ return ChatCompletionChunk(
247
+ id=chunk.id,
248
+ object=chunk.object,
249
+ created=chunk.created.timestamp(),
250
+ model=chunk.model,
251
+ choices=[
252
+ Choice(
253
+ index=choice.index,
254
+ delta=ChoiceDelta(), # no partial text here
255
+ finish_reason="stop",
256
+ )
257
+ ],
258
+ )
250
259
 
251
260
  return None
252
261
 
@@ -436,11 +436,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
436
436
 
437
437
  # inner thoughts
438
438
  if message_delta.content is not None:
439
- processed_chunk = ReasoningMessage(
440
- id=message_id,
441
- date=message_date,
442
- reasoning=message_delta.content,
443
- )
439
+ if message_delta.content == "":
440
+ print("skipping empty content")
441
+ processed_chunk = None
442
+ else:
443
+ processed_chunk = ReasoningMessage(
444
+ id=message_id,
445
+ date=message_date,
446
+ reasoning=message_delta.content,
447
+ )
444
448
 
445
449
  # tool calls
446
450
  elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
@@ -496,15 +500,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
496
500
  if tool_call.function.name:
497
501
  tool_call_delta["name"] = tool_call.function.name
498
502
 
499
- processed_chunk = ToolCallMessage(
500
- id=message_id,
501
- date=message_date,
502
- tool_call=ToolCallDelta(
503
- name=tool_call_delta.get("name"),
504
- arguments=tool_call_delta.get("arguments"),
505
- tool_call_id=tool_call_delta.get("id"),
506
- ),
507
- )
503
+ # We might end up with a no-op, in which case we should omit
504
+ if (
505
+ tool_call_delta.get("name") is None
506
+ and tool_call_delta.get("arguments") in [None, ""]
507
+ and tool_call_delta.get("id") is None
508
+ ):
509
+ processed_chunk = None
510
+ print("skipping empty chunk...")
511
+ else:
512
+ processed_chunk = ToolCallMessage(
513
+ id=message_id,
514
+ date=message_date,
515
+ tool_call=ToolCallDelta(
516
+ name=tool_call_delta.get("name"),
517
+ arguments=tool_call_delta.get("arguments"),
518
+ tool_call_id=tool_call_delta.get("id"),
519
+ ),
520
+ )
508
521
 
509
522
  elif self.inner_thoughts_in_kwargs and tool_call.function:
510
523
  processed_chunk = None
@@ -525,11 +538,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
525
538
  self.function_id_buffer += tool_call.id
526
539
 
527
540
  if tool_call.function.arguments:
528
- if chunk.model.startswith("claude-"):
529
- updates_main_json = tool_call.function.arguments
530
- updates_inner_thoughts = ""
531
- else: # OpenAI
532
- updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
541
+ # if chunk.model.startswith("claude-"):
542
+ # updates_main_json = tool_call.function.arguments
543
+ # updates_inner_thoughts = ""
544
+ # else: # OpenAI
545
+ # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
546
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
533
547
 
534
548
  # If we have inner thoughts, we should output them as a chunk
535
549
  if updates_inner_thoughts:
@@ -787,15 +801,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
787
801
  if tool_call.function.name:
788
802
  tool_call_delta["name"] = tool_call.function.name
789
803
 
790
- processed_chunk = ToolCallMessage(
791
- id=message_id,
792
- date=message_date,
793
- tool_call=ToolCallDelta(
794
- name=tool_call_delta.get("name"),
795
- arguments=tool_call_delta.get("arguments"),
796
- tool_call_id=tool_call_delta.get("id"),
797
- ),
798
- )
804
+ # We might end up with a no-op, in which case we should omit
805
+ if (
806
+ tool_call_delta.get("name") is None
807
+ and tool_call_delta.get("arguments") in [None, ""]
808
+ and tool_call_delta.get("id") is None
809
+ ):
810
+ processed_chunk = None
811
+ print("skipping empty chunk...")
812
+ else:
813
+ processed_chunk = ToolCallMessage(
814
+ id=message_id,
815
+ date=message_date,
816
+ tool_call=ToolCallDelta(
817
+ name=tool_call_delta.get("name"),
818
+ arguments=tool_call_delta.get("arguments"),
819
+ tool_call_id=tool_call_delta.get("id"),
820
+ ),
821
+ )
799
822
 
800
823
  elif choice.finish_reason is not None:
801
824
  # skip if there's a finish
@@ -2,7 +2,7 @@ from typing import TYPE_CHECKING
2
2
 
3
3
  from fastapi import APIRouter
4
4
 
5
- from letta.cli.cli import version
5
+ from letta import __version__
6
6
  from letta.schemas.health import Health
7
7
 
8
8
  if TYPE_CHECKING:
@@ -15,6 +15,6 @@ router = APIRouter(prefix="/health", tags=["health"])
15
15
  @router.get("/", response_model=Health, operation_id="health_check")
16
16
  def health_check():
17
17
  return Health(
18
- version=version(),
18
+ version=__version__,
19
19
  status="ok",
20
20
  )
@@ -9,6 +9,7 @@ from fastapi import Header
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from letta.errors import ContextWindowExceededError, RateLimitExceededError
12
+ from letta.log import get_logger
12
13
  from letta.schemas.usage import LettaUsageStatistics
13
14
  from letta.server.rest_api.interface import StreamingServerInterface
14
15
 
@@ -24,10 +25,14 @@ SSE_FINISH_MSG = "[DONE]" # mimic openai
24
25
  SSE_ARTIFICIAL_DELAY = 0.1
25
26
 
26
27
 
28
+ logger = get_logger(__name__)
29
+
30
+
27
31
  def sse_formatter(data: Union[dict, str]) -> str:
28
32
  """Prefix with 'data: ', and always include double newlines"""
29
33
  assert type(data) in [dict, str], f"Expected type dict or str, got type {type(data)}"
30
34
  data_str = json.dumps(data, separators=(",", ":")) if isinstance(data, dict) else data
35
+ # print(f"data: {data_str}\n\n")
31
36
  return f"data: {data_str}\n\n"
32
37
 
33
38
 
@@ -62,23 +67,29 @@ async def sse_async_generator(
62
67
  usage = await usage_task
63
68
  # Double-check the type
64
69
  if not isinstance(usage, LettaUsageStatistics):
65
- raise ValueError(f"Expected LettaUsageStatistics, got {type(usage)}")
70
+ err_msg = f"Expected LettaUsageStatistics, got {type(usage)}"
71
+ logger.error(err_msg)
72
+ raise ValueError(err_msg)
66
73
  yield sse_formatter(usage.model_dump())
67
74
 
68
75
  except ContextWindowExceededError as e:
69
76
  log_error_to_sentry(e)
77
+ logger.error(f"ContextWindowExceededError error: {e}")
70
78
  yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
71
79
 
72
80
  except RateLimitExceededError as e:
73
81
  log_error_to_sentry(e)
82
+ logger.error(f"RateLimitExceededError error: {e}")
74
83
  yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
75
84
 
76
85
  except Exception as e:
77
86
  log_error_to_sentry(e)
78
- yield sse_formatter({"error": f"Stream failed (internal error occured)"})
87
+ logger.error(f"Caught unexpected Exception: {e}")
88
+ yield sse_formatter({"error": f"Stream failed (internal error occurred)"})
79
89
 
80
90
  except Exception as e:
81
91
  log_error_to_sentry(e)
92
+ logger.error(f"Caught unexpected Exception: {e}")
82
93
  yield sse_formatter({"error": "Stream failed (decoder encountered an error)"})
83
94
 
84
95
  finally:
@@ -477,39 +477,39 @@ class AgentManager:
477
477
  )
478
478
  message = self.message_manager.create_message(message, actor=actor)
479
479
  message_ids = [message.id] + agent_state.message_ids[1:] # swap index 0 (system)
480
- return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
480
+ return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
481
481
  else:
482
482
  return agent_state
483
483
 
484
484
  @enforce_types
485
- def set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
485
+ def _set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
486
486
  return self.update_agent(agent_id=agent_id, agent_update=UpdateAgent(message_ids=message_ids), actor=actor)
487
487
 
488
488
  @enforce_types
489
489
  def trim_older_in_context_messages(self, num: int, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
490
490
  message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
491
491
  new_messages = [message_ids[0]] + message_ids[num:] # 0 is system message
492
- return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
492
+ return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
493
493
 
494
494
  @enforce_types
495
495
  def trim_all_in_context_messages_except_system(self, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
496
496
  message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
497
497
  new_messages = [message_ids[0]] # 0 is system message
498
- return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
498
+ return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
499
499
 
500
500
  @enforce_types
501
501
  def prepend_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
502
502
  message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
503
503
  new_messages = self.message_manager.create_many_messages(messages, actor=actor)
504
504
  message_ids = [message_ids[0]] + [m.id for m in new_messages] + message_ids[1:]
505
- return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
505
+ return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
506
506
 
507
507
  @enforce_types
508
508
  def append_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
509
509
  messages = self.message_manager.create_many_messages(messages, actor=actor)
510
510
  message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids or []
511
511
  message_ids += [m.id for m in messages]
512
- return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
512
+ return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
513
513
 
514
514
  @enforce_types
515
515
  def reset_messages(self, agent_id: str, actor: PydanticUser, add_default_initial_messages: bool = False) -> PydanticAgentState:
letta/settings.py CHANGED
@@ -85,7 +85,7 @@ class ModelSettings(BaseSettings):
85
85
 
86
86
  # google ai
87
87
  gemini_api_key: Optional[str] = None
88
-
88
+ gemini_base_url: str = "https://generativelanguage.googleapis.com/"
89
89
  # together
90
90
  together_api_key: Optional[str] = None
91
91
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: letta-nightly
3
- Version: 0.6.23.dev20250211104055
3
+ Version: 0.6.24.dev20250212104045
4
4
  Summary: Create LLM agents with long-term memory and custom tools
5
5
  License: Apache License
6
6
  Author: Letta Team
@@ -34,6 +34,7 @@ Requires-Dist: docker (>=7.1.0,<8.0.0) ; extra == "external-tools" or extra == "
34
34
  Requires-Dist: docstring-parser (>=0.16,<0.17)
35
35
  Requires-Dist: docx2txt (>=0.8,<0.9)
36
36
  Requires-Dist: e2b-code-interpreter (>=1.0.3,<2.0.0) ; extra == "cloud-tool-sandbox"
37
+ Requires-Dist: faker (>=36.1.0,<37.0.0)
37
38
  Requires-Dist: fastapi (>=0.115.6,<0.116.0) ; extra == "server" or extra == "all"
38
39
  Requires-Dist: grpcio (>=1.68.1,<2.0.0)
39
40
  Requires-Dist: grpcio-tools (>=1.68.1,<2.0.0)
@@ -1,21 +1,21 @@
1
- letta/__init__.py,sha256=7Jj4qnODNSOECEFr6e6vMUw-7kqA-Y65s6SWaFBmr44,919
1
+ letta/__init__.py,sha256=uwg8T6rRtfO8a2xRXUlXfp0rIsNyRkmGFAGne7CYEXM,918
2
2
  letta/__main__.py,sha256=6Hs2PV7EYc5Tid4g4OtcLXhqVHiNYTGzSBdoOnW2HXA,29
3
- letta/agent.py,sha256=xOhzNF-DMxBzCkADyw1-OILsxfy2gMBkV0CoQ3XfW_I,56980
3
+ letta/agent.py,sha256=KHpTmZnyNu7VE9vjZ6cP6vG1RzTnFZN_FG3_RAwYeQY,57143
4
4
  letta/benchmark/benchmark.py,sha256=ebvnwfp3yezaXOQyGXkYCDYpsmre-b9hvNtnyx4xkG0,3701
5
5
  letta/benchmark/constants.py,sha256=aXc5gdpMGJT327VuxsT5FngbCK2J41PQYeICBO7g_RE,536
6
6
  letta/chat_only_agent.py,sha256=71Lf-df8y3nsE9IFKpEigaZaWHoWnXnhVChkp1L-83I,4760
7
- letta/cli/cli.py,sha256=_uGKM-RvGLGf7y8iWjkLgLTxIw7uWrdCdL5ETUOCkUs,16472
7
+ letta/cli/cli.py,sha256=zJz78-qDUz-depb7VQWkg87RBKiETQU4h9DI6ukQBa8,16477
8
8
  letta/cli/cli_config.py,sha256=2oo4vui1GXQarAD6Ru4SRzPvcW4eX2mCXOBusfYGvJw,8533
9
9
  letta/cli/cli_load.py,sha256=xFw-CuzjChcIptaqQ1XpDROENt0JSjyPeiQ0nmEeO1k,2706
10
10
  letta/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  letta/client/client.py,sha256=ZgJEt5F1yB6Q_z9Qi0FJ7Vmlb-YK41tymSKFB7NWy38,138311
12
- letta/client/streaming.py,sha256=DzE86XJTg_0j9eC45Hrpy9vPt-Wfo1F-sIv_B7iNV6I,5509
12
+ letta/client/streaming.py,sha256=lN9vamc07sfQlRbFif327GvURLUPhx-4AC_oUOPvs6w,4543
13
13
  letta/client/utils.py,sha256=VCGV-op5ZSmurd4yw7Vhf93XDQ0BkyBT8qsuV7EqfiU,2859
14
14
  letta/config.py,sha256=JFGY4TWW0Wm5fTbZamOwWqk5G8Nn-TXyhgByGoAqy2c,12375
15
- letta/constants.py,sha256=ZyPGoe68NfBCteTQI6hX9aFhszuBvy10xakb2FFKV9M,7276
15
+ letta/constants.py,sha256=1MG3QTRgKdcEI5vqoJUkxkiM2aDIQR4xd0BOh0mcCgw,7158
16
16
  letta/data_sources/connectors.py,sha256=R2AssXpqS7wN6VI8AfxvqaZs5S1ZACc4E_FewmR9iZI,7022
17
17
  letta/data_sources/connectors_helper.py,sha256=2TQjCt74fCgT5sw1AP8PalDEk06jPBbhrPG4HVr-WLs,3371
18
- letta/embeddings.py,sha256=VgqbUqYL6oTuLOKGOd_8swTRMYIpRTIWJbBthjT8eR8,8838
18
+ letta/embeddings.py,sha256=WwnIul-4po2jAgOPqZ36gAjhEBLa7hDcb3lNXpahBAw,10110
19
19
  letta/errors.py,sha256=6fQXg2unP-2fo3R7db0ayKKWlD2XMusOPNi9TgJplCg,5558
20
20
  letta/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  letta/functions/ast_parsers.py,sha256=MEFfGxpflUsw34JiY9zdunkpbczAYxte8t4rDPOmXfQ,3620
@@ -33,16 +33,16 @@ letta/humans/examples/basic.txt,sha256=Lcp8YESTWvOJgO4Yf_yyQmgo5bKakeB1nIVrwEGG6
33
33
  letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k1xw,297
34
34
  letta/interface.py,sha256=JszHyhIK34dpV0h5KL0CD1W4svh4eijaHGgfOYyZOhg,12755
35
35
  letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- letta/llm_api/anthropic.py,sha256=ZPPjpYZh8hleSjHkbRF27EEwSp-pg23JlSo-b1wbWBY,33602
36
+ letta/llm_api/anthropic.py,sha256=OcQ60GTXtz6-BcbaMyIFnLGvZSNZvA9Ixqjp3x_drNY,36235
37
37
  letta/llm_api/aws_bedrock.py,sha256=-ms9tdROu8DLrEZJ9XgL-IyIOU_0UJKuhfRbjLs0_Gc,3838
38
38
  letta/llm_api/azure_openai.py,sha256=Y1HKPog1XzM_f7ujUK_Gv2zQkoy5pU-1bKiUnvSxSrs,6297
39
39
  letta/llm_api/azure_openai_constants.py,sha256=_f7NKjKBPxGPFQPfP1e0umHk4Jmf56qNjyecI0PqWqU,267
40
40
  letta/llm_api/cohere.py,sha256=H5kzYH_aQAnGNq7lip7XyKGLEOKC318Iw0_tiTP6kc4,14772
41
41
  letta/llm_api/google_ai.py,sha256=MIX4nmyC6448AvyPPSE8JZ_tzSpKJTArkZSfQGGoy0M,17920
42
42
  letta/llm_api/helpers.py,sha256=ov9WHsLSvkceIpSNJ3PUgCvufD862Bcrum-bWrUVJko,16193
43
- letta/llm_api/llm_api_tools.py,sha256=UXm1t_DPyJVhBtzBGP8wv1LPTKyfsng31X0yfIAEusI,20292
43
+ letta/llm_api/llm_api_tools.py,sha256=rPqMHgKWMQ9sgQCWh48TavTcedhHg1rfAZlN5TUXlxk,20693
44
44
  letta/llm_api/mistral.py,sha256=fHdfD9ug-rQIk2qn8tRKay1U6w9maF11ryhKi91FfXM,1593
45
- letta/llm_api/openai.py,sha256=gE2RTYsyATYjicgE4VwATUAwTD38B74ZVqy8oVemzdQ,20277
45
+ letta/llm_api/openai.py,sha256=T69e4oveJw1IdzuONIaK4t1aRqXggTdfQ6n6eW0Uh8Q,20371
46
46
  letta/local_llm/README.md,sha256=hFJyw5B0TU2jrh9nb0zGZMgdH-Ei1dSRfhvPQG_NSoU,168
47
47
  letta/local_llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
48
  letta/local_llm/chat_completion_proxy.py,sha256=ElYR0M5SY2zL4NQzInye21MxqtiP3AUXX9Ia0KbkD4Y,12948
@@ -153,10 +153,10 @@ letta/schemas/letta_base.py,sha256=HTnSHJ2YSyhEdpY-vg9Y7ywqS1zzTjb9j5iVPYsuVSk,3
153
153
  letta/schemas/letta_message.py,sha256=QHzIEwnEJEkE02biCwyQo5IvL2fVq_whBRQD3vPYO48,9837
154
154
  letta/schemas/letta_request.py,sha256=dzy3kwb5j2QLaSV0sDlwISEMt2xxH3IiK-vR9xJV65k,1123
155
155
  letta/schemas/letta_response.py,sha256=yL0w-cdUazgEqg6_F4LJz2tugKNAZsB83Gr5jfXwa5U,7124
156
- letta/schemas/llm_config.py,sha256=lycAmLNvAm6D35jlLBN333x3tpzdk2Fwkx6yJl3pXjQ,5273
156
+ letta/schemas/llm_config.py,sha256=CsWQ7M70KXJdgCSGE66zA5Bb13XdBeWWFydaKnNvuf4,5515
157
157
  letta/schemas/llm_config_overrides.py,sha256=-oRglCTcajF6UAK3RAa0FLWVuKODPI1v403fDIWMAtA,1815
158
158
  letta/schemas/memory.py,sha256=GOYDfPKzbWftUWO9Hv4KW7xAi1EIQmC8zpP7qvEkVHw,10245
159
- letta/schemas/message.py,sha256=4L0-B2gCA2krF34AkKi_G4L2X5aALQnH997MowIcQgs,37457
159
+ letta/schemas/message.py,sha256=jTC1Z_gpCFlodfeystPq1WeNCE9Ccjqlr_HBlOwO0hQ,38072
160
160
  letta/schemas/openai/chat_completion_request.py,sha256=AOIwgbN3CZKVqkuXeMHeSa53u4h0wVq69t3T_LJ0vIE,3389
161
161
  letta/schemas/openai/chat_completion_response.py,sha256=Kaz9T0_ZvhWdVgGcouBuUuAG8-Nl3kC3dRymBQlONZ4,3980
162
162
  letta/schemas/openai/chat_completions.py,sha256=l0e9sT9boTD5VBU5YtJ0s7qUtCfFGB2K-gQLeEZ2LHU,3599
@@ -164,7 +164,7 @@ letta/schemas/openai/embedding_response.py,sha256=WKIZpXab1Av7v6sxKG8feW3ZtpQUNo
164
164
  letta/schemas/openai/openai.py,sha256=Hilo5BiLAGabzxCwnwfzK5QrWqwYD8epaEKFa4Pwndk,7970
165
165
  letta/schemas/organization.py,sha256=WWbUWVSp_VQRFwWN4fdHg1yObiV6x9rZnvIY8x5BPs0,746
166
166
  letta/schemas/passage.py,sha256=pdCLZgOn0gWK1gB6aFHLS0gfdWCBqLaiHDA0iQ12Zd8,3704
167
- letta/schemas/providers.py,sha256=1Sc7gWI6n9RkR4kOY4g3xGLVo6VCSwpiJySp3Pm3MQw,34903
167
+ letta/schemas/providers.py,sha256=il--tOeW2rXjRS0d9L9-UScu4PWrzMIWLwJBmVA65-Y,35510
168
168
  letta/schemas/run.py,sha256=SRqPRziINIiPunjOhE_NlbnQYgxTvqmbauni_yfBQRA,2085
169
169
  letta/schemas/sandbox_config.py,sha256=Nz8K5brqe6jpf66KnTJ0-E7ZeFdPoBFGN-XOI35OeaY,5926
170
170
  letta/schemas/source.py,sha256=-BQVolcXA2ziCu2ztR6cbTdGUc8G7vGJy7rvpdf1hpg,2880
@@ -181,8 +181,8 @@ letta/server/rest_api/app.py,sha256=9cf9H6vZhN-iBJqkqjBdFWjA3PlKfok-q48ltI71qls,
181
181
  letta/server/rest_api/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
182
  letta/server/rest_api/auth/index.py,sha256=fQBGyVylGSRfEMLQ17cZzrHd5Y1xiVylvPqH5Rl-lXQ,1378
183
183
  letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
184
- letta/server/rest_api/chat_completions_interface.py,sha256=i9tfb9oSh14QNY-1ghWYtdgP7_RiyPHD5NcA2FKF3Dw,10195
185
- letta/server/rest_api/interface.py,sha256=ZGTJ5WIRNsWgKO0yoCD-yUESxq838qNrK3N1SzRZl40,51886
184
+ letta/server/rest_api/chat_completions_interface.py,sha256=XAMbQ-f0KDUeMAtnazwbjcrUn8ZpF8vBBtYw_kEPZ-8,10932
185
+ letta/server/rest_api/interface.py,sha256=UpmXS-srzM3t0SKJvrFIFhsIyjufLpdjursO8IPKbjs,53138
186
186
  letta/server/rest_api/optimistic_json_parser.py,sha256=1z4d9unmxMb0ou7owJ62uUQoNjNYf21FmaNdg0ZcqUU,6567
187
187
  letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
188
  letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -190,7 +190,7 @@ letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256
190
190
  letta/server/rest_api/routers/v1/__init__.py,sha256=tzD8Oh6ynPkg8ULcITWcwalLL81SIh6eztPqV9l7VGk,1162
191
191
  letta/server/rest_api/routers/v1/agents.py,sha256=AkoJWxn-cJEURGXBarJtasTJsldPe6vv9TFOiGpJRlY,25473
192
192
  letta/server/rest_api/routers/v1/blocks.py,sha256=oJYOpGUTd4AhKwVolVlZPIXO2EoOrBHkyi2PdrmbtmA,3888
193
- letta/server/rest_api/routers/v1/health.py,sha256=pKCuVESlVOhGIb4VC4K-H82eZqfghmT6kvj2iOkkKuc,401
193
+ letta/server/rest_api/routers/v1/health.py,sha256=MoOjkydhGcJXTiuJrKIB0etVXiRMdTa51S8RQ8-50DQ,399
194
194
  letta/server/rest_api/routers/v1/jobs.py,sha256=pKihW12hQdFwt6tHQXs94yOMv6xotlhBB3Vl7Q5ASKQ,2738
195
195
  letta/server/rest_api/routers/v1/llms.py,sha256=lYp5URXtZk1yu_Pe-p1Wq1uQ0qeb6aWtx78rXSB7N_E,881
196
196
  letta/server/rest_api/routers/v1/organizations.py,sha256=8n-kA9LHtKImdY2xL-v7m6nYAbFWqH1vjBCJhQbv7Is,2077
@@ -203,7 +203,7 @@ letta/server/rest_api/routers/v1/tags.py,sha256=45G0cmcP-ER0OO5OanT_fGtGQfl9ZjRK
203
203
  letta/server/rest_api/routers/v1/tools.py,sha256=Ft1wnS7RJT3TOfwSGMJ0_gfTpXnVArZUtPCXT3osI-0,12615
204
204
  letta/server/rest_api/routers/v1/users.py,sha256=G5DBHSkPfBgVHN2Wkm-rVYiLQAudwQczIq2Z3YLdbVo,2277
205
205
  letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143
206
- letta/server/rest_api/utils.py,sha256=dsjkZzgo9Rk3fjUf1ajjiiql1eeO5DAzmXprttI7bJU,3993
206
+ letta/server/rest_api/utils.py,sha256=X7D6PsSVOAH6_irY0bgxdnS2yZ5ux-OA4eAiYqVgAPE,4438
207
207
  letta/server/server.py,sha256=8tRXPLta26ARQSThMDnKDAxTGx39j8Zw-41kgEgSpoQ,59850
208
208
  letta/server/startup.sh,sha256=qEi6dQHJRzEzDIgnIODj-RYp-O1XstfFpc6cFLkUzVs,1576
209
209
  letta/server/static_files/assets/index-048c9598.js,sha256=mR16XppvselwKCcNgONs4L7kZEVa4OEERm4lNZYtLSk,146819
@@ -218,7 +218,7 @@ letta/server/ws_api/interface.py,sha256=TWl9vkcMCnLsUtgsuENZ-ku2oMDA-OUTzLh_yNRo
218
218
  letta/server/ws_api/protocol.py,sha256=M_-gM5iuDBwa1cuN2IGNCG5GxMJwU2d3XW93XALv9s8,1821
219
219
  letta/server/ws_api/server.py,sha256=cBSzf-V4zT1bL_0i54OTI3cMXhTIIxqjSRF8pYjk7fg,5835
220
220
  letta/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
- letta/services/agent_manager.py,sha256=FLb6Y3_aZSNGTIfUrKJbRTXTMHJBlf8OGH-Ahf0moY0,50958
221
+ letta/services/agent_manager.py,sha256=S-WBNNg4H84eabplDkFHdOD4tjKkPwIosDI3yM8aN4E,50964
222
222
  letta/services/block_manager.py,sha256=u56TXG46QDMbQZadDGCO7fY1vreJ69Xr_0MUF53xw4k,5519
223
223
  letta/services/helpers/agent_manager_helper.py,sha256=RH0MXLZASkP2LVbVNUfSYHrcBYZnVxFd9ejGjRK90Hw,11283
224
224
  letta/services/helpers/tool_execution_helper.py,sha256=q8uSiQcX6VH_iNg5VNloZgC2JkH9lIOXBKCXYPx2Yac,6097
@@ -234,13 +234,13 @@ letta/services/step_manager.py,sha256=_PJUgaXyUHKCdlwt9CAmKhdeCNzKE_0_8-SRdUzpZa
234
234
  letta/services/tool_execution_sandbox.py,sha256=4XBYkCEBLG6GqijxgqeLIQQJ9zRbsJa8vZ4dZG04Pq8,22080
235
235
  letta/services/tool_manager.py,sha256=9Y15q0GqnADk-tnUeWDFFsDOt_ZjwsPU2oteDVtHAF4,9572
236
236
  letta/services/user_manager.py,sha256=1U8BQ_-MBkEW2wnSFV_OsTwBmRAZLN8uHLFjnDjK3hA,4308
237
- letta/settings.py,sha256=TnWJKZxeu571rKZQPGZj9fSupDwHUj1Pn5yzer6TlMY,6263
237
+ letta/settings.py,sha256=gO5X4miD884jvFVsGoL_1kBE1fGlrce6haZ9N_v7CCs,6334
238
238
  letta/streaming_interface.py,sha256=lo2VAQRUJOdWTijwnXuKOC9uejqr2siUAEmZiQUXkj8,15710
239
239
  letta/streaming_utils.py,sha256=jLqFTVhUL76FeOuYk8TaRQHmPTf3HSRc2EoJwxJNK6U,11946
240
240
  letta/system.py,sha256=S_0cod77iEttkFd1bSh2wenLCKA8YL487AuVenIDUng,8425
241
241
  letta/utils.py,sha256=lgBDWKmrQrmJGPxcgamFC2aJyi6I0dX7bzLBt3YC6j0,34051
242
- letta_nightly-0.6.23.dev20250211104055.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
243
- letta_nightly-0.6.23.dev20250211104055.dist-info/METADATA,sha256=vbZOnhlQslB1TJ-Y9r96eSCTYx2pH380fdjigSmBqxM,22156
244
- letta_nightly-0.6.23.dev20250211104055.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
245
- letta_nightly-0.6.23.dev20250211104055.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
246
- letta_nightly-0.6.23.dev20250211104055.dist-info/RECORD,,
242
+ letta_nightly-0.6.24.dev20250212104045.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
243
+ letta_nightly-0.6.24.dev20250212104045.dist-info/METADATA,sha256=USLZaRA7JEPRVOe9nB7sZQrIKjcWHFv52yhq7B67tNk,22196
244
+ letta_nightly-0.6.24.dev20250212104045.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
245
+ letta_nightly-0.6.24.dev20250212104045.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
246
+ letta_nightly-0.6.24.dev20250212104045.dist-info/RECORD,,