letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (87)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +47 -12
  3. letta/agents/base_agent.py +7 -4
  4. letta/agents/helpers.py +52 -0
  5. letta/agents/letta_agent.py +105 -42
  6. letta/agents/voice_agent.py +2 -2
  7. letta/constants.py +13 -1
  8. letta/errors.py +10 -3
  9. letta/functions/function_sets/base.py +65 -0
  10. letta/functions/interface.py +2 -2
  11. letta/functions/mcp_client/base_client.py +18 -1
  12. letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
  13. letta/groups/helpers.py +113 -0
  14. letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
  15. letta/groups/sleeptime_multi_agent.py +259 -0
  16. letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
  17. letta/helpers/converters.py +109 -7
  18. letta/helpers/message_helper.py +1 -0
  19. letta/helpers/tool_rule_solver.py +40 -23
  20. letta/interface.py +12 -5
  21. letta/interfaces/anthropic_streaming_interface.py +329 -0
  22. letta/llm_api/anthropic.py +12 -1
  23. letta/llm_api/anthropic_client.py +65 -14
  24. letta/llm_api/azure_openai.py +2 -2
  25. letta/llm_api/google_ai_client.py +13 -2
  26. letta/llm_api/google_constants.py +3 -0
  27. letta/llm_api/google_vertex_client.py +2 -2
  28. letta/llm_api/llm_api_tools.py +1 -1
  29. letta/llm_api/llm_client.py +7 -0
  30. letta/llm_api/llm_client_base.py +2 -7
  31. letta/llm_api/openai.py +7 -1
  32. letta/llm_api/openai_client.py +250 -0
  33. letta/orm/__init__.py +4 -0
  34. letta/orm/agent.py +6 -0
  35. letta/orm/block.py +32 -2
  36. letta/orm/block_history.py +46 -0
  37. letta/orm/custom_columns.py +60 -0
  38. letta/orm/enums.py +7 -0
  39. letta/orm/group.py +6 -0
  40. letta/orm/groups_blocks.py +13 -0
  41. letta/orm/llm_batch_items.py +55 -0
  42. letta/orm/llm_batch_job.py +48 -0
  43. letta/orm/message.py +7 -1
  44. letta/orm/organization.py +2 -0
  45. letta/orm/sqlalchemy_base.py +18 -15
  46. letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
  47. letta/prompts/system/sleeptime.txt +26 -0
  48. letta/schemas/agent.py +13 -1
  49. letta/schemas/enums.py +17 -2
  50. letta/schemas/group.py +14 -1
  51. letta/schemas/letta_message.py +5 -3
  52. letta/schemas/llm_batch_job.py +53 -0
  53. letta/schemas/llm_config.py +14 -4
  54. letta/schemas/message.py +44 -0
  55. letta/schemas/tool.py +3 -0
  56. letta/schemas/usage.py +1 -0
  57. letta/server/db.py +2 -0
  58. letta/server/rest_api/app.py +1 -1
  59. letta/server/rest_api/chat_completions_interface.py +8 -3
  60. letta/server/rest_api/interface.py +36 -7
  61. letta/server/rest_api/routers/v1/agents.py +53 -39
  62. letta/server/rest_api/routers/v1/runs.py +14 -2
  63. letta/server/rest_api/utils.py +15 -4
  64. letta/server/server.py +120 -71
  65. letta/services/agent_manager.py +70 -6
  66. letta/services/block_manager.py +190 -2
  67. letta/services/group_manager.py +68 -0
  68. letta/services/helpers/agent_manager_helper.py +6 -4
  69. letta/services/llm_batch_manager.py +139 -0
  70. letta/services/message_manager.py +17 -31
  71. letta/services/tool_executor/tool_execution_sandbox.py +1 -3
  72. letta/services/tool_executor/tool_executor.py +9 -20
  73. letta/services/tool_manager.py +14 -3
  74. letta/services/tool_sandbox/__init__.py +0 -0
  75. letta/services/tool_sandbox/base.py +188 -0
  76. letta/services/tool_sandbox/e2b_sandbox.py +116 -0
  77. letta/services/tool_sandbox/local_sandbox.py +221 -0
  78. letta/sleeptime_agent.py +61 -0
  79. letta/streaming_interface.py +20 -10
  80. letta/utils.py +4 -0
  81. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
  82. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
  83. letta/offline_memory_agent.py +0 -173
  84. letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
  85. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
  86. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
  87. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import copy
 import json
+import uuid
 import warnings
 from collections import OrderedDict
 from datetime import datetime, timezone
@@ -78,6 +79,7 @@ class MessageCreate(BaseModel):
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
     name: Optional[str] = Field(None, description="The name of the participant.")
+    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
@@ -168,12 +170,17 @@ class Message(BaseMessage):
         json_message["created_at"] = self.created_at.isoformat()
         return json_message
 
+    @staticmethod
+    def generate_otid():
+        return str(uuid.uuid4())
+
     @staticmethod
     def to_letta_messages_from_list(
         messages: List[Message],
         use_assistant_message: bool = True,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         if use_assistant_message:
             message_ids_to_remove = []
@@ -203,6 +210,7 @@ class Message(BaseMessage):
                 use_assistant_message=use_assistant_message,
                 assistant_message_tool_name=assistant_message_tool_name,
                 assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+                reverse=reverse,
             )
         ]
 
@@ -211,6 +219,7 @@ class Message(BaseMessage):
         use_assistant_message: bool = False,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
         messages = []
@@ -221,18 +230,21 @@ class Message(BaseMessage):
             if self.content:
                 # Check for ReACT-style COT inside of TextContent
                 if len(self.content) == 1 and isinstance(self.content[0], TextContent):
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     messages.append(
                         ReasoningMessage(
                             id=self.id,
                             date=self.created_at,
                             reasoning=self.content[0].text,
                             name=self.name,
+                            otid=otid,
                         )
                     )
                 # Otherwise, we may have a list of multiple types
                 else:
                     # TODO we can probably collapse these two cases into a single loop
                     for content_part in self.content:
+                        otid = Message.generate_otid_from_id(self.id, len(messages))
                         if isinstance(content_part, TextContent):
                             # COT
                             messages.append(
@@ -241,6 +253,7 @@ class Message(BaseMessage):
                                     date=self.created_at,
                                     reasoning=content_part.text,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, ReasoningContent):
@@ -253,6 +266,7 @@ class Message(BaseMessage):
                                     source="reasoner_model",  # TODO do we want to tag like this?
                                     signature=content_part.signature,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, RedactedReasoningContent):
@@ -264,6 +278,7 @@ class Message(BaseMessage):
                                     state="redacted",
                                     hidden_reasoning=content_part.data,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         else:
@@ -272,6 +287,7 @@ class Message(BaseMessage):
             if self.tool_calls is not None:
                 # This is type FunctionCall
                 for tool_call in self.tool_calls:
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     # If we're supporting using assistant message,
                     # then we want to treat certain function calls as a special case
                     if use_assistant_message and tool_call.function.name == assistant_message_tool_name:
@@ -287,6 +303,7 @@ class Message(BaseMessage):
                                 date=self.created_at,
                                 content=message_string,
                                 name=self.name,
+                                otid=otid,
                             )
                         )
                     else:
@@ -300,6 +317,7 @@ class Message(BaseMessage):
                                     tool_call_id=tool_call.id,
                                 ),
                                 name=self.name,
+                                otid=otid,
                             )
                         )
         elif self.role == MessageRole.tool:
@@ -341,6 +359,7 @@ class Message(BaseMessage):
                     stdout=self.tool_returns[0].stdout if self.tool_returns else None,
                     stderr=self.tool_returns[0].stderr if self.tool_returns else None,
                     name=self.name,
+                    otid=self.id.replace("message-", ""),
                 )
             )
         elif self.role == MessageRole.user:
@@ -357,6 +376,7 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=message_str or text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         elif self.role == MessageRole.system:
@@ -372,11 +392,15 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         else:
             raise ValueError(self.role)
 
+        if reverse:
+            messages.reverse()
+
         return messages
 
     @staticmethod
@@ -670,6 +694,9 @@ class Message(BaseMessage):
 
         def add_xml_tag(string: str, xml_tag: Optional[str]):
             # NOTE: Anthropic docs recommends using <thinking> tag when using CoT + tool use
+            if f"<{xml_tag}>" in string and f"</{xml_tag}>" in string:
+                # don't nest if tags already exist
+                return string
             return f"<{xml_tag}>{string}</{xml_tag}" if xml_tag else string
 
         if self.role == "system":
@@ -988,6 +1015,23 @@ class Message(BaseMessage):
 
         return cohere_message
 
+    @staticmethod
+    def generate_otid_from_id(message_id: str, index: int) -> str:
+        """
+        Convert message id to bits and change the list bit to the index
+        """
+        if not 0 <= index < 128:
+            raise ValueError("Index must be between 0 and 127")
+
+        message_uuid = message_id.replace("message-", "")
+        uuid_int = int(message_uuid.replace("-", ""), 16)
+
+        # Clear last 7 bits and set them to index; supports up to 128 unique indices
+        uuid_int = (uuid_int & ~0x7F) | (index & 0x7F)
+
+        hex_str = f"{uuid_int:032x}"
+        return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}"
+
 
 class ToolReturn(BaseModel):
     status: Literal["success", "error"] = Field(..., description="The status of the tool call")
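A note on the new otid helpers above: generate_otid() simply mints a random UUID for incoming MessageCreate payloads, while generate_otid_from_id() deterministically derives one otid per LettaMessage emitted from a stored Message by overwriting the low 7 bits of the message UUID with the message's position in the output list (hence the 0-127 limit). The standalone sketch below reproduces that derivation; the message id is invented purely for illustration.

import uuid

def generate_otid_from_id(message_id: str, index: int) -> str:
    # Mirrors the helper in the hunk above: keep the UUID, overwrite its low 7 bits with `index`
    if not 0 <= index < 128:
        raise ValueError("Index must be between 0 and 127")
    uuid_int = int(message_id.replace("message-", "").replace("-", ""), 16)
    uuid_int = (uuid_int & ~0x7F) | (index & 0x7F)
    hex_str = f"{uuid_int:032x}"
    return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}"

message_id = "message-" + str(uuid.uuid4())  # hypothetical id, for illustration only
print(generate_otid_from_id(message_id, 0))
print(generate_otid_from_id(message_id, 1))  # same UUID except for the low 7 bits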
letta/schemas/tool.py CHANGED
@@ -104,6 +104,9 @@ class Tool(BaseTool):
         elif self.tool_type in {ToolType.LETTA_MULTI_AGENT_CORE}:
             # If it's letta multi-agent tool, we also generate the json_schema on the fly here
             self.json_schema = get_json_schema_from_module(module_name=LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name=self.name)
+        elif self.tool_type in {ToolType.LETTA_SLEEPTIME_CORE}:
+            # If it's letta sleeptime core tool, we generate the json_schema on the fly here
+            self.json_schema = get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=self.name)
 
         # At this point, we need to validate that at least json_schema is populated
         if not self.json_schema:
letta/schemas/usage.py CHANGED
@@ -23,3 +23,4 @@ class LettaUsageStatistics(BaseModel):
     step_count: int = Field(0, description="The number of steps taken by the agent.")
     # TODO: Optional for now. This field makes everyone's lives easier
     steps_messages: Optional[List[List[Message]]] = Field(None, description="The messages generated per step")
+    run_ids: Optional[List[str]] = Field(None, description="The background task run IDs associated with the agent interaction")
letta/server/db.py CHANGED
@@ -59,11 +59,13 @@ if settings.letta_pg_uri_no_default:
     # create engine
     engine = create_engine(
         settings.letta_pg_uri,
+        # f"{settings.letta_pg_uri}?options=-c%20client_encoding=UTF8",
         pool_size=settings.pg_pool_size,
         max_overflow=settings.pg_max_overflow,
         pool_timeout=settings.pg_pool_timeout,
         pool_recycle=settings.pg_pool_recycle,
         echo=settings.pg_echo,
+        # connect_args={"client_encoding": "utf8"},
     )
 else:
     # TODO: don't rely on config storage
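The two added lines are commented out, which suggests an attempt (left disabled) to force UTF-8 client encoding on the Postgres connection. If it were enabled, the usual SQLAlchemy/psycopg2 form would look roughly like the sketch below; the URI and pool sizes are placeholders, not the project's settings.

from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://user:pass@localhost:5432/letta",  # placeholder URI
    pool_size=10,
    max_overflow=5,
    connect_args={"client_encoding": "utf8"},  # ask libpq/psycopg2 for a UTF-8 session
)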
letta/server/rest_api/app.py CHANGED
@@ -139,7 +139,7 @@ def create_application() -> "FastAPI":
 
     @app.on_event("startup")
     async def configure_executor():
-        print(f"Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
+        print(f"INFO: Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
         loop = asyncio.get_running_loop()
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=settings.event_loop_threadpool_max_workers)
         loop.set_default_executor(executor)
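For context, this startup hook installs an explicitly sized ThreadPoolExecutor as the event loop's default executor, so every run_in_executor(None, ...) call in the server shares one pool. A minimal self-contained version of the same pattern, with the worker count hard-coded in place of settings.event_loop_threadpool_max_workers, might look like this:

import asyncio
import concurrent.futures

async def main():
    loop = asyncio.get_running_loop()
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)  # stand-in for the settings value
    loop.set_default_executor(executor)
    # Blocking work submitted with executor=None now runs on the sized pool
    result = await loop.run_in_executor(None, sum, range(10))
    print(result)

asyncio.run(main())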
letta/server/rest_api/chat_completions_interface.py CHANGED
@@ -155,7 +155,12 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         return
 
     def process_chunk(
-        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime, expect_reasoning_content: bool = False
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+        message_index: int = 0,
     ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
@@ -172,7 +177,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle LLM reasoning or internal monologue. Example usage: if you want
         to capture chain-of-thought for debugging in a non-streaming scenario.
@@ -186,7 +191,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle function-related log messages, typically of the form:
         It's a no-op by default.
letta/server/rest_api/interface.py CHANGED
@@ -165,7 +165,7 @@ class QueuingInterface(AgentInterface):
             print(vars(msg_obj))
             print(msg_obj.created_at.isoformat())
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """Handle the agent's internal monologue"""
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
         if self.debug:
@@ -209,7 +209,9 @@ class QueuingInterface(AgentInterface):
 
         self._queue_push(message_api=new_message, message_obj=msg_obj)
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False) -> None:
+    def function_message(
+        self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False, chunk_index: Optional[int] = None
+    ) -> None:
         """Handle the agent calling a function"""
         # TODO handle 'function' messages that indicate the start of a function call
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
@@ -466,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # and `content` needs to be handled outside the interface
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -478,6 +481,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         """
         choice = chunk.choices[0]
         message_delta = choice.delta
+        otid = Message.generate_otid_from_id(message_id, message_index)
 
         if (
             message_delta.content is None
@@ -499,6 +503,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 signature=message_delta.reasoning_content_signature,
                 source="reasoner_model" if message_delta.reasoning_content_signature else "non_reasoner_model",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.redacted_reasoning_content is not None:
             processed_chunk = HiddenReasoningMessage(
@@ -507,6 +512,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 hidden_reasoning=message_delta.redacted_reasoning_content,
                 state="redacted",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.content is not None:
             # "ignore" content if we expect reasoning content
@@ -534,6 +540,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=None,
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             except json.JSONDecodeError as e:
@@ -564,6 +571,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=message_date,
                 reasoning=message_delta.content,
                 name=name,
+                otid=otid,
             )
 
         # tool calls
@@ -612,7 +620,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                     diff = new_content.replace(prev_content, "", 1)
                     self.current_json_parse_result = parsed_args
-                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name)
+                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name, otid=otid)
                 else:
                     return None
 
@@ -645,6 +653,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=tool_call_delta.get("id"),
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             elif self.inner_thoughts_in_kwargs and tool_call.function:
@@ -681,6 +690,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=message_date,
                         reasoning=updates_inner_thoughts,
                         name=name,
+                        otid=otid,
                     )
                     # Additionally inner thoughts may stream back with a chunk of main JSON
                     # In that case, since we can only return a chunk at a time, we should buffer it
@@ -717,6 +727,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 tool_call_id=self.function_id_buffer,
                             ),
                             name=name,
+                            otid=otid,
                         )
 
                         # Record what the last function name we flushed was
@@ -774,6 +785,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     date=message_date,
                                     content=combined_chunk,
                                     name=name,
+                                    otid=otid,
                                 )
                                 # Store the ID of the tool call so allow skipping the corresponding response
                                 if self.function_id_buffer:
@@ -798,7 +810,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                                     diff = new_content.replace(prev_content, "", 1)
                                     self.current_json_parse_result = parsed_args
-                                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name)
+                                    processed_chunk = AssistantMessage(
+                                        id=message_id, date=message_date, content=diff, name=name, otid=otid
+                                    )
                                 else:
                                     return None
 
@@ -823,6 +837,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 name=name,
+                                otid=otid,
                             )
                             # clear buffer
                             self.function_args_buffer = None
@@ -838,6 +853,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 name=name,
+                                otid=otid,
                             )
                             self.function_id_buffer = None
 
@@ -967,6 +983,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=tool_call_delta.get("id"),
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             elif choice.finish_reason is not None:
@@ -1048,6 +1065,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         message_date: datetime,
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ):
         """Process a streaming chunk from an OpenAI-compatible server.
 
@@ -1074,18 +1092,20 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             message_date=message_date,
             expect_reasoning_content=expect_reasoning_content,
             name=name,
+            message_index=message_index,
         )
-
         if processed_chunk is None:
             return
 
         self._push_to_buffer(processed_chunk)
 
+        return processed_chunk.message_type
+
     def user_message(self, msg: str, msg_obj: Optional[Message] = None):
         """Letta receives a user message"""
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
 
@@ -1102,6 +1122,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=msg_obj.created_at,
                 reasoning=msg,
                 name=msg_obj.name,
+                otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
             )
 
             self._push_to_buffer(processed_chunk)
@@ -1113,6 +1134,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         reasoning=content.text,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, ReasoningContent):
                     processed_chunk = ReasoningMessage(
@@ -1122,6 +1144,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         reasoning=content.reasoning,
                         signature=content.signature,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, RedactedReasoningContent):
                     processed_chunk = HiddenReasoningMessage(
@@ -1130,6 +1153,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         state="redacted",
                         hidden_reasoning=content.data,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
 
             self._push_to_buffer(processed_chunk)
@@ -1142,7 +1166,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # NOTE: this is a no-op, we handle this special case in function_message instead
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta calls a function"""
 
         # TODO handle 'function' messages that indicate the start of a function call
@@ -1191,6 +1215,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args["message"],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     self._push_to_buffer(processed_chunk)
                 except Exception as e:
@@ -1214,6 +1239,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args[self.assistant_message_tool_kwarg],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     # Store the ID of the tool call so allow skipping the corresponding response
                     self.prev_assistant_message_id = function_call.id
@@ -1227,6 +1253,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                             tool_call_id=function_call.id,
                         ),
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
 
                 # processed_chunk = {
@@ -1267,6 +1294,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                     stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                     name=msg_obj.name,
+                    otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                 )
 
             elif msg.startswith("Error: "):
@@ -1282,6 +1310,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                     stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                     name=msg_obj.name,
+                    otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                 )
 
            else:
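The common thread in these interface changes is that every streamed chunk now carries an otid derived from the message id plus a chunk_index/message_index. Assuming the index stays fixed while one logical message streams, which is how the plumbing above reads, a client can group partial chunks by otid. A toy illustration with invented payloads:

from collections import defaultdict

chunks = [
    {"otid": "0a1b2c3d-0000-0000-0000-000000000000", "content": "Hel"},
    {"otid": "0a1b2c3d-0000-0000-0000-000000000000", "content": "lo!"},
    {"otid": "0a1b2c3d-0000-0000-0000-000000000001", "content": "send_message(...)"},
]

assembled = defaultdict(str)
for chunk in chunks:
    assembled[chunk["otid"]] += chunk["content"]

print(dict(assembled))  # two logical messages, keyed by their stable otid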
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -8,6 +8,7 @@ from fastapi.responses import JSONResponse
 from marshmallow import ValidationError
 from pydantic import Field
 from sqlalchemy.exc import IntegrityError, OperationalError
+from starlette.responses import StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
@@ -30,7 +31,6 @@ from letta.schemas.user import User
 from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
-from letta.settings import settings
 
 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
 
@@ -130,6 +130,10 @@ async def import_agent_serialized(
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
     project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
+    strip_messages: bool = Query(
+        False,
+        description="If set to True, strips all messages from the agent before importing.",
+    ),
 ):
     """
     Import a serialized agent file and recreate the agent in the system.
@@ -149,6 +153,7 @@ async def import_agent_serialized(
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             project_id=project_id,
+            strip_messages=strip_messages,
         )
         return new_agent
 
@@ -585,8 +590,10 @@ async def send_message(
     This endpoint accepts a message from a user and processes it through the agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    if settings.use_experimental:
-        logger.warning("USING EXPERIMENTAL!")
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
         experimental_agent = LettaAgent(
             agent_id=agent_id,
             message_manager=server.message_manager,
@@ -639,17 +646,38 @@ async def send_message_streaming(
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    result = await server.send_message_to_agent(
-        agent_id=agent_id,
-        actor=actor,
-        messages=request.messages,
-        stream_steps=True,
-        stream_tokens=request.stream_tokens,
-        # Support for AssistantMessage
-        use_assistant_message=request.use_assistant_message,
-        assistant_message_tool_name=request.assistant_message_tool_name,
-        assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
-    )
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
+        experimental_agent = LettaAgent(
+            agent_id=agent_id,
+            message_manager=server.message_manager,
+            agent_manager=server.agent_manager,
+            block_manager=server.block_manager,
+            passage_manager=server.passage_manager,
+            actor=actor,
+        )
+
+        messages = request.messages
+        content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
+        result = StreamingResponse(
+            experimental_agent.step_stream(UserMessage(content=content), max_steps=10, use_assistant_message=request.use_assistant_message),
+            media_type="text/event-stream",
+        )
+    else:
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=request.messages,
+            stream_steps=True,
+            stream_tokens=request.stream_tokens,
+            # Support for AssistantMessage
+            use_assistant_message=request.use_assistant_message,
+            assistant_message_tool_name=request.assistant_message_tool_name,
+            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        )
+
     return result
 
 
@@ -665,31 +693,17 @@ async def process_message_background(
 ) -> None:
     """Background task to process the message and update job status."""
     try:
-        # TODO(matt) we should probably make this stream_steps and log each step as it progresses, so the job update GET can see the total steps so far + partial usage?
-        if settings.use_experimental:
-            logger.warning("USING EXPERIMENTAL!")
-            experimental_agent = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-            )
-            content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
-            result = await experimental_agent.step(UserMessage(content=content), max_steps=10)
-        else:
-            result = await server.send_message_to_agent(
-                agent_id=agent_id,
-                actor=actor,
-                messages=messages,
-                stream_steps=False,  # NOTE(matt)
-                stream_tokens=False,
-                use_assistant_message=use_assistant_message,
-                assistant_message_tool_name=assistant_message_tool_name,
-                assistant_message_tool_kwarg=assistant_message_tool_kwarg,
-                metadata={"job_id": job_id},  # Pass job_id through metadata
-            )
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=messages,
+            stream_steps=False,  # NOTE(matt)
+            stream_tokens=False,
+            use_assistant_message=use_assistant_message,
+            assistant_message_tool_name=assistant_message_tool_name,
+            assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+            metadata={"job_id": job_id},  # Pass job_id through metadata
+        )
 
         # Update job status to completed
         job_update = JobUpdate(
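One structural change worth calling out above: for Anthropic-backed agents without sleeptime or a multi-agent group, send_message_streaming now bypasses server.send_message_to_agent and wraps experimental_agent.step_stream(...) directly in Starlette's StreamingResponse with an SSE media type. A minimal FastAPI sketch of that wrapping pattern follows; the generator body is invented, and only the response wiring mirrors the diff.

import asyncio

from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()

async def fake_step_stream():
    # Stand-in for experimental_agent.step_stream(...)
    for event in ["data: reasoning chunk\n\n", "data: assistant chunk\n\n"]:
        yield event
        await asyncio.sleep(0)

@app.get("/demo/stream")
async def demo_stream():
    return StreamingResponse(fake_step_stream(), media_type="text/event-stream")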