letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -71,15 +71,6 @@ class LettaCoreToolExecutor(ToolExecutor):
71
71
  )
72
72
 
73
73
  async def send_message(self, agent_state: AgentState, actor: User, message: str) -> Optional[str]:
74
- """
75
- Sends a message to the human user.
76
-
77
- Args:
78
- message (str): Message contents. All unicode (including emojis) are supported.
79
-
80
- Returns:
81
- Optional[str]: None is always returned as this function does not produce a response.
82
- """
83
74
  return "Sent message successfully."
84
75
 
85
76
  async def conversation_search(
@@ -92,19 +83,6 @@ class LettaCoreToolExecutor(ToolExecutor):
92
83
  start_date: Optional[str] = None,
93
84
  end_date: Optional[str] = None,
94
85
  ) -> Optional[str]:
95
- """
96
- Search prior conversation history using hybrid search (text + semantic similarity).
97
-
98
- Args:
99
- query (str): String to search for using both text matching and semantic similarity.
100
- roles (Optional[List[Literal["assistant", "user", "tool"]]]): Optional list of message roles to filter by.
101
- limit (Optional[int]): Maximum number of results to return. Uses system default if not specified.
102
- start_date (Optional[str]): Filter results to messages created after this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-15", "2024-01-15T14:30".
103
- end_date (Optional[str]): Filter results to messages created before this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-20", "2024-01-20T17:00".
104
-
105
- Returns:
106
- str: Query result string containing matching messages with timestamps and content.
107
- """
108
86
  try:
109
87
  # Parse datetime parameters if provided
110
88
  start_datetime = None
@@ -163,7 +141,6 @@ class LettaCoreToolExecutor(ToolExecutor):
163
141
  limit=search_limit,
164
142
  start_date=start_datetime,
165
143
  end_date=end_datetime,
166
- embedding_config=agent_state.embedding_config,
167
144
  )
168
145
 
169
146
  if len(message_results) == 0:
@@ -286,23 +263,9 @@ class LettaCoreToolExecutor(ToolExecutor):
286
263
  start_datetime: Optional[str] = None,
287
264
  end_datetime: Optional[str] = None,
288
265
  ) -> Optional[str]:
289
- """
290
- Search archival memory using semantic (embedding-based) search with optional temporal filtering.
291
-
292
- Args:
293
- query (str): String to search for using semantic similarity.
294
- tags (Optional[list[str]]): Optional list of tags to filter search results. Only passages with these tags will be returned.
295
- tag_match_mode (Literal["any", "all"]): How to match tags - "any" to match passages with any of the tags, "all" to match only passages with all tags. Defaults to "any".
296
- top_k (Optional[int]): Maximum number of results to return. Uses system default if not specified.
297
- start_datetime (Optional[str]): Filter results to passages created after this datetime. ISO 8601 format.
298
- end_datetime (Optional[str]): Filter results to passages created before this datetime. ISO 8601 format.
299
-
300
- Returns:
301
- str: Query result string containing matching passages with timestamps, content, and tags.
302
- """
303
266
  try:
304
267
  # Use the shared service method to get results
305
- formatted_results, count = await self.agent_manager.search_agent_archival_memory_async(
268
+ formatted_results = await self.agent_manager.search_agent_archival_memory_async(
306
269
  agent_id=agent_state.id,
307
270
  actor=actor,
308
271
  query=query,
@@ -313,7 +276,7 @@ class LettaCoreToolExecutor(ToolExecutor):
313
276
  end_datetime=end_datetime,
314
277
  )
315
278
 
316
- return formatted_results, count
279
+ return formatted_results
317
280
 
318
281
  except Exception as e:
319
282
  raise e
@@ -321,16 +284,6 @@ class LettaCoreToolExecutor(ToolExecutor):
321
284
  async def archival_memory_insert(
322
285
  self, agent_state: AgentState, actor: User, content: str, tags: Optional[list[str]] = None
323
286
  ) -> Optional[str]:
324
- """
325
- Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.
326
-
327
- Args:
328
- content (str): Content to write to the memory. All unicode (including emojis) are supported.
329
- tags (Optional[list[str]]): Optional list of tags to associate with this memory for better organization and filtering.
330
-
331
- Returns:
332
- Optional[str]: None is always returned as this function does not produce a response.
333
- """
334
287
  await self.passage_manager.insert_passage(
335
288
  agent_state=agent_state,
336
289
  text=content,
@@ -341,16 +294,6 @@ class LettaCoreToolExecutor(ToolExecutor):
341
294
  return None
342
295
 
343
296
  async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> Optional[str]:
344
- """
345
- Append to the contents of core memory.
346
-
347
- Args:
348
- label (str): Section of the memory to be edited.
349
- content (str): Content to write to the memory. All unicode (including emojis) are supported.
350
-
351
- Returns:
352
- Optional[str]: None is always returned as this function does not produce a response.
353
- """
354
297
  if agent_state.memory.get_block(label).read_only:
355
298
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
356
299
  current_value = str(agent_state.memory.get_block(label).value)
@@ -367,17 +310,6 @@ class LettaCoreToolExecutor(ToolExecutor):
367
310
  old_content: str,
368
311
  new_content: str,
369
312
  ) -> Optional[str]:
370
- """
371
- Replace the contents of core memory. To delete memories, use an empty string for new_content.
372
-
373
- Args:
374
- label (str): Section of the memory to be edited.
375
- old_content (str): String to replace. Must be an exact match.
376
- new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
377
-
378
- Returns:
379
- Optional[str]: None is always returned as this function does not produce a response.
380
- """
381
313
  if agent_state.memory.get_block(label).read_only:
382
314
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
383
315
  current_value = str(agent_state.memory.get_block(label).value)
@@ -389,20 +321,6 @@ class LettaCoreToolExecutor(ToolExecutor):
389
321
  return None
390
322
 
391
323
  async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
392
- """
393
- The memory_replace command allows you to replace a specific string in a memory
394
- block with a new string. This is used for making precise edits.
395
-
396
- Args:
397
- label (str): Section of the memory to be edited, identified by its label.
398
- old_str (str): The text to replace (must match exactly, including whitespace
399
- and indentation). Do not include line number prefixes.
400
- new_str (str): The new text to insert in place of the old text. Do not include line number prefixes.
401
-
402
- Returns:
403
- str: The success message
404
- """
405
-
406
324
  if agent_state.memory.get_block(label).read_only:
407
325
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
408
326
 
@@ -479,20 +397,6 @@ class LettaCoreToolExecutor(ToolExecutor):
479
397
  new_str: str,
480
398
  insert_line: int = -1,
481
399
  ) -> str:
482
- """
483
- The memory_insert command allows you to insert text at a specific location
484
- in a memory block.
485
-
486
- Args:
487
- label (str): Section of the memory to be edited, identified by its label.
488
- new_str (str): The text to insert. Do not include line number prefixes.
489
- insert_line (int): The line number after which to insert the text (0 for
490
- beginning of file). Defaults to -1 (end of the file).
491
-
492
- Returns:
493
- str: The success message
494
- """
495
-
496
400
  if agent_state.memory.get_block(label).read_only:
497
401
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
498
402
 
@@ -559,20 +463,6 @@ class LettaCoreToolExecutor(ToolExecutor):
559
463
  return success_msg
560
464
 
561
465
  async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
562
- """
563
- The memory_rethink command allows you to completely rewrite the contents of a
564
- memory block. Use this tool to make large sweeping changes (e.g. when you want
565
- to condense or reorganize the memory blocks), do NOT use this tool to make small
566
- precise edits (e.g. add or remove a line, replace a specific string, etc).
567
-
568
- Args:
569
- label (str): The memory block to be rewritten, identified by its label.
570
- new_memory (str): The new memory contents with information integrated from
571
- existing memory blocks and the conversation context. Do not include line number prefixes.
572
-
573
- Returns:
574
- str: The success message
575
- """
576
466
  if agent_state.memory.get_block(label).read_only:
577
467
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
578
468
 
@@ -611,12 +501,4 @@ class LettaCoreToolExecutor(ToolExecutor):
611
501
  return success_msg
612
502
 
613
503
  async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
614
- """
615
- Call the memory_finish_edits command when you are finished making edits
616
- (integrating all new information) into the memory blocks. This function
617
- is called when the agent is done rethinking the memory.
618
-
619
- Returns:
620
- Optional[str]: None is always returned as this function does not produce a response.
621
- """
622
504
  return None
@@ -5,10 +5,13 @@ from typing import Any, Dict, List, Optional
5
5
  from letta.constants import PINECONE_TEXT_FIELD_NAME
6
6
  from letta.functions.types import FileOpenRequest
7
7
  from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
8
+ from letta.helpers.tpuf_client import should_use_tpuf
8
9
  from letta.log import get_logger
9
10
  from letta.otel.tracing import trace_method
10
11
  from letta.schemas.agent import AgentState
12
+ from letta.schemas.enums import VectorDBProvider
11
13
  from letta.schemas.sandbox_config import SandboxConfig
14
+ from letta.schemas.source import Source
12
15
  from letta.schemas.tool import Tool
13
16
  from letta.schemas.tool_execution_result import ToolExecutionResult
14
17
  from letta.schemas.user import User
@@ -554,18 +557,140 @@ class LettaFileToolExecutor(ToolExecutor):
554
557
 
555
558
  self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
556
559
 
557
- # Check if Pinecone is enabled and use it if available
558
- if should_use_pinecone():
559
- return await self._search_files_pinecone(agent_state, query, limit)
560
- else:
561
- return await self._search_files_traditional(agent_state, query, limit)
560
+ # Check which vector DB to use - Turbopuffer takes precedence
561
+ attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
562
+ attached_tpuf_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.TPUF]
563
+ attached_pinecone_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.PINECONE]
564
+
565
+ if not attached_tpuf_sources and not attached_pinecone_sources:
566
+ return await self._search_files_native(agent_state, query, limit)
567
+
568
+ results = []
569
+
570
+ # If both have items, we half the limit roughly
571
+ # TODO: This is very hacky bc it skips the re-ranking - but this is a temporary stopgap while we think about migrating data
572
+
573
+ if attached_tpuf_sources and attached_pinecone_sources:
574
+ limit = max(limit // 2, 1)
575
+
576
+ if should_use_tpuf() and attached_tpuf_sources:
577
+ tpuf_result = await self._search_files_turbopuffer(agent_state, attached_tpuf_sources, query, limit)
578
+ results.append(tpuf_result)
579
+
580
+ if should_use_pinecone() and attached_pinecone_sources:
581
+ pinecone_result = await self._search_files_pinecone(agent_state, attached_pinecone_sources, query, limit)
582
+ results.append(pinecone_result)
583
+
584
+ # combine results from both sources
585
+ if results:
586
+ return "\n\n".join(results)
587
+
588
+ # fallback if no results from either source
589
+ return "No results found"
590
+
591
+ async def _search_files_turbopuffer(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
592
+ """Search files using Turbopuffer vector database."""
593
+
594
+ # Get attached sources
595
+ source_ids = [source.id for source in attached_sources]
596
+ if not source_ids:
597
+ return "No valid source IDs found for attached files"
598
+
599
+ # Get all attached files for this agent
600
+ file_agents = await self.files_agents_manager.list_files_for_agent(
601
+ agent_id=agent_state.id, per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit, actor=self.actor
602
+ )
603
+ if not file_agents:
604
+ return "No files are currently attached to search"
605
+
606
+ # Create a map of file_id to file_name for quick lookup
607
+ file_map = {fa.file_id: fa.file_name for fa in file_agents}
608
+
609
+ results = []
610
+ total_hits = 0
611
+ files_with_matches = {}
612
+
613
+ try:
614
+ from letta.helpers.tpuf_client import TurbopufferClient
615
+
616
+ tpuf_client = TurbopufferClient()
617
+
618
+ # Query Turbopuffer for all sources at once
619
+ search_results = await tpuf_client.query_file_passages(
620
+ source_ids=source_ids, # pass all source_ids as a list
621
+ organization_id=self.actor.organization_id,
622
+ actor=self.actor,
623
+ query_text=query,
624
+ search_mode="hybrid", # use hybrid search for best results
625
+ top_k=limit,
626
+ )
627
+
628
+ # Process search results
629
+ for passage, score, metadata in search_results:
630
+ if total_hits >= limit:
631
+ break
632
+
633
+ total_hits += 1
634
+
635
+ # get file name from our map
636
+ file_name = file_map.get(passage.file_id, "Unknown File")
562
637
 
563
- async def _search_files_pinecone(self, agent_state: AgentState, query: str, limit: int) -> str:
638
+ # group by file name
639
+ if file_name not in files_with_matches:
640
+ files_with_matches[file_name] = []
641
+ files_with_matches[file_name].append({"text": passage.text, "score": score, "passage_id": passage.id})
642
+
643
+ except Exception as e:
644
+ self.logger.error(f"Turbopuffer search failed: {str(e)}")
645
+ raise e
646
+
647
+ if not files_with_matches:
648
+ return f"No semantic matches found in Turbopuffer for query: '{query}'"
649
+
650
+ # Format results
651
+ passage_num = 0
652
+ for file_name, matches in files_with_matches.items():
653
+ for match in matches:
654
+ passage_num += 1
655
+
656
+ # format each passage with terminal-style header
657
+ score_display = f"(score: {match['score']:.3f})"
658
+ passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
659
+
660
+ # format the passage text
661
+ passage_text = match["text"].strip()
662
+ lines = passage_text.splitlines()
663
+ formatted_lines = []
664
+ for line in lines[:20]: # limit to first 20 lines per passage
665
+ formatted_lines.append(f" {line}")
666
+
667
+ if len(lines) > 20:
668
+ formatted_lines.append(f" ... [truncated {len(lines) - 20} more lines]")
669
+
670
+ passage_content = "\n".join(formatted_lines)
671
+ results.append(f"{passage_header}\n{passage_content}")
672
+
673
+ # mark access for files that had matches
674
+ if files_with_matches:
675
+ matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
676
+ if matched_file_names:
677
+ await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
678
+
679
+ # create summary header
680
+ file_count = len(files_with_matches)
681
+ summary = f"Found {total_hits} Turbopuffer matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
682
+
683
+ # combine all results
684
+ formatted_results = [summary, "=" * len(summary)] + results
685
+
686
+ self.logger.info(f"Turbopuffer search completed: {total_hits} matches across {file_count} files")
687
+ return "\n".join(formatted_results)
688
+
689
+ async def _search_files_pinecone(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
564
690
  """Search files using Pinecone vector database."""
565
691
 
566
692
  # Extract unique source_ids
567
693
  # TODO: Inefficient
568
- attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
569
694
  source_ids = [source.id for source in attached_sources]
570
695
  if not source_ids:
571
696
  return "No valid source IDs found for attached files"
@@ -658,7 +783,7 @@ class LettaFileToolExecutor(ToolExecutor):
658
783
  self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
659
784
  return "\n".join(formatted_results)
660
785
 
661
- async def _search_files_traditional(self, agent_state: AgentState, query: str, limit: int) -> str:
786
+ async def _search_files_native(self, agent_state: AgentState, query: str, limit: int) -> str:
662
787
  """Traditional search using existing passage manager."""
663
788
  # Get semantic search results
664
789
  passages = await self.agent_manager.query_source_passages_async(
@@ -13,6 +13,7 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
13
13
  from letta.schemas.user import User
14
14
  from letta.services.tool_executor.tool_executor_base import ToolExecutor
15
15
  from letta.settings import settings
16
+ from letta.utils import safe_create_task
16
17
 
17
18
  logger = get_logger(__name__)
18
19
 
@@ -55,7 +56,8 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
55
56
  f"{message}"
56
57
  )
57
58
 
58
- return str(await self._process_agent(agent_id=other_agent_id, message=augmented_message))
59
+ other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
60
+ return str(await self._process_agent(agent_state=other_agent_state, message=augmented_message))
59
61
 
60
62
  async def send_message_to_agents_matching_tags_async(
61
63
  self, agent_state: AgentState, message: str, match_all: List[str], match_some: List[str]
@@ -75,22 +77,20 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
75
77
  )
76
78
 
77
79
  tasks = [
78
- asyncio.create_task(self._process_agent(agent_id=agent_state.id, message=augmented_message)) for agent_state in matching_agents
80
+ safe_create_task(
81
+ self._process_agent(agent_state=agent_state, message=augmented_message), label=f"process_agent_{agent_state.id}"
82
+ )
83
+ for agent_state in matching_agents
79
84
  ]
80
85
  results = await asyncio.gather(*tasks)
81
86
  return str(results)
82
87
 
83
- async def _process_agent(self, agent_id: str, message: str) -> Dict[str, Any]:
84
- from letta.agents.letta_agent import LettaAgent
88
+ async def _process_agent(self, agent_state: AgentState, message: str) -> Dict[str, Any]:
89
+ from letta.agents.letta_agent_v2 import LettaAgentV2
85
90
 
86
91
  try:
87
- letta_agent = LettaAgent(
88
- agent_id=agent_id,
89
- message_manager=self.message_manager,
90
- agent_manager=self.agent_manager,
91
- block_manager=self.block_manager,
92
- job_manager=self.job_manager,
93
- passage_manager=self.passage_manager,
92
+ letta_agent = LettaAgentV2(
93
+ agent_state=agent_state,
94
94
  actor=self.actor,
95
95
  )
96
96
 
@@ -100,13 +100,13 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
100
100
  send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
101
101
 
102
102
  return {
103
- "agent_id": agent_id,
103
+ "agent_id": agent_state.id,
104
104
  "response": send_message_content if send_message_content else ["<no response>"],
105
105
  }
106
106
 
107
107
  except Exception as e:
108
108
  return {
109
- "agent_id": agent_id,
109
+ "agent_id": agent_state.id,
110
110
  "error": str(e),
111
111
  "type": type(e).__name__,
112
112
  }
@@ -123,7 +123,10 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
123
123
  f"{message}"
124
124
  )
125
125
 
126
- task = asyncio.create_task(self._process_agent(agent_id=other_agent_id, message=prefixed))
126
+ other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
127
+ task = safe_create_task(
128
+ self._process_agent(agent_state=other_agent_state, message=prefixed), label=f"send_message_to_{other_agent_id}"
129
+ )
127
130
 
128
131
  task.add_done_callback(lambda t: (logger.error(f"Async send_message task failed: {t.exception()}") if t.exception() else None))
129
132
 
@@ -23,7 +23,7 @@ from letta.services.helpers.tool_execution_helper import (
23
23
  from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
24
24
  from letta.services.tool_sandbox.base import AsyncToolSandboxBase
25
25
  from letta.settings import tool_settings
26
- from letta.utils import get_friendly_error_msg, parse_stderr_error_msg
26
+ from letta.utils import get_friendly_error_msg, parse_stderr_error_msg, safe_create_task
27
27
 
28
28
  logger = get_logger(__name__)
29
29
 
@@ -89,7 +89,7 @@ class AsyncToolSandboxLocal(AsyncToolSandboxBase):
89
89
  venv_preparation_task = None
90
90
  if use_venv:
91
91
  venv_path = str(os.path.join(sandbox_dir, local_configs.venv_name))
92
- venv_preparation_task = asyncio.create_task(self._prepare_venv(local_configs, venv_path, env))
92
+ venv_preparation_task = safe_create_task(self._prepare_venv(local_configs, venv_path, env), label="prepare_venv")
93
93
 
94
94
  # Generate and write execution script (always with markers, since we rely on stdout)
95
95
  code = await self.generate_execution_script(agent_state=agent_state, wrap_print_with_markers=True)
@@ -16,6 +16,7 @@ from letta.log import get_logger
16
16
  from letta.schemas.tool import ToolUpdate
17
17
  from letta.services.tool_manager import ToolManager
18
18
  from letta.services.tool_sandbox.modal_constants import CACHE_TTL_SECONDS, DEFAULT_CONFIG_KEY, MODAL_DEPLOYMENTS_KEY
19
+ from letta.utils import safe_create_task
19
20
 
20
21
  logger = get_logger(__name__)
21
22
 
@@ -197,7 +198,7 @@ class ModalVersionManager:
197
198
  if deployment_key in self._deployments_in_progress:
198
199
  self._deployments_in_progress[deployment_key].set()
199
200
  # Clean up after a short delay to allow waiters to wake up
200
- asyncio.create_task(self._cleanup_deployment_marker(deployment_key))
201
+ safe_create_task(self._cleanup_deployment_marker(deployment_key), label=f"cleanup_deployment_{deployment_key}")
201
202
 
202
203
  async def _cleanup_deployment_marker(self, deployment_key: str):
203
204
  """Clean up deployment marker after a delay."""
letta/settings.py CHANGED
@@ -211,6 +211,9 @@ class Settings(BaseSettings):
211
211
  enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")
212
212
  keepalive_interval: float = Field(50.0, description="Seconds between keepalive messages (default: 50)")
213
213
 
214
+ # SSE Streaming cancellation settings
215
+ enable_cancellation_aware_streaming: bool = Field(True, description="Enable cancellation aware streaming")
216
+
214
217
  # default handles
215
218
  default_llm_handle: Optional[str] = None
216
219
  default_embedding_handle: Optional[str] = None
@@ -303,6 +306,9 @@ class Settings(BaseSettings):
303
306
  tpuf_region: str = "gcp-us-central1"
304
307
  embed_all_messages: bool = False
305
308
 
309
+ # For encryption
310
+ encryption_key: Optional[str] = None
311
+
306
312
  # File processing timeout settings
307
313
  file_processing_timeout_minutes: int = 30
308
314
  file_processing_timeout_error_message: str = "File processing timed out after {} minutes. Please try again."
letta/streaming_utils.py CHANGED
@@ -99,6 +99,15 @@ class JSONInnerThoughtsExtractor:
99
99
  else:
100
100
  updates_main_json += c
101
101
  self.main_buffer += c
102
+ # NOTE (fix): Streaming JSON can arrive token-by-token from the LLM.
103
+ # In the old implementation we pre-inserted an opening quote after every
104
+ # key's colon (i.e. we emitted '"key":"' immediately). That implicitly
105
+ # assumed all values are strings. When a non-string value (e.g. true/false,
106
+ # numbers, null, or a nested object/array) streamed in next, the stream
107
+ # ended up with an unmatched '"' and appeared as a "missing end-quote" to
108
+ # clients. We now only emit an opening quote when we actually enter a
109
+ # string value (see below). This keeps values like booleans unquoted and
110
+ # avoids generating dangling quotes mid-stream.
102
111
  elif c == '"':
103
112
  if not self.escaped:
104
113
  self.in_string = not self.in_string
@@ -112,6 +121,14 @@ class JSONInnerThoughtsExtractor:
112
121
  self.main_buffer += self.main_json_held_buffer
113
122
  self.main_json_held_buffer = ""
114
123
  self.hold_main_json = False
124
+ elif self.state == "value":
125
+ # Opening quote for a string value (non-inner-thoughts only)
126
+ if not self.is_inner_thoughts_value:
127
+ if self.hold_main_json:
128
+ self.main_json_held_buffer += '"'
129
+ else:
130
+ updates_main_json += '"'
131
+ self.main_buffer += '"'
115
132
  else:
116
133
  if self.state == "key":
117
134
  self.state = "colon"
@@ -156,18 +173,26 @@ class JSONInnerThoughtsExtractor:
156
173
  updates_main_json += c
157
174
  self.main_buffer += c
158
175
  else:
176
+ # NOTE (fix): Do NOT pre-insert an opening quote after ':' any more.
177
+ # The value may not be a string; we only emit quotes when we actually
178
+ # see a string begin (handled in the '"' branch above). This prevents
179
+ # forced-quoting of non-string values and eliminates the common
180
+ # streaming artifact of "... 'request_heartbeat':'true}" missing the
181
+ # final quote.
159
182
  if c == ":" and self.state == "colon":
183
+ # Transition to reading a value; don't pre-insert quotes
160
184
  self.state = "value"
161
185
  self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
162
186
  if self.is_inner_thoughts_value:
163
- pass # Do not include 'inner_thoughts' key in main_json
187
+ # Do not include 'inner_thoughts' key in main_json
188
+ pass
164
189
  else:
165
190
  key_colon = f'"{self.current_key}":'
166
191
  if self.hold_main_json:
167
- self.main_json_held_buffer += key_colon + '"'
192
+ self.main_json_held_buffer += key_colon
168
193
  else:
169
- updates_main_json += key_colon + '"'
170
- self.main_buffer += key_colon + '"'
194
+ updates_main_json += key_colon
195
+ self.main_buffer += key_colon
171
196
  elif c == "," and self.state == "comma_or_end":
172
197
  if self.is_inner_thoughts_value:
173
198
  # Inner thoughts value ended