letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +81 -0
- letta/adapters/letta_llm_request_adapter.py +113 -0
- letta/adapters/letta_llm_stream_adapter.py +171 -0
- letta/agents/agent_loop.py +23 -0
- letta/agents/base_agent.py +4 -1
- letta/agents/base_agent_v2.py +68 -0
- letta/agents/helpers.py +3 -5
- letta/agents/letta_agent.py +23 -12
- letta/agents/letta_agent_v2.py +1221 -0
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +1 -1
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +53 -12
- letta/functions/helpers.py +3 -2
- letta/functions/schema_generator.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +4 -2
- letta/groups/sleeptime_multi_agent_v3.py +233 -0
- letta/helpers/tool_rule_solver.py +4 -0
- letta/helpers/tpuf_client.py +607 -34
- letta/interfaces/anthropic_streaming_interface.py +74 -30
- letta/interfaces/openai_streaming_interface.py +80 -37
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/openai_client.py +45 -4
- letta/orm/agent.py +4 -1
- letta/orm/block.py +2 -0
- letta/orm/blocks_agents.py +1 -0
- letta/orm/group.py +1 -0
- letta/orm/source.py +8 -1
- letta/orm/sources_agents.py +2 -1
- letta/orm/step_metrics.py +10 -0
- letta/orm/tools_agents.py +5 -2
- letta/schemas/block.py +4 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +8 -0
- letta/schemas/letta_message.py +1 -1
- letta/schemas/letta_request.py +2 -2
- letta/schemas/mcp.py +9 -1
- letta/schemas/message.py +42 -2
- letta/schemas/providers/ollama.py +1 -1
- letta/schemas/providers.py +1 -2
- letta/schemas/source.py +6 -0
- letta/schemas/step_metrics.py +2 -0
- letta/server/rest_api/interface.py +34 -2
- letta/server/rest_api/json_parser.py +2 -0
- letta/server/rest_api/redis_stream_manager.py +2 -1
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +132 -170
- letta/server/rest_api/routers/v1/blocks.py +6 -0
- letta/server/rest_api/routers/v1/folders.py +25 -7
- letta/server/rest_api/routers/v1/groups.py +6 -0
- letta/server/rest_api/routers/v1/internal_templates.py +218 -12
- letta/server/rest_api/routers/v1/messages.py +14 -19
- letta/server/rest_api/routers/v1/runs.py +43 -28
- letta/server/rest_api/routers/v1/sources.py +25 -7
- letta/server/rest_api/routers/v1/tools.py +42 -0
- letta/server/rest_api/streaming_response.py +11 -2
- letta/server/server.py +9 -6
- letta/services/agent_manager.py +39 -59
- letta/services/agent_serialization_manager.py +26 -11
- letta/services/archive_manager.py +60 -9
- letta/services/block_manager.py +5 -0
- letta/services/file_processor/embedder/base_embedder.py +5 -0
- letta/services/file_processor/embedder/openai_embedder.py +4 -0
- letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
- letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
- letta/services/file_processor/file_processor.py +9 -7
- letta/services/group_manager.py +74 -11
- letta/services/mcp_manager.py +134 -28
- letta/services/message_manager.py +229 -125
- letta/services/passage_manager.py +2 -1
- letta/services/source_manager.py +23 -1
- letta/services/summarizer/summarizer.py +4 -1
- letta/services/tool_executor/core_tool_executor.py +2 -120
- letta/services/tool_executor/files_tool_executor.py +133 -8
- letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
- letta/services/tool_sandbox/local_sandbox.py +2 -2
- letta/services/tool_sandbox/modal_version_manager.py +2 -1
- letta/settings.py +6 -0
- letta/streaming_utils.py +29 -4
- letta/utils.py +106 -4
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -71,15 +71,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         )
 
     async def send_message(self, agent_state: AgentState, actor: User, message: str) -> Optional[str]:
-        """
-        Sends a message to the human user.
-
-        Args:
-            message (str): Message contents. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         return "Sent message successfully."
 
     async def conversation_search(
@@ -92,19 +83,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         start_date: Optional[str] = None,
         end_date: Optional[str] = None,
     ) -> Optional[str]:
-        """
-        Search prior conversation history using hybrid search (text + semantic similarity).
-
-        Args:
-            query (str): String to search for using both text matching and semantic similarity.
-            roles (Optional[List[Literal["assistant", "user", "tool"]]]): Optional list of message roles to filter by.
-            limit (Optional[int]): Maximum number of results to return. Uses system default if not specified.
-            start_date (Optional[str]): Filter results to messages created after this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-15", "2024-01-15T14:30".
-            end_date (Optional[str]): Filter results to messages created before this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-20", "2024-01-20T17:00".
-
-        Returns:
-            str: Query result string containing matching messages with timestamps and content.
-        """
         try:
             # Parse datetime parameters if provided
             start_datetime = None
@@ -163,7 +141,6 @@ class LettaCoreToolExecutor(ToolExecutor):
                 limit=search_limit,
                 start_date=start_datetime,
                 end_date=end_datetime,
-                embedding_config=agent_state.embedding_config,
             )
 
             if len(message_results) == 0:
@@ -286,23 +263,9 @@ class LettaCoreToolExecutor(ToolExecutor):
         start_datetime: Optional[str] = None,
         end_datetime: Optional[str] = None,
     ) -> Optional[str]:
-        """
-        Search archival memory using semantic (embedding-based) search with optional temporal filtering.
-
-        Args:
-            query (str): String to search for using semantic similarity.
-            tags (Optional[list[str]]): Optional list of tags to filter search results. Only passages with these tags will be returned.
-            tag_match_mode (Literal["any", "all"]): How to match tags - "any" to match passages with any of the tags, "all" to match only passages with all tags. Defaults to "any".
-            top_k (Optional[int]): Maximum number of results to return. Uses system default if not specified.
-            start_datetime (Optional[str]): Filter results to passages created after this datetime. ISO 8601 format.
-            end_datetime (Optional[str]): Filter results to passages created before this datetime. ISO 8601 format.
-
-        Returns:
-            str: Query result string containing matching passages with timestamps, content, and tags.
-        """
         try:
             # Use the shared service method to get results
-            formatted_results
+            formatted_results = await self.agent_manager.search_agent_archival_memory_async(
                 agent_id=agent_state.id,
                 actor=actor,
                 query=query,
@@ -313,7 +276,7 @@ class LettaCoreToolExecutor(ToolExecutor):
                 end_datetime=end_datetime,
             )
 
-            return formatted_results
+            return formatted_results
 
         except Exception as e:
             raise e
@@ -321,16 +284,6 @@ class LettaCoreToolExecutor(ToolExecutor):
     async def archival_memory_insert(
         self, agent_state: AgentState, actor: User, content: str, tags: Optional[list[str]] = None
     ) -> Optional[str]:
-        """
-        Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.
-
-        Args:
-            content (str): Content to write to the memory. All unicode (including emojis) are supported.
-            tags (Optional[list[str]]): Optional list of tags to associate with this memory for better organization and filtering.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         await self.passage_manager.insert_passage(
             agent_state=agent_state,
             text=content,
@@ -341,16 +294,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return None
 
     async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> Optional[str]:
-        """
-        Append to the contents of core memory.
-
-        Args:
-            label (str): Section of the memory to be edited.
-            content (str): Content to write to the memory. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
         current_value = str(agent_state.memory.get_block(label).value)
@@ -367,17 +310,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         old_content: str,
         new_content: str,
     ) -> Optional[str]:
-        """
-        Replace the contents of core memory. To delete memories, use an empty string for new_content.
-
-        Args:
-            label (str): Section of the memory to be edited.
-            old_content (str): String to replace. Must be an exact match.
-            new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
         current_value = str(agent_state.memory.get_block(label).value)
@@ -389,20 +321,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return None
 
     async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
-        """
-        The memory_replace command allows you to replace a specific string in a memory
-        block with a new string. This is used for making precise edits.
-
-        Args:
-            label (str): Section of the memory to be edited, identified by its label.
-            old_str (str): The text to replace (must match exactly, including whitespace
-                and indentation). Do not include line number prefixes.
-            new_str (str): The new text to insert in place of the old text. Do not include line number prefixes.
-
-        Returns:
-            str: The success message
-        """
-
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -479,20 +397,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         new_str: str,
         insert_line: int = -1,
     ) -> str:
-        """
-        The memory_insert command allows you to insert text at a specific location
-        in a memory block.
-
-        Args:
-            label (str): Section of the memory to be edited, identified by its label.
-            new_str (str): The text to insert. Do not include line number prefixes.
-            insert_line (int): The line number after which to insert the text (0 for
-                beginning of file). Defaults to -1 (end of the file).
-
-        Returns:
-            str: The success message
-        """
-
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -559,20 +463,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return success_msg
 
     async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
-        """
-        The memory_rethink command allows you to completely rewrite the contents of a
-        memory block. Use this tool to make large sweeping changes (e.g. when you want
-        to condense or reorganize the memory blocks), do NOT use this tool to make small
-        precise edits (e.g. add or remove a line, replace a specific string, etc).
-
-        Args:
-            label (str): The memory block to be rewritten, identified by its label.
-            new_memory (str): The new memory contents with information integrated from
-                existing memory blocks and the conversation context. Do not include line number prefixes.
-
-        Returns:
-            str: The success message
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -611,12 +501,4 @@ class LettaCoreToolExecutor(ToolExecutor):
         return success_msg
 
     async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
-        """
-        Call the memory_finish_edits command when you are finished making edits
-        (integrating all new information) into the memory blocks. This function
-        is called when the agent is done rethinking the memory.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         return None
letta/services/tool_executor/files_tool_executor.py
CHANGED
@@ -5,10 +5,13 @@ from typing import Any, Dict, List, Optional
 from letta.constants import PINECONE_TEXT_FIELD_NAME
 from letta.functions.types import FileOpenRequest
 from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
+from letta.helpers.tpuf_client import should_use_tpuf
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.sandbox_config import SandboxConfig
+from letta.schemas.source import Source
 from letta.schemas.tool import Tool
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
@@ -554,18 +557,140 @@ class LettaFileToolExecutor(ToolExecutor):
 
         self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
 
-        # Check
-
-
-
-
+        # Check which vector DB to use - Turbopuffer takes precedence
+        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
+        attached_tpuf_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.TPUF]
+        attached_pinecone_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.PINECONE]
+
+        if not attached_tpuf_sources and not attached_pinecone_sources:
+            return await self._search_files_native(agent_state, query, limit)
+
+        results = []
+
+        # If both have items, we half the limit roughly
+        # TODO: This is very hacky bc it skips the re-ranking - but this is a temporary stopgap while we think about migrating data
+
+        if attached_tpuf_sources and attached_pinecone_sources:
+            limit = max(limit // 2, 1)
+
+        if should_use_tpuf() and attached_tpuf_sources:
+            tpuf_result = await self._search_files_turbopuffer(agent_state, attached_tpuf_sources, query, limit)
+            results.append(tpuf_result)
+
+        if should_use_pinecone() and attached_pinecone_sources:
+            pinecone_result = await self._search_files_pinecone(agent_state, attached_pinecone_sources, query, limit)
+            results.append(pinecone_result)
+
+        # combine results from both sources
+        if results:
+            return "\n\n".join(results)
+
+        # fallback if no results from either source
+        return "No results found"
+
+    async def _search_files_turbopuffer(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
+        """Search files using Turbopuffer vector database."""
+
+        # Get attached sources
+        source_ids = [source.id for source in attached_sources]
+        if not source_ids:
+            return "No valid source IDs found for attached files"
+
+        # Get all attached files for this agent
+        file_agents = await self.files_agents_manager.list_files_for_agent(
+            agent_id=agent_state.id, per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit, actor=self.actor
+        )
+        if not file_agents:
+            return "No files are currently attached to search"
+
+        # Create a map of file_id to file_name for quick lookup
+        file_map = {fa.file_id: fa.file_name for fa in file_agents}
+
+        results = []
+        total_hits = 0
+        files_with_matches = {}
+
+        try:
+            from letta.helpers.tpuf_client import TurbopufferClient
+
+            tpuf_client = TurbopufferClient()
+
+            # Query Turbopuffer for all sources at once
+            search_results = await tpuf_client.query_file_passages(
+                source_ids=source_ids,  # pass all source_ids as a list
+                organization_id=self.actor.organization_id,
+                actor=self.actor,
+                query_text=query,
+                search_mode="hybrid",  # use hybrid search for best results
+                top_k=limit,
+            )
+
+            # Process search results
+            for passage, score, metadata in search_results:
+                if total_hits >= limit:
+                    break
+
+                total_hits += 1
+
+                # get file name from our map
+                file_name = file_map.get(passage.file_id, "Unknown File")
 
-
+                # group by file name
+                if file_name not in files_with_matches:
+                    files_with_matches[file_name] = []
+                files_with_matches[file_name].append({"text": passage.text, "score": score, "passage_id": passage.id})
+
+        except Exception as e:
+            self.logger.error(f"Turbopuffer search failed: {str(e)}")
+            raise e
+
+        if not files_with_matches:
+            return f"No semantic matches found in Turbopuffer for query: '{query}'"
+
+        # Format results
+        passage_num = 0
+        for file_name, matches in files_with_matches.items():
+            for match in matches:
+                passage_num += 1
+
+                # format each passage with terminal-style header
+                score_display = f"(score: {match['score']:.3f})"
+                passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
+
+                # format the passage text
+                passage_text = match["text"].strip()
+                lines = passage_text.splitlines()
+                formatted_lines = []
+                for line in lines[:20]:  # limit to first 20 lines per passage
+                    formatted_lines.append(f"  {line}")
+
+                if len(lines) > 20:
+                    formatted_lines.append(f"  ... [truncated {len(lines) - 20} more lines]")
+
+                passage_content = "\n".join(formatted_lines)
+                results.append(f"{passage_header}\n{passage_content}")
+
+        # mark access for files that had matches
+        if files_with_matches:
+            matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
+            if matched_file_names:
+                await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
+
+        # create summary header
+        file_count = len(files_with_matches)
+        summary = f"Found {total_hits} Turbopuffer matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
+
+        # combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        self.logger.info(f"Turbopuffer search completed: {total_hits} matches across {file_count} files")
+        return "\n".join(formatted_results)
+
+    async def _search_files_pinecone(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
         """Search files using Pinecone vector database."""
 
         # Extract unique source_ids
         # TODO: Inefficient
-        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
         source_ids = [source.id for source in attached_sources]
         if not source_ids:
             return "No valid source IDs found for attached files"
@@ -658,7 +783,7 @@ class LettaFileToolExecutor(ToolExecutor):
         self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
         return "\n".join(formatted_results)
 
-    async def
+    async def _search_files_native(self, agent_state: AgentState, query: str, limit: int) -> str:
         """Traditional search using existing passage manager."""
         # Get semantic search results
         passages = await self.agent_manager.query_source_passages_async(
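The new semantic-search entry point above routes between Turbopuffer, Pinecone, and the native passage search based on each attached source's `vector_db_provider`, and roughly halves the per-backend budget when both external providers are attached. A minimal standalone sketch of that dispatch logic for readability; the `Provider` enum values and the `search_*` callables here are illustrative stand-ins, not Letta APIs:

```python
from enum import Enum
from typing import Callable, List


class Provider(str, Enum):
    # Stand-in for letta.schemas.enums.VectorDBProvider; member values are assumed.
    NATIVE = "native"
    TPUF = "tpuf"
    PINECONE = "pinecone"


def route_semantic_search(
    source_providers: List[Provider],
    limit: int,
    search_tpuf: Callable[[int], str],
    search_pinecone: Callable[[int], str],
    search_native: Callable[[int], str],
) -> str:
    """Dispatch a file search across whichever vector DBs the attached sources use."""
    has_tpuf = any(p == Provider.TPUF for p in source_providers)
    has_pinecone = any(p == Provider.PINECONE for p in source_providers)

    # No external vector DB attached: fall back to the native passage search.
    if not has_tpuf and not has_pinecone:
        return search_native(limit)

    # Both attached: roughly halve the per-backend budget (no re-ranking, per the TODO in the diff).
    if has_tpuf and has_pinecone:
        limit = max(limit // 2, 1)

    results = []
    if has_tpuf:
        results.append(search_tpuf(limit))
    if has_pinecone:
        results.append(search_pinecone(limit))

    return "\n\n".join(results) if results else "No results found"


# Example: both providers attached, a budget of 5 becomes 2 per backend.
print(route_semantic_search(
    [Provider.TPUF, Provider.PINECONE], 5,
    search_tpuf=lambda k: f"tpuf top-{k}",
    search_pinecone=lambda k: f"pinecone top-{k}",
    search_native=lambda k: f"native top-{k}",
))
```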
letta/services/tool_executor/multi_agent_tool_executor.py
CHANGED
@@ -13,6 +13,7 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.services.tool_executor.tool_executor_base import ToolExecutor
 from letta.settings import settings
+from letta.utils import safe_create_task
 
 logger = get_logger(__name__)
 
@@ -55,7 +56,8 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
             f"{message}"
         )
 
-
+        other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
+        return str(await self._process_agent(agent_state=other_agent_state, message=augmented_message))
 
     async def send_message_to_agents_matching_tags_async(
         self, agent_state: AgentState, message: str, match_all: List[str], match_some: List[str]
@@ -75,22 +77,20 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
         )
 
         tasks = [
-
+            safe_create_task(
+                self._process_agent(agent_state=agent_state, message=augmented_message), label=f"process_agent_{agent_state.id}"
+            )
+            for agent_state in matching_agents
         ]
         results = await asyncio.gather(*tasks)
         return str(results)
 
-    async def _process_agent(self,
-        from letta.agents.
+    async def _process_agent(self, agent_state: AgentState, message: str) -> Dict[str, Any]:
+        from letta.agents.letta_agent_v2 import LettaAgentV2
 
         try:
-            letta_agent =
-
-                message_manager=self.message_manager,
-                agent_manager=self.agent_manager,
-                block_manager=self.block_manager,
-                job_manager=self.job_manager,
-                passage_manager=self.passage_manager,
+            letta_agent = LettaAgentV2(
+                agent_state=agent_state,
                 actor=self.actor,
             )
 
@@ -100,13 +100,13 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
             send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
 
             return {
-                "agent_id":
+                "agent_id": agent_state.id,
                 "response": send_message_content if send_message_content else ["<no response>"],
             }
 
         except Exception as e:
             return {
-                "agent_id":
+                "agent_id": agent_state.id,
                 "error": str(e),
                 "type": type(e).__name__,
             }
@@ -123,7 +123,10 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
             f"{message}"
         )
 
-
+        other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
+        task = safe_create_task(
+            self._process_agent(agent_state=other_agent_state, message=prefixed), label=f"send_message_to_{other_agent_id}"
+        )
 
         task.add_done_callback(lambda t: (logger.error(f"Async send_message task failed: {t.exception()}") if t.exception() else None))
 
letta/services/tool_sandbox/local_sandbox.py
CHANGED
@@ -23,7 +23,7 @@ from letta.services.helpers.tool_execution_helper import (
 from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
 from letta.services.tool_sandbox.base import AsyncToolSandboxBase
 from letta.settings import tool_settings
-from letta.utils import get_friendly_error_msg, parse_stderr_error_msg
+from letta.utils import get_friendly_error_msg, parse_stderr_error_msg, safe_create_task
 
 logger = get_logger(__name__)
 
@@ -89,7 +89,7 @@ class AsyncToolSandboxLocal(AsyncToolSandboxBase):
         venv_preparation_task = None
         if use_venv:
             venv_path = str(os.path.join(sandbox_dir, local_configs.venv_name))
-            venv_preparation_task =
+            venv_preparation_task = safe_create_task(self._prepare_venv(local_configs, venv_path, env), label="prepare_venv")
 
         # Generate and write execution script (always with markers, since we rely on stdout)
         code = await self.generate_execution_script(agent_state=agent_state, wrap_print_with_markers=True)
letta/services/tool_sandbox/modal_version_manager.py
CHANGED
@@ -16,6 +16,7 @@ from letta.log import get_logger
 from letta.schemas.tool import ToolUpdate
 from letta.services.tool_manager import ToolManager
 from letta.services.tool_sandbox.modal_constants import CACHE_TTL_SECONDS, DEFAULT_CONFIG_KEY, MODAL_DEPLOYMENTS_KEY
+from letta.utils import safe_create_task
 
 logger = get_logger(__name__)
 
@@ -197,7 +198,7 @@ class ModalVersionManager:
         if deployment_key in self._deployments_in_progress:
             self._deployments_in_progress[deployment_key].set()
             # Clean up after a short delay to allow waiters to wake up
-
+            safe_create_task(self._cleanup_deployment_marker(deployment_key), label=f"cleanup_deployment_{deployment_key}")
 
     async def _cleanup_deployment_marker(self, deployment_key: str):
         """Clean up deployment marker after a delay."""
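Across the multi-agent tool executor, the local sandbox, and the Modal version manager, bare task creation is replaced with `safe_create_task(coro, label=...)` from `letta.utils` (that module grows by roughly 100 lines in this release, but its implementation is not shown here). A hedged sketch of what a labeled, error-logging task helper of this shape could look like; this is a hypothetical stand-in, not the real `letta.utils.safe_create_task`:

```python
import asyncio
import logging
from typing import Any, Coroutine

logger = logging.getLogger(__name__)


def safe_create_task(coro: Coroutine[Any, Any, Any], label: str = "task") -> asyncio.Task:
    """Create an asyncio task that logs, rather than silently drops, exceptions.

    Sketch only: the actual letta.utils.safe_create_task may differ in signature
    and behavior (e.g. metrics, re-raising policy).
    """

    def _log_failure(task: asyncio.Task) -> None:
        # Cancellation is an expected outcome, not an error worth logging here.
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.error(f"Task '{label}' failed: {exc!r}")

    task = asyncio.create_task(coro, name=label)
    task.add_done_callback(_log_failure)
    return task
```

The benefit over a bare `asyncio.create_task` call is that fire-and-forget tasks (like the deployment-marker cleanup above) carry a human-readable label and surface their exceptions in the logs instead of disappearing.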
letta/settings.py
CHANGED
@@ -211,6 +211,9 @@ class Settings(BaseSettings):
     enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")
     keepalive_interval: float = Field(50.0, description="Seconds between keepalive messages (default: 50)")
 
+    # SSE Streaming cancellation settings
+    enable_cancellation_aware_streaming: bool = Field(True, description="Enable cancellation aware streaming")
+
     # default handles
     default_llm_handle: Optional[str] = None
     default_embedding_handle: Optional[str] = None
@@ -303,6 +306,9 @@ class Settings(BaseSettings):
     tpuf_region: str = "gcp-us-central1"
     embed_all_messages: bool = False
 
+    # For encryption
+    encryption_key: Optional[str] = None
+
     # File processing timeout settings
     file_processing_timeout_minutes: int = 30
     file_processing_timeout_error_message: str = "File processing timed out after {} minutes. Please try again."
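Both additions are plain pydantic fields on the shared `Settings` object, so downstream code can branch on them directly. A minimal usage sketch (field names from the diff above; the gated code paths are placeholders, not shown in this release):

```python
from letta.settings import settings

# Gate the cancellation-aware SSE streaming path on the new flag (default True per the diff).
if settings.enable_cancellation_aware_streaming:
    ...  # use the cancellation-aware stream generator

# The encryption key is optional; treat encryption as disabled when it is unset.
if settings.encryption_key is not None:
    ...  # initialize encryption with settings.encryption_key
```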
letta/streaming_utils.py
CHANGED
@@ -99,6 +99,15 @@ class JSONInnerThoughtsExtractor:
                 else:
                     updates_main_json += c
                     self.main_buffer += c
+            # NOTE (fix): Streaming JSON can arrive token-by-token from the LLM.
+            # In the old implementation we pre-inserted an opening quote after every
+            # key's colon (i.e. we emitted '"key":"' immediately). That implicitly
+            # assumed all values are strings. When a non-string value (e.g. true/false,
+            # numbers, null, or a nested object/array) streamed in next, the stream
+            # ended up with an unmatched '"' and appeared as a "missing end-quote" to
+            # clients. We now only emit an opening quote when we actually enter a
+            # string value (see below). This keeps values like booleans unquoted and
+            # avoids generating dangling quotes mid-stream.
             elif c == '"':
                 if not self.escaped:
                     self.in_string = not self.in_string
@@ -112,6 +121,14 @@ class JSONInnerThoughtsExtractor:
                             self.main_buffer += self.main_json_held_buffer
                             self.main_json_held_buffer = ""
                             self.hold_main_json = False
+                    elif self.state == "value":
+                        # Opening quote for a string value (non-inner-thoughts only)
+                        if not self.is_inner_thoughts_value:
+                            if self.hold_main_json:
+                                self.main_json_held_buffer += '"'
+                            else:
+                                updates_main_json += '"'
+                                self.main_buffer += '"'
                 else:
                     if self.state == "key":
                         self.state = "colon"
@@ -156,18 +173,26 @@ class JSONInnerThoughtsExtractor:
                 updates_main_json += c
                 self.main_buffer += c
             else:
+                # NOTE (fix): Do NOT pre-insert an opening quote after ':' any more.
+                # The value may not be a string; we only emit quotes when we actually
+                # see a string begin (handled in the '"' branch above). This prevents
+                # forced-quoting of non-string values and eliminates the common
+                # streaming artifact of "... 'request_heartbeat':'true}" missing the
+                # final quote.
                 if c == ":" and self.state == "colon":
+                    # Transition to reading a value; don't pre-insert quotes
                     self.state = "value"
                    self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
                    if self.is_inner_thoughts_value:
-
+                        # Do not include 'inner_thoughts' key in main_json
+                        pass
                    else:
                        key_colon = f'"{self.current_key}":'
                        if self.hold_main_json:
-                            self.main_json_held_buffer += key_colon
+                            self.main_json_held_buffer += key_colon
                        else:
-                            updates_main_json += key_colon
-                            self.main_buffer += key_colon
+                            updates_main_json += key_colon
+                            self.main_buffer += key_colon
                elif c == "," and self.state == "comma_or_end":
                    if self.is_inner_thoughts_value:
                        # Inner thoughts value ended
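The NOTE comments above describe the streaming artifact this change removes: with quote pre-insertion, a boolean tool argument such as `request_heartbeat` streamed as `{"request_heartbeat":"true}` and never closed the quote. A standalone illustration of the before/after behavior (not the extractor's actual code, just the effect on the emitted fragment):

```python
import json

# Token-by-token value fragments as an LLM might stream them for a boolean argument.
key, value_tokens = "request_heartbeat", ["tr", "ue"]

# Old behavior: an opening quote was emitted right after the key's colon,
# assuming every value is a string.
old_stream = f'{{"{key}":"' + "".join(value_tokens) + "}"
print(old_stream)  # {"request_heartbeat":"true}
try:
    json.loads(old_stream)
except json.JSONDecodeError as err:
    print("old stream is invalid JSON:", err)  # dangling quote, never closed

# New behavior: the quote is only emitted when a string value actually starts,
# so non-string values pass through untouched.
new_stream = f'{{"{key}":' + "".join(value_tokens) + "}"
print(new_stream)             # {"request_heartbeat":true}
print(json.loads(new_stream))  # {'request_heartbeat': True}
```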