letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -71,15 +71,6 @@ class LettaCoreToolExecutor(ToolExecutor):
71
71
  )
72
72
 
73
73
  async def send_message(self, agent_state: AgentState, actor: User, message: str) -> Optional[str]:
74
- """
75
- Sends a message to the human user.
76
-
77
- Args:
78
- message (str): Message contents. All unicode (including emojis) are supported.
79
-
80
- Returns:
81
- Optional[str]: None is always returned as this function does not produce a response.
82
- """
83
74
  return "Sent message successfully."
84
75
 
85
76
  async def conversation_search(
@@ -92,19 +83,6 @@ class LettaCoreToolExecutor(ToolExecutor):
92
83
  start_date: Optional[str] = None,
93
84
  end_date: Optional[str] = None,
94
85
  ) -> Optional[str]:
95
- """
96
- Search prior conversation history using hybrid search (text + semantic similarity).
97
-
98
- Args:
99
- query (str): String to search for using both text matching and semantic similarity.
100
- roles (Optional[List[Literal["assistant", "user", "tool"]]]): Optional list of message roles to filter by.
101
- limit (Optional[int]): Maximum number of results to return. Uses system default if not specified.
102
- start_date (Optional[str]): Filter results to messages created after this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-15", "2024-01-15T14:30".
103
- end_date (Optional[str]): Filter results to messages created before this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-20", "2024-01-20T17:00".
104
-
105
- Returns:
106
- str: Query result string containing matching messages with timestamps and content.
107
- """
108
86
  try:
109
87
  # Parse datetime parameters if provided
110
88
  start_datetime = None
@@ -163,7 +141,6 @@ class LettaCoreToolExecutor(ToolExecutor):
163
141
  limit=search_limit,
164
142
  start_date=start_datetime,
165
143
  end_date=end_datetime,
166
- embedding_config=agent_state.embedding_config,
167
144
  )
168
145
 
169
146
  if len(message_results) == 0:
@@ -286,23 +263,9 @@ class LettaCoreToolExecutor(ToolExecutor):
286
263
  start_datetime: Optional[str] = None,
287
264
  end_datetime: Optional[str] = None,
288
265
  ) -> Optional[str]:
289
- """
290
- Search archival memory using semantic (embedding-based) search with optional temporal filtering.
291
-
292
- Args:
293
- query (str): String to search for using semantic similarity.
294
- tags (Optional[list[str]]): Optional list of tags to filter search results. Only passages with these tags will be returned.
295
- tag_match_mode (Literal["any", "all"]): How to match tags - "any" to match passages with any of the tags, "all" to match only passages with all tags. Defaults to "any".
296
- top_k (Optional[int]): Maximum number of results to return. Uses system default if not specified.
297
- start_datetime (Optional[str]): Filter results to passages created after this datetime. ISO 8601 format.
298
- end_datetime (Optional[str]): Filter results to passages created before this datetime. ISO 8601 format.
299
-
300
- Returns:
301
- str: Query result string containing matching passages with timestamps, content, and tags.
302
- """
303
266
  try:
304
267
  # Use the shared service method to get results
305
- formatted_results, count = await self.agent_manager.search_agent_archival_memory_async(
268
+ formatted_results = await self.agent_manager.search_agent_archival_memory_async(
306
269
  agent_id=agent_state.id,
307
270
  actor=actor,
308
271
  query=query,
@@ -313,7 +276,7 @@ class LettaCoreToolExecutor(ToolExecutor):
313
276
  end_datetime=end_datetime,
314
277
  )
315
278
 
316
- return formatted_results, count
279
+ return formatted_results
317
280
 
318
281
  except Exception as e:
319
282
  raise e
@@ -321,16 +284,6 @@ class LettaCoreToolExecutor(ToolExecutor):
321
284
  async def archival_memory_insert(
322
285
  self, agent_state: AgentState, actor: User, content: str, tags: Optional[list[str]] = None
323
286
  ) -> Optional[str]:
324
- """
325
- Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.
326
-
327
- Args:
328
- content (str): Content to write to the memory. All unicode (including emojis) are supported.
329
- tags (Optional[list[str]]): Optional list of tags to associate with this memory for better organization and filtering.
330
-
331
- Returns:
332
- Optional[str]: None is always returned as this function does not produce a response.
333
- """
334
287
  await self.passage_manager.insert_passage(
335
288
  agent_state=agent_state,
336
289
  text=content,
@@ -341,16 +294,6 @@ class LettaCoreToolExecutor(ToolExecutor):
341
294
  return None
342
295
 
343
296
  async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> Optional[str]:
344
- """
345
- Append to the contents of core memory.
346
-
347
- Args:
348
- label (str): Section of the memory to be edited.
349
- content (str): Content to write to the memory. All unicode (including emojis) are supported.
350
-
351
- Returns:
352
- Optional[str]: None is always returned as this function does not produce a response.
353
- """
354
297
  if agent_state.memory.get_block(label).read_only:
355
298
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
356
299
  current_value = str(agent_state.memory.get_block(label).value)
@@ -367,17 +310,6 @@ class LettaCoreToolExecutor(ToolExecutor):
367
310
  old_content: str,
368
311
  new_content: str,
369
312
  ) -> Optional[str]:
370
- """
371
- Replace the contents of core memory. To delete memories, use an empty string for new_content.
372
-
373
- Args:
374
- label (str): Section of the memory to be edited.
375
- old_content (str): String to replace. Must be an exact match.
376
- new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
377
-
378
- Returns:
379
- Optional[str]: None is always returned as this function does not produce a response.
380
- """
381
313
  if agent_state.memory.get_block(label).read_only:
382
314
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
383
315
  current_value = str(agent_state.memory.get_block(label).value)
@@ -389,20 +321,6 @@ class LettaCoreToolExecutor(ToolExecutor):
389
321
  return None
390
322
 
391
323
  async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
392
- """
393
- The memory_replace command allows you to replace a specific string in a memory
394
- block with a new string. This is used for making precise edits.
395
-
396
- Args:
397
- label (str): Section of the memory to be edited, identified by its label.
398
- old_str (str): The text to replace (must match exactly, including whitespace
399
- and indentation). Do not include line number prefixes.
400
- new_str (str): The new text to insert in place of the old text. Do not include line number prefixes.
401
-
402
- Returns:
403
- str: The success message
404
- """
405
-
406
324
  if agent_state.memory.get_block(label).read_only:
407
325
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
408
326
 
@@ -479,20 +397,6 @@ class LettaCoreToolExecutor(ToolExecutor):
479
397
  new_str: str,
480
398
  insert_line: int = -1,
481
399
  ) -> str:
482
- """
483
- The memory_insert command allows you to insert text at a specific location
484
- in a memory block.
485
-
486
- Args:
487
- label (str): Section of the memory to be edited, identified by its label.
488
- new_str (str): The text to insert. Do not include line number prefixes.
489
- insert_line (int): The line number after which to insert the text (0 for
490
- beginning of file). Defaults to -1 (end of the file).
491
-
492
- Returns:
493
- str: The success message
494
- """
495
-
496
400
  if agent_state.memory.get_block(label).read_only:
497
401
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
498
402
 
@@ -559,20 +463,6 @@ class LettaCoreToolExecutor(ToolExecutor):
559
463
  return success_msg
560
464
 
561
465
  async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
562
- """
563
- The memory_rethink command allows you to completely rewrite the contents of a
564
- memory block. Use this tool to make large sweeping changes (e.g. when you want
565
- to condense or reorganize the memory blocks), do NOT use this tool to make small
566
- precise edits (e.g. add or remove a line, replace a specific string, etc).
567
-
568
- Args:
569
- label (str): The memory block to be rewritten, identified by its label.
570
- new_memory (str): The new memory contents with information integrated from
571
- existing memory blocks and the conversation context. Do not include line number prefixes.
572
-
573
- Returns:
574
- str: The success message
575
- """
576
466
  if agent_state.memory.get_block(label).read_only:
577
467
  raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
578
468
 
@@ -611,12 +501,4 @@ class LettaCoreToolExecutor(ToolExecutor):
611
501
  return success_msg
612
502
 
613
503
  async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
614
- """
615
- Call the memory_finish_edits command when you are finished making edits
616
- (integrating all new information) into the memory blocks. This function
617
- is called when the agent is done rethinking the memory.
618
-
619
- Returns:
620
- Optional[str]: None is always returned as this function does not produce a response.
621
- """
622
504
  return None
@@ -5,10 +5,13 @@ from typing import Any, Dict, List, Optional
5
5
  from letta.constants import PINECONE_TEXT_FIELD_NAME
6
6
  from letta.functions.types import FileOpenRequest
7
7
  from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
8
+ from letta.helpers.tpuf_client import should_use_tpuf
8
9
  from letta.log import get_logger
9
10
  from letta.otel.tracing import trace_method
10
11
  from letta.schemas.agent import AgentState
12
+ from letta.schemas.enums import VectorDBProvider
11
13
  from letta.schemas.sandbox_config import SandboxConfig
14
+ from letta.schemas.source import Source
12
15
  from letta.schemas.tool import Tool
13
16
  from letta.schemas.tool_execution_result import ToolExecutionResult
14
17
  from letta.schemas.user import User
@@ -554,18 +557,140 @@ class LettaFileToolExecutor(ToolExecutor):
554
557
 
555
558
  self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
556
559
 
557
- # Check if Pinecone is enabled and use it if available
558
- if should_use_pinecone():
559
- return await self._search_files_pinecone(agent_state, query, limit)
560
- else:
561
- return await self._search_files_traditional(agent_state, query, limit)
560
+ # Check which vector DB to use - Turbopuffer takes precedence
561
+ attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
562
+ attached_tpuf_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.TPUF]
563
+ attached_pinecone_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.PINECONE]
564
+
565
+ if not attached_tpuf_sources and not attached_pinecone_sources:
566
+ return await self._search_files_native(agent_state, query, limit)
567
+
568
+ results = []
569
+
570
+ # If both have items, we half the limit roughly
571
+ # TODO: This is very hacky bc it skips the re-ranking - but this is a temporary stopgap while we think about migrating data
572
+
573
+ if attached_tpuf_sources and attached_pinecone_sources:
574
+ limit = max(limit // 2, 1)
575
+
576
+ if should_use_tpuf() and attached_tpuf_sources:
577
+ tpuf_result = await self._search_files_turbopuffer(agent_state, attached_tpuf_sources, query, limit)
578
+ results.append(tpuf_result)
579
+
580
+ if should_use_pinecone() and attached_pinecone_sources:
581
+ pinecone_result = await self._search_files_pinecone(agent_state, attached_pinecone_sources, query, limit)
582
+ results.append(pinecone_result)
583
+
584
+ # combine results from both sources
585
+ if results:
586
+ return "\n\n".join(results)
587
+
588
+ # fallback if no results from either source
589
+ return "No results found"
590
+
591
+ async def _search_files_turbopuffer(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
592
+ """Search files using Turbopuffer vector database."""
593
+
594
+ # Get attached sources
595
+ source_ids = [source.id for source in attached_sources]
596
+ if not source_ids:
597
+ return "No valid source IDs found for attached files"
598
+
599
+ # Get all attached files for this agent
600
+ file_agents = await self.files_agents_manager.list_files_for_agent(
601
+ agent_id=agent_state.id, per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit, actor=self.actor
602
+ )
603
+ if not file_agents:
604
+ return "No files are currently attached to search"
605
+
606
+ # Create a map of file_id to file_name for quick lookup
607
+ file_map = {fa.file_id: fa.file_name for fa in file_agents}
608
+
609
+ results = []
610
+ total_hits = 0
611
+ files_with_matches = {}
612
+
613
+ try:
614
+ from letta.helpers.tpuf_client import TurbopufferClient
615
+
616
+ tpuf_client = TurbopufferClient()
617
+
618
+ # Query Turbopuffer for all sources at once
619
+ search_results = await tpuf_client.query_file_passages(
620
+ source_ids=source_ids, # pass all source_ids as a list
621
+ organization_id=self.actor.organization_id,
622
+ actor=self.actor,
623
+ query_text=query,
624
+ search_mode="hybrid", # use hybrid search for best results
625
+ top_k=limit,
626
+ )
627
+
628
+ # Process search results
629
+ for passage, score, metadata in search_results:
630
+ if total_hits >= limit:
631
+ break
632
+
633
+ total_hits += 1
634
+
635
+ # get file name from our map
636
+ file_name = file_map.get(passage.file_id, "Unknown File")
562
637
 
563
- async def _search_files_pinecone(self, agent_state: AgentState, query: str, limit: int) -> str:
638
+ # group by file name
639
+ if file_name not in files_with_matches:
640
+ files_with_matches[file_name] = []
641
+ files_with_matches[file_name].append({"text": passage.text, "score": score, "passage_id": passage.id})
642
+
643
+ except Exception as e:
644
+ self.logger.error(f"Turbopuffer search failed: {str(e)}")
645
+ raise e
646
+
647
+ if not files_with_matches:
648
+ return f"No semantic matches found in Turbopuffer for query: '{query}'"
649
+
650
+ # Format results
651
+ passage_num = 0
652
+ for file_name, matches in files_with_matches.items():
653
+ for match in matches:
654
+ passage_num += 1
655
+
656
+ # format each passage with terminal-style header
657
+ score_display = f"(score: {match['score']:.3f})"
658
+ passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
659
+
660
+ # format the passage text
661
+ passage_text = match["text"].strip()
662
+ lines = passage_text.splitlines()
663
+ formatted_lines = []
664
+ for line in lines[:20]: # limit to first 20 lines per passage
665
+ formatted_lines.append(f" {line}")
666
+
667
+ if len(lines) > 20:
668
+ formatted_lines.append(f" ... [truncated {len(lines) - 20} more lines]")
669
+
670
+ passage_content = "\n".join(formatted_lines)
671
+ results.append(f"{passage_header}\n{passage_content}")
672
+
673
+ # mark access for files that had matches
674
+ if files_with_matches:
675
+ matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
676
+ if matched_file_names:
677
+ await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
678
+
679
+ # create summary header
680
+ file_count = len(files_with_matches)
681
+ summary = f"Found {total_hits} Turbopuffer matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
682
+
683
+ # combine all results
684
+ formatted_results = [summary, "=" * len(summary)] + results
685
+
686
+ self.logger.info(f"Turbopuffer search completed: {total_hits} matches across {file_count} files")
687
+ return "\n".join(formatted_results)
688
+
689
+ async def _search_files_pinecone(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
564
690
  """Search files using Pinecone vector database."""
565
691
 
566
692
  # Extract unique source_ids
567
693
  # TODO: Inefficient
568
- attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
569
694
  source_ids = [source.id for source in attached_sources]
570
695
  if not source_ids:
571
696
  return "No valid source IDs found for attached files"
@@ -658,7 +783,7 @@ class LettaFileToolExecutor(ToolExecutor):
658
783
  self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
659
784
  return "\n".join(formatted_results)
660
785
 
661
- async def _search_files_traditional(self, agent_state: AgentState, query: str, limit: int) -> str:
786
+ async def _search_files_native(self, agent_state: AgentState, query: str, limit: int) -> str:
662
787
  """Traditional search using existing passage manager."""
663
788
  # Get semantic search results
664
789
  passages = await self.agent_manager.query_source_passages_async(
@@ -13,6 +13,7 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
13
13
  from letta.schemas.user import User
14
14
  from letta.services.tool_executor.tool_executor_base import ToolExecutor
15
15
  from letta.settings import settings
16
+ from letta.utils import safe_create_task
16
17
 
17
18
  logger = get_logger(__name__)
18
19
 
@@ -55,7 +56,8 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
55
56
  f"{message}"
56
57
  )
57
58
 
58
- return str(await self._process_agent(agent_id=other_agent_id, message=augmented_message))
59
+ other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
60
+ return str(await self._process_agent(agent_state=other_agent_state, message=augmented_message))
59
61
 
60
62
  async def send_message_to_agents_matching_tags_async(
61
63
  self, agent_state: AgentState, message: str, match_all: List[str], match_some: List[str]
@@ -75,22 +77,20 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
75
77
  )
76
78
 
77
79
  tasks = [
78
- asyncio.create_task(self._process_agent(agent_id=agent_state.id, message=augmented_message)) for agent_state in matching_agents
80
+ safe_create_task(
81
+ self._process_agent(agent_state=agent_state, message=augmented_message), label=f"process_agent_{agent_state.id}"
82
+ )
83
+ for agent_state in matching_agents
79
84
  ]
80
85
  results = await asyncio.gather(*tasks)
81
86
  return str(results)
82
87
 
83
- async def _process_agent(self, agent_id: str, message: str) -> Dict[str, Any]:
84
- from letta.agents.letta_agent import LettaAgent
88
+ async def _process_agent(self, agent_state: AgentState, message: str) -> Dict[str, Any]:
89
+ from letta.agents.letta_agent_v2 import LettaAgentV2
85
90
 
86
91
  try:
87
- letta_agent = LettaAgent(
88
- agent_id=agent_id,
89
- message_manager=self.message_manager,
90
- agent_manager=self.agent_manager,
91
- block_manager=self.block_manager,
92
- job_manager=self.job_manager,
93
- passage_manager=self.passage_manager,
92
+ letta_agent = LettaAgentV2(
93
+ agent_state=agent_state,
94
94
  actor=self.actor,
95
95
  )
96
96
 
@@ -100,13 +100,13 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
100
100
  send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
101
101
 
102
102
  return {
103
- "agent_id": agent_id,
103
+ "agent_id": agent_state.id,
104
104
  "response": send_message_content if send_message_content else ["<no response>"],
105
105
  }
106
106
 
107
107
  except Exception as e:
108
108
  return {
109
- "agent_id": agent_id,
109
+ "agent_id": agent_state.id,
110
110
  "error": str(e),
111
111
  "type": type(e).__name__,
112
112
  }
@@ -123,7 +123,10 @@ class LettaMultiAgentToolExecutor(ToolExecutor):
123
123
  f"{message}"
124
124
  )
125
125
 
126
- task = asyncio.create_task(self._process_agent(agent_id=other_agent_id, message=prefixed))
126
+ other_agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=other_agent_id, actor=self.actor)
127
+ task = safe_create_task(
128
+ self._process_agent(agent_state=other_agent_state, message=prefixed), label=f"send_message_to_{other_agent_id}"
129
+ )
127
130
 
128
131
  task.add_done_callback(lambda t: (logger.error(f"Async send_message task failed: {t.exception()}") if t.exception() else None))
129
132
 
@@ -23,7 +23,7 @@ from letta.services.helpers.tool_execution_helper import (
23
23
  from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
24
24
  from letta.services.tool_sandbox.base import AsyncToolSandboxBase
25
25
  from letta.settings import tool_settings
26
- from letta.utils import get_friendly_error_msg, parse_stderr_error_msg
26
+ from letta.utils import get_friendly_error_msg, parse_stderr_error_msg, safe_create_task
27
27
 
28
28
  logger = get_logger(__name__)
29
29
 
@@ -89,7 +89,7 @@ class AsyncToolSandboxLocal(AsyncToolSandboxBase):
89
89
  venv_preparation_task = None
90
90
  if use_venv:
91
91
  venv_path = str(os.path.join(sandbox_dir, local_configs.venv_name))
92
- venv_preparation_task = asyncio.create_task(self._prepare_venv(local_configs, venv_path, env))
92
+ venv_preparation_task = safe_create_task(self._prepare_venv(local_configs, venv_path, env), label="prepare_venv")
93
93
 
94
94
  # Generate and write execution script (always with markers, since we rely on stdout)
95
95
  code = await self.generate_execution_script(agent_state=agent_state, wrap_print_with_markers=True)
@@ -16,6 +16,7 @@ from letta.log import get_logger
16
16
  from letta.schemas.tool import ToolUpdate
17
17
  from letta.services.tool_manager import ToolManager
18
18
  from letta.services.tool_sandbox.modal_constants import CACHE_TTL_SECONDS, DEFAULT_CONFIG_KEY, MODAL_DEPLOYMENTS_KEY
19
+ from letta.utils import safe_create_task
19
20
 
20
21
  logger = get_logger(__name__)
21
22
 
@@ -197,7 +198,7 @@ class ModalVersionManager:
197
198
  if deployment_key in self._deployments_in_progress:
198
199
  self._deployments_in_progress[deployment_key].set()
199
200
  # Clean up after a short delay to allow waiters to wake up
200
- asyncio.create_task(self._cleanup_deployment_marker(deployment_key))
201
+ safe_create_task(self._cleanup_deployment_marker(deployment_key), label=f"cleanup_deployment_{deployment_key}")
201
202
 
202
203
  async def _cleanup_deployment_marker(self, deployment_key: str):
203
204
  """Clean up deployment marker after a delay."""
letta/settings.py CHANGED
@@ -211,6 +211,9 @@ class Settings(BaseSettings):
211
211
  enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")
212
212
  keepalive_interval: float = Field(50.0, description="Seconds between keepalive messages (default: 50)")
213
213
 
214
+ # SSE Streaming cancellation settings
215
+ enable_cancellation_aware_streaming: bool = Field(True, description="Enable cancellation aware streaming")
216
+
214
217
  # default handles
215
218
  default_llm_handle: Optional[str] = None
216
219
  default_embedding_handle: Optional[str] = None
@@ -303,6 +306,9 @@ class Settings(BaseSettings):
303
306
  tpuf_region: str = "gcp-us-central1"
304
307
  embed_all_messages: bool = False
305
308
 
309
+ # For encryption
310
+ encryption_key: Optional[str] = None
311
+
306
312
  # File processing timeout settings
307
313
  file_processing_timeout_minutes: int = 30
308
314
  file_processing_timeout_error_message: str = "File processing timed out after {} minutes. Please try again."
letta/streaming_utils.py CHANGED
@@ -99,6 +99,15 @@ class JSONInnerThoughtsExtractor:
99
99
  else:
100
100
  updates_main_json += c
101
101
  self.main_buffer += c
102
+ # NOTE (fix): Streaming JSON can arrive token-by-token from the LLM.
103
+ # In the old implementation we pre-inserted an opening quote after every
104
+ # key's colon (i.e. we emitted '"key":"' immediately). That implicitly
105
+ # assumed all values are strings. When a non-string value (e.g. true/false,
106
+ # numbers, null, or a nested object/array) streamed in next, the stream
107
+ # ended up with an unmatched '"' and appeared as a "missing end-quote" to
108
+ # clients. We now only emit an opening quote when we actually enter a
109
+ # string value (see below). This keeps values like booleans unquoted and
110
+ # avoids generating dangling quotes mid-stream.
102
111
  elif c == '"':
103
112
  if not self.escaped:
104
113
  self.in_string = not self.in_string
@@ -112,6 +121,14 @@ class JSONInnerThoughtsExtractor:
112
121
  self.main_buffer += self.main_json_held_buffer
113
122
  self.main_json_held_buffer = ""
114
123
  self.hold_main_json = False
124
+ elif self.state == "value":
125
+ # Opening quote for a string value (non-inner-thoughts only)
126
+ if not self.is_inner_thoughts_value:
127
+ if self.hold_main_json:
128
+ self.main_json_held_buffer += '"'
129
+ else:
130
+ updates_main_json += '"'
131
+ self.main_buffer += '"'
115
132
  else:
116
133
  if self.state == "key":
117
134
  self.state = "colon"
@@ -156,18 +173,26 @@ class JSONInnerThoughtsExtractor:
156
173
  updates_main_json += c
157
174
  self.main_buffer += c
158
175
  else:
176
+ # NOTE (fix): Do NOT pre-insert an opening quote after ':' any more.
177
+ # The value may not be a string; we only emit quotes when we actually
178
+ # see a string begin (handled in the '"' branch above). This prevents
179
+ # forced-quoting of non-string values and eliminates the common
180
+ # streaming artifact of "... 'request_heartbeat':'true}" missing the
181
+ # final quote.
159
182
  if c == ":" and self.state == "colon":
183
+ # Transition to reading a value; don't pre-insert quotes
160
184
  self.state = "value"
161
185
  self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
162
186
  if self.is_inner_thoughts_value:
163
- pass # Do not include 'inner_thoughts' key in main_json
187
+ # Do not include 'inner_thoughts' key in main_json
188
+ pass
164
189
  else:
165
190
  key_colon = f'"{self.current_key}":'
166
191
  if self.hold_main_json:
167
- self.main_json_held_buffer += key_colon + '"'
192
+ self.main_json_held_buffer += key_colon
168
193
  else:
169
- updates_main_json += key_colon + '"'
170
- self.main_buffer += key_colon + '"'
194
+ updates_main_json += key_colon
195
+ self.main_buffer += key_colon
171
196
  elif c == "," and self.state == "comma_or_end":
172
197
  if self.is_inner_thoughts_value:
173
198
  # Inner thoughts value ended