letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +81 -0
- letta/adapters/letta_llm_request_adapter.py +111 -0
- letta/adapters/letta_llm_stream_adapter.py +169 -0
- letta/agents/base_agent.py +4 -1
- letta/agents/base_agent_v2.py +68 -0
- letta/agents/helpers.py +3 -5
- letta/agents/letta_agent.py +23 -12
- letta/agents/letta_agent_v2.py +1220 -0
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +1 -1
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +53 -12
- letta/functions/schema_generator.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +231 -0
- letta/helpers/tool_rule_solver.py +4 -0
- letta/helpers/tpuf_client.py +607 -34
- letta/interfaces/anthropic_streaming_interface.py +64 -24
- letta/interfaces/openai_streaming_interface.py +80 -37
- letta/llm_api/openai_client.py +45 -4
- letta/orm/block.py +1 -0
- letta/orm/group.py +1 -0
- letta/orm/source.py +8 -1
- letta/orm/step_metrics.py +10 -0
- letta/schemas/block.py +4 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +8 -0
- letta/schemas/letta_message.py +1 -1
- letta/schemas/letta_request.py +2 -2
- letta/schemas/mcp.py +9 -1
- letta/schemas/message.py +23 -0
- letta/schemas/providers/ollama.py +1 -1
- letta/schemas/providers.py +1 -2
- letta/schemas/source.py +6 -0
- letta/schemas/step_metrics.py +2 -0
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +100 -5
- letta/server/rest_api/routers/v1/blocks.py +6 -0
- letta/server/rest_api/routers/v1/folders.py +23 -5
- letta/server/rest_api/routers/v1/groups.py +6 -0
- letta/server/rest_api/routers/v1/internal_templates.py +218 -12
- letta/server/rest_api/routers/v1/messages.py +14 -19
- letta/server/rest_api/routers/v1/runs.py +43 -28
- letta/server/rest_api/routers/v1/sources.py +23 -5
- letta/server/rest_api/routers/v1/tools.py +42 -0
- letta/server/rest_api/streaming_response.py +9 -1
- letta/server/server.py +2 -1
- letta/services/agent_manager.py +39 -59
- letta/services/agent_serialization_manager.py +22 -8
- letta/services/archive_manager.py +60 -9
- letta/services/block_manager.py +5 -0
- letta/services/file_processor/embedder/base_embedder.py +5 -0
- letta/services/file_processor/embedder/openai_embedder.py +4 -0
- letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
- letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
- letta/services/file_processor/file_processor.py +9 -7
- letta/services/group_manager.py +74 -11
- letta/services/mcp_manager.py +132 -26
- letta/services/message_manager.py +229 -125
- letta/services/passage_manager.py +2 -1
- letta/services/source_manager.py +23 -1
- letta/services/summarizer/summarizer.py +2 -0
- letta/services/tool_executor/core_tool_executor.py +2 -120
- letta/services/tool_executor/files_tool_executor.py +133 -8
- letta/settings.py +6 -0
- letta/utils.py +34 -1
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
letta/services/tool_executor/core_tool_executor.py
CHANGED

@@ -71,15 +71,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         )
 
     async def send_message(self, agent_state: AgentState, actor: User, message: str) -> Optional[str]:
-        """
-        Sends a message to the human user.
-
-        Args:
-            message (str): Message contents. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         return "Sent message successfully."
 
     async def conversation_search(
@@ -92,19 +83,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         start_date: Optional[str] = None,
         end_date: Optional[str] = None,
     ) -> Optional[str]:
-        """
-        Search prior conversation history using hybrid search (text + semantic similarity).
-
-        Args:
-            query (str): String to search for using both text matching and semantic similarity.
-            roles (Optional[List[Literal["assistant", "user", "tool"]]]): Optional list of message roles to filter by.
-            limit (Optional[int]): Maximum number of results to return. Uses system default if not specified.
-            start_date (Optional[str]): Filter results to messages created after this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-15", "2024-01-15T14:30".
-            end_date (Optional[str]): Filter results to messages created before this date. ISO 8601 format: "YYYY-MM-DD" or "YYYY-MM-DDTHH:MM". Examples: "2024-01-20", "2024-01-20T17:00".
-
-        Returns:
-            str: Query result string containing matching messages with timestamps and content.
-        """
         try:
             # Parse datetime parameters if provided
             start_datetime = None
@@ -163,7 +141,6 @@ class LettaCoreToolExecutor(ToolExecutor):
                 limit=search_limit,
                 start_date=start_datetime,
                 end_date=end_datetime,
-                embedding_config=agent_state.embedding_config,
             )
 
             if len(message_results) == 0:
@@ -286,23 +263,9 @@ class LettaCoreToolExecutor(ToolExecutor):
         start_datetime: Optional[str] = None,
         end_datetime: Optional[str] = None,
     ) -> Optional[str]:
-        """
-        Search archival memory using semantic (embedding-based) search with optional temporal filtering.
-
-        Args:
-            query (str): String to search for using semantic similarity.
-            tags (Optional[list[str]]): Optional list of tags to filter search results. Only passages with these tags will be returned.
-            tag_match_mode (Literal["any", "all"]): How to match tags - "any" to match passages with any of the tags, "all" to match only passages with all tags. Defaults to "any".
-            top_k (Optional[int]): Maximum number of results to return. Uses system default if not specified.
-            start_datetime (Optional[str]): Filter results to passages created after this datetime. ISO 8601 format.
-            end_datetime (Optional[str]): Filter results to passages created before this datetime. ISO 8601 format.
-
-        Returns:
-            str: Query result string containing matching passages with timestamps, content, and tags.
-        """
         try:
             # Use the shared service method to get results
-            formatted_results
+            formatted_results = await self.agent_manager.search_agent_archival_memory_async(
                 agent_id=agent_state.id,
                 actor=actor,
                 query=query,
@@ -313,7 +276,7 @@ class LettaCoreToolExecutor(ToolExecutor):
                 end_datetime=end_datetime,
             )
 
-            return formatted_results
+            return formatted_results
 
         except Exception as e:
             raise e
@@ -321,16 +284,6 @@ class LettaCoreToolExecutor(ToolExecutor):
     async def archival_memory_insert(
         self, agent_state: AgentState, actor: User, content: str, tags: Optional[list[str]] = None
     ) -> Optional[str]:
-        """
-        Add to archival memory. Make sure to phrase the memory contents such that it can be easily queried later.
-
-        Args:
-            content (str): Content to write to the memory. All unicode (including emojis) are supported.
-            tags (Optional[list[str]]): Optional list of tags to associate with this memory for better organization and filtering.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         await self.passage_manager.insert_passage(
             agent_state=agent_state,
             text=content,
@@ -341,16 +294,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return None
 
     async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> Optional[str]:
-        """
-        Append to the contents of core memory.
-
-        Args:
-            label (str): Section of the memory to be edited.
-            content (str): Content to write to the memory. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
         current_value = str(agent_state.memory.get_block(label).value)
@@ -367,17 +310,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         old_content: str,
         new_content: str,
     ) -> Optional[str]:
-        """
-        Replace the contents of core memory. To delete memories, use an empty string for new_content.
-
-        Args:
-            label (str): Section of the memory to be edited.
-            old_content (str): String to replace. Must be an exact match.
-            new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
         current_value = str(agent_state.memory.get_block(label).value)
@@ -389,20 +321,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return None
 
     async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
-        """
-        The memory_replace command allows you to replace a specific string in a memory
-        block with a new string. This is used for making precise edits.
-
-        Args:
-            label (str): Section of the memory to be edited, identified by its label.
-            old_str (str): The text to replace (must match exactly, including whitespace
-                and indentation). Do not include line number prefixes.
-            new_str (str): The new text to insert in place of the old text. Do not include line number prefixes.
-
-        Returns:
-            str: The success message
-        """
-
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -479,20 +397,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         new_str: str,
         insert_line: int = -1,
     ) -> str:
-        """
-        The memory_insert command allows you to insert text at a specific location
-        in a memory block.
-
-        Args:
-            label (str): Section of the memory to be edited, identified by its label.
-            new_str (str): The text to insert. Do not include line number prefixes.
-            insert_line (int): The line number after which to insert the text (0 for
-                beginning of file). Defaults to -1 (end of the file).
-
-        Returns:
-            str: The success message
-        """
-
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -559,20 +463,6 @@ class LettaCoreToolExecutor(ToolExecutor):
         return success_msg
 
     async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
-        """
-        The memory_rethink command allows you to completely rewrite the contents of a
-        memory block. Use this tool to make large sweeping changes (e.g. when you want
-        to condense or reorganize the memory blocks), do NOT use this tool to make small
-        precise edits (e.g. add or remove a line, replace a specific string, etc).
-
-        Args:
-            label (str): The memory block to be rewritten, identified by its label.
-            new_memory (str): The new memory contents with information integrated from
-                existing memory blocks and the conversation context. Do not include line number prefixes.
-
-        Returns:
-            str: The success message
-        """
         if agent_state.memory.get_block(label).read_only:
             raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
 
@@ -611,12 +501,4 @@ class LettaCoreToolExecutor(ToolExecutor):
         return success_msg
 
     async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
-        """
-        Call the memory_finish_edits command when you are finished making edits
-        (integrating all new information) into the memory blocks. This function
-        is called when the agent is done rethinking the memory.
-
-        Returns:
-            Optional[str]: None is always returned as this function does not produce a response.
-        """
        return None
letta/services/tool_executor/files_tool_executor.py
CHANGED

@@ -5,10 +5,13 @@ from typing import Any, Dict, List, Optional
 from letta.constants import PINECONE_TEXT_FIELD_NAME
 from letta.functions.types import FileOpenRequest
 from letta.helpers.pinecone_utils import search_pinecone_index, should_use_pinecone
+from letta.helpers.tpuf_client import should_use_tpuf
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.sandbox_config import SandboxConfig
+from letta.schemas.source import Source
 from letta.schemas.tool import Tool
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
@@ -554,18 +557,140 @@ class LettaFileToolExecutor(ToolExecutor):
 
         self.logger.info(f"Semantic search started for agent {agent_state.id} with query '{query}' (limit: {limit})")
 
-        # Check
-
-
-
-
+        # Check which vector DB to use - Turbopuffer takes precedence
+        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
+        attached_tpuf_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.TPUF]
+        attached_pinecone_sources = [source for source in attached_sources if source.vector_db_provider == VectorDBProvider.PINECONE]
+
+        if not attached_tpuf_sources and not attached_pinecone_sources:
+            return await self._search_files_native(agent_state, query, limit)
+
+        results = []
+
+        # If both have items, we half the limit roughly
+        # TODO: This is very hacky bc it skips the re-ranking - but this is a temporary stopgap while we think about migrating data
+
+        if attached_tpuf_sources and attached_pinecone_sources:
+            limit = max(limit // 2, 1)
+
+        if should_use_tpuf() and attached_tpuf_sources:
+            tpuf_result = await self._search_files_turbopuffer(agent_state, attached_tpuf_sources, query, limit)
+            results.append(tpuf_result)
+
+        if should_use_pinecone() and attached_pinecone_sources:
+            pinecone_result = await self._search_files_pinecone(agent_state, attached_pinecone_sources, query, limit)
+            results.append(pinecone_result)
+
+        # combine results from both sources
+        if results:
+            return "\n\n".join(results)
+
+        # fallback if no results from either source
+        return "No results found"
+
+    async def _search_files_turbopuffer(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
+        """Search files using Turbopuffer vector database."""
+
+        # Get attached sources
+        source_ids = [source.id for source in attached_sources]
+        if not source_ids:
+            return "No valid source IDs found for attached files"
+
+        # Get all attached files for this agent
+        file_agents = await self.files_agents_manager.list_files_for_agent(
+            agent_id=agent_state.id, per_file_view_window_char_limit=agent_state.per_file_view_window_char_limit, actor=self.actor
+        )
+        if not file_agents:
+            return "No files are currently attached to search"
+
+        # Create a map of file_id to file_name for quick lookup
+        file_map = {fa.file_id: fa.file_name for fa in file_agents}
+
+        results = []
+        total_hits = 0
+        files_with_matches = {}
+
+        try:
+            from letta.helpers.tpuf_client import TurbopufferClient
+
+            tpuf_client = TurbopufferClient()
+
+            # Query Turbopuffer for all sources at once
+            search_results = await tpuf_client.query_file_passages(
+                source_ids=source_ids, # pass all source_ids as a list
+                organization_id=self.actor.organization_id,
+                actor=self.actor,
+                query_text=query,
+                search_mode="hybrid", # use hybrid search for best results
+                top_k=limit,
+            )
+
+            # Process search results
+            for passage, score, metadata in search_results:
+                if total_hits >= limit:
+                    break
+
+                total_hits += 1
+
+                # get file name from our map
+                file_name = file_map.get(passage.file_id, "Unknown File")
 
-
+                # group by file name
+                if file_name not in files_with_matches:
+                    files_with_matches[file_name] = []
+                files_with_matches[file_name].append({"text": passage.text, "score": score, "passage_id": passage.id})
+
+        except Exception as e:
+            self.logger.error(f"Turbopuffer search failed: {str(e)}")
+            raise e
+
+        if not files_with_matches:
+            return f"No semantic matches found in Turbopuffer for query: '{query}'"
+
+        # Format results
+        passage_num = 0
+        for file_name, matches in files_with_matches.items():
+            for match in matches:
+                passage_num += 1
+
+                # format each passage with terminal-style header
+                score_display = f"(score: {match['score']:.3f})"
+                passage_header = f"\n=== {file_name} (passage #{passage_num}) {score_display} ==="
+
+                # format the passage text
+                passage_text = match["text"].strip()
+                lines = passage_text.splitlines()
+                formatted_lines = []
+                for line in lines[:20]: # limit to first 20 lines per passage
+                    formatted_lines.append(f" {line}")
+
+                if len(lines) > 20:
+                    formatted_lines.append(f" ... [truncated {len(lines) - 20} more lines]")
+
+                passage_content = "\n".join(formatted_lines)
+                results.append(f"{passage_header}\n{passage_content}")
+
+        # mark access for files that had matches
+        if files_with_matches:
+            matched_file_names = [name for name in files_with_matches.keys() if name != "Unknown File"]
+            if matched_file_names:
+                await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=matched_file_names, actor=self.actor)
+
+        # create summary header
+        file_count = len(files_with_matches)
+        summary = f"Found {total_hits} Turbopuffer matches in {file_count} file{'s' if file_count != 1 else ''} for query: '{query}'"
+
+        # combine all results
+        formatted_results = [summary, "=" * len(summary)] + results
+
+        self.logger.info(f"Turbopuffer search completed: {total_hits} matches across {file_count} files")
+        return "\n".join(formatted_results)
+
+    async def _search_files_pinecone(self, agent_state: AgentState, attached_sources: List[Source], query: str, limit: int) -> str:
         """Search files using Pinecone vector database."""
 
         # Extract unique source_ids
         # TODO: Inefficient
-        attached_sources = await self.agent_manager.list_attached_sources_async(agent_id=agent_state.id, actor=self.actor)
         source_ids = [source.id for source in attached_sources]
         if not source_ids:
             return "No valid source IDs found for attached files"
@@ -658,7 +783,7 @@ class LettaFileToolExecutor(ToolExecutor):
         self.logger.info(f"Pinecone search completed: {total_hits} matches across {file_count} files")
         return "\n".join(formatted_results)
 
-    async def
+    async def _search_files_native(self, agent_state: AgentState, query: str, limit: int) -> str:
         """Traditional search using existing passage manager."""
         # Get semantic search results
        passages = await self.agent_manager.query_source_passages_async(
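The routing added to `semantic_search_files` above can be distilled into the following simplified sketch (illustrative standalone Python, not the actual class method; the real code also gates each branch on `should_use_tpuf()` / `should_use_pinecone()` and passes the agent state and actor through to the helpers):

```python
from typing import Awaitable, Callable, List


async def route_semantic_search(
    tpuf_source_ids: List[str],
    pinecone_source_ids: List[str],
    limit: int,
    search_tpuf: Callable[[List[str], int], Awaitable[str]],
    search_pinecone: Callable[[List[str], int], Awaitable[str]],
    search_native: Callable[[int], Awaitable[str]],
) -> str:
    # No attached source uses an external vector DB: fall back to native passage search.
    if not tpuf_source_ids and not pinecone_source_ids:
        return await search_native(limit)

    # Both providers attached: roughly split the result budget between them
    # (the TODO in the diff notes this skips cross-provider re-ranking for now).
    if tpuf_source_ids and pinecone_source_ids:
        limit = max(limit // 2, 1)

    results = []
    if tpuf_source_ids:
        results.append(await search_tpuf(tpuf_source_ids, limit))
    if pinecone_source_ids:
        results.append(await search_pinecone(pinecone_source_ids, limit))

    return "\n\n".join(results) if results else "No results found"
```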
letta/settings.py
CHANGED
@@ -211,6 +211,9 @@ class Settings(BaseSettings):
     enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")
     keepalive_interval: float = Field(50.0, description="Seconds between keepalive messages (default: 50)")
 
+    # SSE Streaming cancellation settings
+    enable_cancellation_aware_streaming: bool = Field(True, description="Enable cancellation aware streaming")
+
     # default handles
     default_llm_handle: Optional[str] = None
    default_embedding_handle: Optional[str] = None
@@ -303,6 +306,9 @@ class Settings(BaseSettings):
     tpuf_region: str = "gcp-us-central1"
     embed_all_messages: bool = False
 
+    # For encryption
+    encryption_key: Optional[str] = None
+
     # File processing timeout settings
     file_processing_timeout_minutes: int = 30
    file_processing_timeout_error_message: str = "File processing timed out after {} minutes. Please try again."
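Both new `Settings` fields above can be exercised directly; a minimal sketch, assuming the remaining fields keep their defaults (the corresponding environment-variable names follow the project's pydantic `BaseSettings` prefix, which is not shown in this diff):

```python
from letta.settings import Settings

# Construct Settings explicitly instead of relying on environment variables.
settings = Settings(
    enable_cancellation_aware_streaming=False,  # opt out of cancellation-aware SSE streaming
    encryption_key="example-key",               # consumed by the new encryption support (consumer not shown in this diff)
)

print(settings.enable_cancellation_aware_streaming)  # False
print(settings.encryption_key is not None)           # True
```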
letta/utils.py
CHANGED
@@ -17,7 +17,7 @@ from contextlib import contextmanager
 from datetime import datetime, timezone
 from functools import wraps
 from logging import Logger
-from typing import Any, Coroutine, Optional, Union, _GenericAlias, get_args, get_origin, get_type_hints
+from typing import Any, Callable, Coroutine, Optional, Union, _GenericAlias, get_args, get_origin, get_type_hints
 from urllib.parse import urljoin, urlparse
 
 import demjson3 as demjson
@@ -1271,3 +1271,36 @@ def truncate_file_visible_content(visible_content: str, is_open: bool, per_file_
     visible_content += truncated_warning
 
     return visible_content
+
+
+def fire_and_forget(coro, task_name: Optional[str] = None, error_callback: Optional[Callable[[Exception], None]] = None) -> asyncio.Task:
+    """
+    Execute an async coroutine in the background without waiting for completion.
+
+    Args:
+        coro: The coroutine to execute
+        task_name: Optional name for logging purposes
+        error_callback: Optional callback to execute if the task fails
+
+    Returns:
+        The created asyncio Task object
+    """
+    import traceback
+
+    task = asyncio.create_task(coro)
+
+    def callback(t):
+        try:
+            t.result() # this re-raises exceptions from the task
+        except Exception as e:
+            task_desc = f"Background task {task_name}" if task_name else "Background task"
+            logger.error(f"{task_desc} failed: {str(e)}\n{traceback.format_exc()}")
+
+            if error_callback:
+                try:
+                    error_callback(e)
+                except Exception as callback_error:
+                    logger.error(f"Error callback failed: {callback_error}")
+
+    task.add_done_callback(callback)
+    return task
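A usage sketch for the new `fire_and_forget` helper (the `embed_in_background` coroutine and its arguments are illustrative, not part of the package):

```python
import asyncio

from letta.utils import fire_and_forget


async def embed_in_background(message_id: str) -> None:
    # Stand-in for whatever work a caller wants to detach from the request path.
    await asyncio.sleep(0.1)
    print(f"embedded {message_id}")


async def main() -> None:
    # Schedule the coroutine without awaiting it; failures are logged by the
    # done-callback inside fire_and_forget and forwarded to error_callback.
    task = fire_and_forget(
        embed_in_background("message-123"),
        task_name="embed message-123",
        error_callback=lambda exc: print(f"background failure: {exc}"),
    )
    # The caller can continue immediately; awaiting here only keeps the demo deterministic.
    await task


asyncio.run(main())
```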
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: letta-nightly
-Version: 0.11.7.
+Version: 0.11.7.dev20250910104051
 Summary: Create LLM agents with long-term memory and custom tools
 Author-email: Letta Team <contact@letta.com>
 License: Apache License
@@ -26,7 +26,7 @@ Requires-Dist: html2text>=2020.1.16
 Requires-Dist: httpx-sse>=0.4.0
 Requires-Dist: httpx>=0.28.0
 Requires-Dist: jinja2>=3.1.5
-Requires-Dist: letta-client
+Requires-Dist: letta-client>=0.1.319
 Requires-Dist: llama-index-embeddings-openai>=0.3.1
 Requires-Dist: llama-index>=0.12.2
 Requires-Dist: markitdown[docx,pdf,pptx]>=0.1.2