PyPI - letta-nightly - Versions diffs - 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl - Mend

letta-nightly 0.11.7.dev20250909104137-py3-none-any.whl → 0.11.7.dev20250911104039-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

letta/adapters/letta_llm_adapter.py +81 -0
letta/adapters/letta_llm_request_adapter.py +113 -0
letta/adapters/letta_llm_stream_adapter.py +171 -0
letta/agents/agent_loop.py +23 -0
letta/agents/base_agent.py +4 -1
letta/agents/base_agent_v2.py +68 -0
letta/agents/helpers.py +3 -5
letta/agents/letta_agent.py +23 -12
letta/agents/letta_agent_v2.py +1221 -0
letta/agents/voice_agent.py +2 -1
letta/constants.py +1 -1
letta/errors.py +12 -0
letta/functions/function_sets/base.py +53 -12
letta/functions/helpers.py +3 -2
letta/functions/schema_generator.py +1 -1
letta/groups/sleeptime_multi_agent_v2.py +4 -2
letta/groups/sleeptime_multi_agent_v3.py +233 -0
letta/helpers/tool_rule_solver.py +4 -0
letta/helpers/tpuf_client.py +607 -34
letta/interfaces/anthropic_streaming_interface.py +74 -30
letta/interfaces/openai_streaming_interface.py +80 -37
letta/llm_api/google_vertex_client.py +1 -1
letta/llm_api/openai_client.py +45 -4
letta/orm/agent.py +4 -1
letta/orm/block.py +2 -0
letta/orm/blocks_agents.py +1 -0
letta/orm/group.py +1 -0
letta/orm/source.py +8 -1
letta/orm/sources_agents.py +2 -1
letta/orm/step_metrics.py +10 -0
letta/orm/tools_agents.py +5 -2
letta/schemas/block.py +4 -0
letta/schemas/enums.py +1 -0
letta/schemas/group.py +8 -0
letta/schemas/letta_message.py +1 -1
letta/schemas/letta_request.py +2 -2
letta/schemas/mcp.py +9 -1
letta/schemas/message.py +42 -2
letta/schemas/providers/ollama.py +1 -1
letta/schemas/providers.py +1 -2
letta/schemas/source.py +6 -0
letta/schemas/step_metrics.py +2 -0
letta/server/rest_api/interface.py +34 -2
letta/server/rest_api/json_parser.py +2 -0
letta/server/rest_api/redis_stream_manager.py +2 -1
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
letta/server/rest_api/routers/v1/__init__.py +2 -0
letta/server/rest_api/routers/v1/agents.py +132 -170
letta/server/rest_api/routers/v1/blocks.py +6 -0
letta/server/rest_api/routers/v1/folders.py +25 -7
letta/server/rest_api/routers/v1/groups.py +6 -0
letta/server/rest_api/routers/v1/internal_templates.py +218 -12
letta/server/rest_api/routers/v1/messages.py +14 -19
letta/server/rest_api/routers/v1/runs.py +43 -28
letta/server/rest_api/routers/v1/sources.py +25 -7
letta/server/rest_api/routers/v1/tools.py +42 -0
letta/server/rest_api/streaming_response.py +11 -2
letta/server/server.py +9 -6
letta/services/agent_manager.py +39 -59
letta/services/agent_serialization_manager.py +26 -11
letta/services/archive_manager.py +60 -9
letta/services/block_manager.py +5 -0
letta/services/file_processor/embedder/base_embedder.py +5 -0
letta/services/file_processor/embedder/openai_embedder.py +4 -0
letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
letta/services/file_processor/file_processor.py +9 -7
letta/services/group_manager.py +74 -11
letta/services/mcp_manager.py +134 -28
letta/services/message_manager.py +229 -125
letta/services/passage_manager.py +2 -1
letta/services/source_manager.py +23 -1
letta/services/summarizer/summarizer.py +4 -1
letta/services/tool_executor/core_tool_executor.py +2 -120
letta/services/tool_executor/files_tool_executor.py +133 -8
letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
letta/services/tool_sandbox/local_sandbox.py +2 -2
letta/services/tool_sandbox/modal_version_manager.py +2 -1
letta/settings.py +6 -0
letta/streaming_utils.py +29 -4
letta/utils.py +106 -4
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
{letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0

letta/helpers/tpuf_client.py CHANGED Viewed

@@ -4,16 +4,19 @@ import logging
 from datetime import datetime, timezone
 from typing import Any, Callable, List, Optional, Tuple
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
 from letta.otel.tracing import trace_method
+from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole, TagMatchMode
 from letta.schemas.passage import Passage as PydanticPassage
-from letta.settings import settings
+from letta.settings import model_settings, settings
 logger = logging.getLogger(__name__)
 def should_use_tpuf() -> bool:
-    return bool(settings.use_tpuf) and bool(settings.tpuf_api_key)
+    # We need OpenAI since we default to their embedding model
+    return bool(settings.use_tpuf) and bool(settings.tpuf_api_key) and bool(model_settings.openai_api_key)
 def should_use_tpuf_for_messages() -> bool:
@@ -24,6 +27,14 @@ def should_use_tpuf_for_messages() -> bool:
 class TurbopufferClient:
     """Client for managing archival memory with Turbopuffer vector database."""
+    default_embedding_config = EmbeddingConfig(
+        embedding_model="text-embedding-3-small",
+        embedding_endpoint_type="openai",
+        embedding_endpoint="https://api.openai.com/v1",
+        embedding_dim=1536,
+        embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+    )
     def __init__(self, api_key: str = None, region: str = None):
         """Initialize Turbopuffer client."""
         self.api_key = api_key or settings.tpuf_api_key
@@ -38,32 +49,57 @@ class TurbopufferClient:
         if not self.api_key:
             raise ValueError("Turbopuffer API key not provided")
+    @trace_method
+    async def _generate_embeddings(self, texts: List[str], actor: "PydanticUser") -> List[List[float]]:
+        """Generate embeddings using the default embedding configuration.
+        Args:
+            texts: List of texts to embed
+            actor: User actor for embedding generation
+        Returns:
+            List of embedding vectors
+        """
+        from letta.llm_api.llm_client import LLMClient
+        embedding_client = LLMClient.create(
+            provider_type=self.default_embedding_config.embedding_endpoint_type,
+            actor=actor,
+        )
+        embeddings = await embedding_client.request_embeddings(texts, self.default_embedding_config)
+        return embeddings
     @trace_method
     async def _get_archive_namespace_name(self, archive_id: str) -> str:
         """Get namespace name for a specific archive."""
         return await self.archive_manager.get_or_set_vector_db_namespace_async(archive_id)
     @trace_method
-    async def _get_message_namespace_name(self, agent_id: str, organization_id: str) -> str:
+    async def _get_message_namespace_name(self, organization_id: str) -> str:
         """Get namespace name for messages (org-scoped).
         Args:
-            agent_id: Agent ID (stored for future sharding)
             organization_id: Organization ID for namespace generation
         Returns:
             The org-scoped namespace name for messages
         """
-        return await self.agent_manager.get_or_set_vector_db_namespace_async(agent_id, organization_id)
+        environment = settings.environment
+        if environment:
+            namespace_name = f"messages_{organization_id}_{environment.lower()}"
+        else:
+            namespace_name = f"messages_{organization_id}"
+        return namespace_name
     @trace_method
     async def insert_archival_memories(
         self,
         archive_id: str,
         text_chunks: List[str],
-        embeddings: List[List[float]],
         passage_ids: List[str],
         organization_id: str,
+        actor: "PydanticUser",
         tags: Optional[List[str]] = None,
         created_at: Optional[datetime] = None,
     ) -> List[PydanticPassage]:
@@ -72,9 +108,9 @@ class TurbopufferClient:
         Args:
             archive_id: ID of the archive
             text_chunks: List of text chunks to store
-            embeddings: List of embedding vectors corresponding to text chunks
             passage_ids: List of passage IDs (must match 1:1 with text_chunks)
             organization_id: Organization ID for the passages
+            actor: User actor for embedding generation
             tags: Optional list of tags to attach to all passages
             created_at: Optional timestamp for retroactive entries (defaults to current UTC time)
@@ -83,6 +119,9 @@ class TurbopufferClient:
         """
         from turbopuffer import AsyncTurbopuffer
+        # generate embeddings using the default config
+        embeddings = await self._generate_embeddings(text_chunks, actor)
         namespace_name = await self._get_archive_namespace_name(archive_id)
         # handle timestamp - ensure UTC
@@ -102,8 +141,6 @@ class TurbopufferClient:
             raise ValueError("passage_ids must be provided for Turbopuffer insertion")
         if len(passage_ids) != len(text_chunks):
             raise ValueError(f"passage_ids length ({len(passage_ids)}) must match text_chunks length ({len(text_chunks)})")
-        if len(passage_ids) != len(embeddings):
-            raise ValueError(f"passage_ids length ({len(passage_ids)}) must match embeddings length ({len(embeddings)})")
         # prepare column-based data for turbopuffer - optimized for batch insert
         ids = []
@@ -137,7 +174,7 @@ class TurbopufferClient:
                 metadata_={},
                 tags=tags or [],  # Include tags in the passage
                 embedding=embedding,
-                embedding_config=None,  # Will be set by caller if needed
+                embedding_config=self.default_embedding_config,  # Will be set by caller if needed
             )
             passages.append(passage)
@@ -177,37 +214,42 @@ class TurbopufferClient:
         self,
         agent_id: str,
         message_texts: List[str],
-        embeddings: List[List[float]],
         message_ids: List[str],
         organization_id: str,
+        actor: "PydanticUser",
         roles: List[MessageRole],
         created_ats: List[datetime],
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
     ) -> bool:
         """Insert messages into Turbopuffer.
         Args:
             agent_id: ID of the agent
             message_texts: List of message text content to store
-            embeddings: List of embedding vectors corresponding to message texts
             message_ids: List of message IDs (must match 1:1 with message_texts)
             organization_id: Organization ID for the messages
+            actor: User actor for embedding generation
             roles: List of message roles corresponding to each message
             created_ats: List of creation timestamps for each message
+            project_id: Optional project ID for all messages
+            template_id: Optional template ID for all messages
         Returns:
             True if successful
         """
         from turbopuffer import AsyncTurbopuffer
-        namespace_name = await self._get_message_namespace_name(agent_id, organization_id)
+        # generate embeddings using the default config
+        embeddings = await self._generate_embeddings(message_texts, actor)
+        namespace_name = await self._get_message_namespace_name(organization_id)
         # validation checks
         if not message_ids:
             raise ValueError("message_ids must be provided for Turbopuffer insertion")
         if len(message_ids) != len(message_texts):
             raise ValueError(f"message_ids length ({len(message_ids)}) must match message_texts length ({len(message_texts)})")
-        if len(message_ids) != len(embeddings):
-            raise ValueError(f"message_ids length ({len(message_ids)}) must match embeddings length ({len(embeddings)})")
         if len(message_ids) != len(roles):
             raise ValueError(f"message_ids length ({len(message_ids)}) must match roles length ({len(roles)})")
         if len(message_ids) != len(created_ats):
@@ -221,6 +263,8 @@ class TurbopufferClient:
         agent_ids = []
         message_roles = []
         created_at_timestamps = []
+        project_ids = []
+        template_ids = []
         for idx, (text, embedding, role, created_at) in enumerate(zip(message_texts, embeddings, roles, created_ats)):
             message_id = message_ids[idx]
@@ -241,6 +285,8 @@ class TurbopufferClient:
             agent_ids.append(agent_id)
             message_roles.append(role.value)
             created_at_timestamps.append(timestamp)
+            project_ids.append(project_id)
+            template_ids.append(template_id)
         # build column-based upsert data
         upsert_columns = {
@@ -253,6 +299,14 @@ class TurbopufferClient:
             "created_at": created_at_timestamps,
         }
+        # only include project_id if it's provided
+        if project_id is not None:
+            upsert_columns["project_id"] = project_ids
+        # only include template_id if it's provided
+        if template_id is not None:
+            upsert_columns["template_id"] = template_ids
         try:
             # Use AsyncTurbopuffer as a context manager for proper resource cleanup
             async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
@@ -382,7 +436,7 @@ class TurbopufferClient:
     async def query_passages(
         self,
         archive_id: str,
-        query_embedding: Optional[List[float]] = None,
+        actor: "PydanticUser",
         query_text: Optional[str] = None,
         search_mode: str = "vector",  # "vector", "fts", "hybrid"
         top_k: int = 10,
@@ -392,13 +446,13 @@ class TurbopufferClient:
         fts_weight: float = 0.5,
         start_date: Optional[datetime] = None,
         end_date: Optional[datetime] = None,
-    ) -> List[Tuple[PydanticPassage, float]]:
+    ) -> List[Tuple[PydanticPassage, float, dict]]:
         """Query passages from Turbopuffer using vector search, full-text search, or hybrid search.
         Args:
             archive_id: ID of the archive
-            query_embedding: Embedding vector for vector search (required for "vector" and "hybrid" modes)
-            query_text: Text query for full-text search (required for "fts" and "hybrid" modes)
+            actor: User actor for embedding generation
+            query_text: Text query for search (used for embedding in vector/hybrid modes, and FTS in fts/hybrid modes)
             search_mode: Search mode - "vector", "fts", or "hybrid" (default: "vector")
             top_k: Number of results to return
             tags: Optional list of tags to filter by
@@ -406,11 +460,17 @@ class TurbopufferClient:
             vector_weight: Weight for vector search results in hybrid mode (default: 0.5)
             fts_weight: Weight for FTS results in hybrid mode (default: 0.5)
             start_date: Optional datetime to filter passages created after this date
-            end_date: Optional datetime to filter passages created before this date
+            end_date: Optional datetime to filter passages created on or before this date (inclusive)
         Returns:
-            List of (passage, score) tuples
+            List of (passage, score, metadata) tuples with relevance rankings
         """
+        # generate embedding for vector/hybrid search if query_text is provided
+        query_embedding = None
+        if query_text and search_mode in ["vector", "hybrid"]:
+            embeddings = await self._generate_embeddings([query_text], actor)
+            query_embedding = embeddings[0]
         # Check if we should fallback to timestamp-based retrieval
         if query_embedding is None and query_text is None and search_mode not in ["timestamp"]:
             # Fallback to retrieving most recent passages when no search query is provided
@@ -439,6 +499,13 @@ class TurbopufferClient:
         if start_date:
             date_filters.append(("created_at", "Gte", start_date))
         if end_date:
+            # if end_date has no time component (is at midnight), adjust to end of day
+            # to make the filter inclusive of the entire day
+            if end_date.hour == 0 and end_date.minute == 0 and end_date.second == 0 and end_date.microsecond == 0:
+                from datetime import timedelta
+                # add 1 day and subtract 1 microsecond to get 23:59:59.999999
+                end_date = end_date + timedelta(days=1) - timedelta(microseconds=1)
             date_filters.append(("created_at", "Lte", end_date))
         # combine all filters
@@ -474,7 +541,7 @@ class TurbopufferClient:
                 # for hybrid mode, we get a multi-query response
                 vector_results = self._process_single_query_results(result.results[0], archive_id, tags)
                 fts_results = self._process_single_query_results(result.results[1], archive_id, tags, is_fts=True)
-                # use RRF and return only (passage, score) for backwards compatibility
+                # use RRF and include metadata with ranks
                 results_with_metadata = self._reciprocal_rank_fusion(
                     vector_results=[passage for passage, _ in vector_results],
                     fts_results=[passage for passage, _ in fts_results],
@@ -483,26 +550,38 @@ class TurbopufferClient:
                     fts_weight=fts_weight,
                     top_k=top_k,
                 )
-                return [(passage, rrf_score) for passage, rrf_score, metadata in results_with_metadata]
+                # Return (passage, score, metadata) with ranks
+                return results_with_metadata
             else:
-                # for single queries (vector, fts, timestamp)
+                # for single queries (vector, fts, timestamp) - add basic metadata
                 is_fts = search_mode == "fts"
-                return self._process_single_query_results(result, archive_id, tags, is_fts=is_fts)
+                results = self._process_single_query_results(result, archive_id, tags, is_fts=is_fts)
+                # Add simple metadata for single search modes
+                results_with_metadata = []
+                for idx, (passage, score) in enumerate(results):
+                    metadata = {
+                        "combined_score": score,
+                        f"{search_mode}_rank": idx + 1,  # Add the rank for this search mode
+                    }
+                    results_with_metadata.append((passage, score, metadata))
+                return results_with_metadata
         except Exception as e:
             logger.error(f"Failed to query passages from Turbopuffer: {e}")
             raise
     @trace_method
-    async def query_messages(
+    async def query_messages_by_agent_id(
         self,
         agent_id: str,
         organization_id: str,
-        query_embedding: Optional[List[float]] = None,
+        actor: "PydanticUser",
         query_text: Optional[str] = None,
         search_mode: str = "vector",  # "vector", "fts", "hybrid", "timestamp"
         top_k: int = 10,
         roles: Optional[List[MessageRole]] = None,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
         vector_weight: float = 0.5,
         fts_weight: float = 0.5,
         start_date: Optional[datetime] = None,
@@ -513,15 +592,17 @@ class TurbopufferClient:
         Args:
             agent_id: ID of the agent (used for filtering results)
             organization_id: Organization ID for namespace lookup
-            query_embedding: Embedding vector for vector search (required for "vector" and "hybrid" modes)
-            query_text: Text query for full-text search (required for "fts" and "hybrid" modes)
+            actor: User actor for embedding generation
+            query_text: Text query for search (used for embedding in vector/hybrid modes, and FTS in fts/hybrid modes)
             search_mode: Search mode - "vector", "fts", "hybrid", or "timestamp" (default: "vector")
             top_k: Number of results to return
             roles: Optional list of message roles to filter by
+            project_id: Optional project ID to filter messages by
+            template_id: Optional template ID to filter messages by
             vector_weight: Weight for vector search results in hybrid mode (default: 0.5)
             fts_weight: Weight for FTS results in hybrid mode (default: 0.5)
             start_date: Optional datetime to filter messages created after this date
-            end_date: Optional datetime to filter messages created before this date
+            end_date: Optional datetime to filter messages created on or before this date (inclusive)
         Returns:
             List of (message_dict, score, metadata) tuples where:
@@ -529,12 +610,18 @@ class TurbopufferClient:
             - score is the final relevance score
             - metadata contains individual scores and ranking information
         """
+        # generate embedding for vector/hybrid search if query_text is provided
+        query_embedding = None
+        if query_text and search_mode in ["vector", "hybrid"]:
+            embeddings = await self._generate_embeddings([query_text], actor)
+            query_embedding = embeddings[0]
         # Check if we should fallback to timestamp-based retrieval
         if query_embedding is None and query_text is None and search_mode not in ["timestamp"]:
             # Fallback to retrieving most recent messages when no search query is provided
             search_mode = "timestamp"
-        namespace_name = await self._get_message_namespace_name(agent_id, organization_id)
+        namespace_name = await self._get_message_namespace_name(organization_id)
         # build agent_id filter
         agent_filter = ("agent_id", "Eq", agent_id)
@@ -553,12 +640,33 @@ class TurbopufferClient:
         if start_date:
             date_filters.append(("created_at", "Gte", start_date))
         if end_date:
+            # if end_date has no time component (is at midnight), adjust to end of day
+            # to make the filter inclusive of the entire day
+            if end_date.hour == 0 and end_date.minute == 0 and end_date.second == 0 and end_date.microsecond == 0:
+                from datetime import timedelta
+                # add 1 day and subtract 1 microsecond to get 23:59:59.999999
+                end_date = end_date + timedelta(days=1) - timedelta(microseconds=1)
             date_filters.append(("created_at", "Lte", end_date))
+        # build project_id filter if provided
+        project_filter = None
+        if project_id:
+            project_filter = ("project_id", "Eq", project_id)
+        # build template_id filter if provided
+        template_filter = None
+        if template_id:
+            template_filter = ("template_id", "Eq", template_id)
         # combine all filters
         all_filters = [agent_filter]  # always include agent_id filter
         if role_filter:
             all_filters.append(role_filter)
+        if project_filter:
+            all_filters.append(project_filter)
+        if template_filter:
+            all_filters.append(template_filter)
         if date_filters:
             all_filters.extend(date_filters)
@@ -617,6 +725,165 @@ class TurbopufferClient:
             logger.error(f"Failed to query messages from Turbopuffer: {e}")
             raise
+    async def query_messages_by_org_id(
+        self,
+        organization_id: str,
+        actor: "PydanticUser",
+        query_text: Optional[str] = None,
+        search_mode: str = "hybrid",  # "vector", "fts", "hybrid"
+        top_k: int = 10,
+        roles: Optional[List[MessageRole]] = None,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
+        vector_weight: float = 0.5,
+        fts_weight: float = 0.5,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+    ) -> List[Tuple[dict, float, dict]]:
+        """Query messages from Turbopuffer across an entire organization.
+        Args:
+            organization_id: Organization ID for namespace lookup (required)
+            actor: User actor for embedding generation
+            query_text: Text query for search (used for embedding in vector/hybrid modes, and FTS in fts/hybrid modes)
+            search_mode: Search mode - "vector", "fts", or "hybrid" (default: "hybrid")
+            top_k: Number of results to return
+            roles: Optional list of message roles to filter by
+            project_id: Optional project ID to filter messages by
+            template_id: Optional template ID to filter messages by
+            vector_weight: Weight for vector search results in hybrid mode (default: 0.5)
+            fts_weight: Weight for FTS results in hybrid mode (default: 0.5)
+            start_date: Optional datetime to filter messages created after this date
+            end_date: Optional datetime to filter messages created on or before this date (inclusive)
+        Returns:
+            List of (message_dict, score, metadata) tuples where:
+            - message_dict contains id, text, role, created_at, agent_id
+            - score is the final relevance score (RRF score for hybrid, rank-based for single mode)
+            - metadata contains individual scores and ranking information
+        """
+        # generate embedding for vector/hybrid search if query_text is provided
+        query_embedding = None
+        if query_text and search_mode in ["vector", "hybrid"]:
+            embeddings = await self._generate_embeddings([query_text], actor)
+            query_embedding = embeddings[0]
+        # namespace is org-scoped
+        namespace_name = await self._get_message_namespace_name(organization_id)
+        # build filters
+        all_filters = []
+        # role filter
+        if roles:
+            role_values = [r.value for r in roles]
+            if len(role_values) == 1:
+                all_filters.append(("role", "Eq", role_values[0]))
+            else:
+                all_filters.append(("role", "In", role_values))
+        # project filter
+        if project_id:
+            all_filters.append(("project_id", "Eq", project_id))
+        # template filter
+        if template_id:
+            all_filters.append(("template_id", "Eq", template_id))
+        # date filters
+        if start_date:
+            all_filters.append(("created_at", "Gte", start_date))
+        if end_date:
+            # make end_date inclusive of the entire day
+            if end_date.hour == 0 and end_date.minute == 0 and end_date.second == 0 and end_date.microsecond == 0:
+                from datetime import timedelta
+                end_date = end_date + timedelta(days=1) - timedelta(microseconds=1)
+            all_filters.append(("created_at", "Lte", end_date))
+        # combine filters
+        final_filter = None
+        if len(all_filters) == 1:
+            final_filter = all_filters[0]
+        elif len(all_filters) > 1:
+            final_filter = ("And", all_filters)
+        try:
+            # execute query
+            result = await self._execute_query(
+                namespace_name=namespace_name,
+                search_mode=search_mode,
+                query_embedding=query_embedding,
+                query_text=query_text,
+                top_k=top_k,
+                include_attributes=["text", "organization_id", "agent_id", "role", "created_at"],
+                filters=final_filter,
+                vector_weight=vector_weight,
+                fts_weight=fts_weight,
+            )
+            # process results based on search mode
+            if search_mode == "hybrid":
+                # for hybrid mode, we get a multi-query response
+                vector_results = self._process_message_query_results(result.results[0])
+                fts_results = self._process_message_query_results(result.results[1])
+                # use existing RRF method - it already returns metadata with ranks
+                results_with_metadata = self._reciprocal_rank_fusion(
+                    vector_results=vector_results,
+                    fts_results=fts_results,
+                    get_id_func=lambda msg_dict: msg_dict["id"],
+                    vector_weight=vector_weight,
+                    fts_weight=fts_weight,
+                    top_k=top_k,
+                )
+                # add raw scores to metadata if available
+                vector_scores = {}
+                for row in result.results[0].rows:
+                    if hasattr(row, "dist"):
+                        vector_scores[row.id] = row.dist
+                fts_scores = {}
+                for row in result.results[1].rows:
+                    if hasattr(row, "score"):
+                        fts_scores[row.id] = row.score
+                # enhance metadata with raw scores
+                enhanced_results = []
+                for msg_dict, rrf_score, metadata in results_with_metadata:
+                    msg_id = msg_dict["id"]
+                    if msg_id in vector_scores:
+                        metadata["vector_score"] = vector_scores[msg_id]
+                    if msg_id in fts_scores:
+                        metadata["fts_score"] = fts_scores[msg_id]
+                    enhanced_results.append((msg_dict, rrf_score, metadata))
+                return enhanced_results
+            else:
+                # for single queries (vector or fts)
+                results = self._process_message_query_results(result)
+                results_with_metadata = []
+                for idx, msg_dict in enumerate(results):
+                    metadata = {
+                        "combined_score": 1.0 / (idx + 1),
+                        "search_mode": search_mode,
+                        f"{search_mode}_rank": idx + 1,
+                    }
+                    # add raw score if available
+                    if hasattr(result.rows[idx], "dist"):
+                        metadata["vector_score"] = result.rows[idx].dist
+                    elif hasattr(result.rows[idx], "score"):
+                        metadata["fts_score"] = result.rows[idx].score
+                    results_with_metadata.append((msg_dict, metadata["combined_score"], metadata))
+                return results_with_metadata
+        except Exception as e:
+            logger.error(f"Failed to query messages from Turbopuffer: {e}")
+            raise
     def _process_message_query_results(self, result) -> List[dict]:
         """Process results from a message query into message dicts.
@@ -662,7 +929,7 @@ class TurbopufferClient:
                 tags=passage_tags,  # Set the actual tags from the passage
                 # Set required fields to empty/default values since we don't store embeddings
                 embedding=[],  # Empty embedding since we don't return it from Turbopuffer
-                embedding_config=None,  # No embedding config needed for retrieved passages
+                embedding_config=self.default_embedding_config,  # No embedding config needed for retrieved passages
             )
             # handle score based on search type
@@ -815,7 +1082,7 @@ class TurbopufferClient:
         if not message_ids:
             return True
-        namespace_name = await self._get_message_namespace_name(agent_id, organization_id)
+        namespace_name = await self._get_message_namespace_name(organization_id)
         try:
             async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
@@ -833,7 +1100,7 @@ class TurbopufferClient:
         """Delete all messages for an agent from Turbopuffer."""
         from turbopuffer import AsyncTurbopuffer
-        namespace_name = await self._get_message_namespace_name(agent_id, organization_id)
+        namespace_name = await self._get_message_namespace_name(organization_id)
         try:
             async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
@@ -846,3 +1113,309 @@ class TurbopufferClient:
         except Exception as e:
             logger.error(f"Failed to delete all messages from Turbopuffer: {e}")
             raise
+    # file/source passage methods
+    @trace_method
+    async def _get_file_passages_namespace_name(self, organization_id: str) -> str:
+        """Build the org-scoped Turbopuffer namespace name used for file passages.
+        Args:
+            organization_id: Organization ID embedded in the namespace name
+        Returns:
+            "file_passages_<organization_id>", with a lower-cased "_<environment>"
+            suffix appended when settings.environment is set
+        """
+        base_name = f"file_passages_{organization_id}"
+        env = settings.environment
+        # only suffix the namespace when an environment is configured
+        return f"{base_name}_{env.lower()}" if env else base_name
+    @trace_method
+    async def insert_file_passages(
+        self,
+        source_id: str,
+        file_id: str,
+        text_chunks: List[str],
+        organization_id: str,
+        actor: "PydanticUser",
+        created_at: Optional[datetime] = None,
+    ) -> List[PydanticPassage]:
+        """Embed text chunks and upsert them as file passages in the org-scoped namespace.
+        Args:
+            source_id: ID of the source containing the file
+            file_id: ID of the file
+            text_chunks: List of text chunks to store
+            organization_id: Organization ID used for the namespace and stored on every passage
+            actor: User actor for embedding generation
+            created_at: Optional timestamp for retroactive entries (defaults to current UTC time)
+        Returns:
+            List of PydanticPassage objects that were inserted (empty when text_chunks is empty)
+        Raises:
+            ValueError: If the number of generated embeddings differs from the number of chunks
+            Exception: Any error raised by the Turbopuffer write is logged and re-raised
+        """
+        from turbopuffer import AsyncTurbopuffer
+        if not text_chunks:
+            return []
+        # generate embeddings using the default config
+        embeddings = await self._generate_embeddings(text_chunks, actor)
+        # zip() would silently drop chunks on a count mismatch, so fail loudly instead
+        if len(embeddings) != len(text_chunks):
+            raise ValueError(f"Expected {len(text_chunks)} embeddings for file {file_id}, got {len(embeddings)}")
+        namespace_name = await self._get_file_passages_namespace_name(organization_id)
+        # handle timestamp - ensure UTC
+        if created_at is None:
+            timestamp = datetime.now(timezone.utc)
+        elif created_at.tzinfo is None:
+            # assume UTC if no timezone provided
+            timestamp = created_at.replace(tzinfo=timezone.utc)
+        else:
+            # convert to UTC if in different timezone
+            timestamp = created_at.astimezone(timezone.utc)
+        # use the organization_id argument so the returned passages match the stored rows
+        passages = [
+            PydanticPassage(
+                text=text,
+                file_id=file_id,
+                source_id=source_id,
+                embedding=embedding,
+                embedding_config=self.default_embedding_config,
+                organization_id=organization_id,
+            )
+            for text, embedding in zip(text_chunks, embeddings)
+        ]
+        passage_count = len(passages)
+        # build column-based upsert data - turbopuffer recommends column-based writes for performance
+        upsert_columns = {
+            "id": [passage.id for passage in passages],
+            "vector": list(embeddings),
+            "text": list(text_chunks),
+            "organization_id": [organization_id] * passage_count,
+            "source_id": [source_id] * passage_count,
+            "file_id": [file_id] * passage_count,
+            "created_at": [timestamp] * passage_count,
+        }
+        try:
+            # use AsyncTurbopuffer as a context manager for proper resource cleanup
+            async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
+                namespace = client.namespace(namespace_name)
+                await namespace.write(
+                    upsert_columns=upsert_columns,
+                    distance_metric="cosine_distance",
+                    schema={"text": {"type": "string", "full_text_search": True}},
+                )
+                logger.info(f"Successfully inserted {passage_count} file passages to Turbopuffer for source {source_id}, file {file_id}")
+                return passages
+        except Exception as e:
+            logger.error(f"Failed to insert file passages to Turbopuffer: {e}")
+            # check if it's a duplicate ID error
+            if "duplicate" in str(e).lower():
+                logger.error("Duplicate passage IDs detected in batch")
+            raise
+    @trace_method
+    async def query_file_passages(
+        self,
+        source_ids: List[str],
+        organization_id: str,
+        actor: "PydanticUser",
+        query_text: Optional[str] = None,
+        search_mode: str = "vector",  # "vector", "fts", "hybrid"
+        top_k: int = 10,
+        file_id: Optional[str] = None,  # optional filter by specific file
+        vector_weight: float = 0.5,
+        fts_weight: float = 0.5,
+    ) -> List[Tuple[PydanticPassage, float, dict]]:
+        """Search the org-scoped file-passage namespace, restricted to the given sources.
+        When query_text is None the search mode is switched to "timestamp".
+        Args:
+            source_ids: Source IDs the results must belong to
+            organization_id: Organization ID used to resolve the namespace
+            actor: User actor passed to embedding generation
+            query_text: Text to search for; embedded for "vector" and "hybrid" modes
+            search_mode: "vector", "fts", or "hybrid" (default: "vector")
+            top_k: Number of results to return
+            file_id: Optional file ID narrowing the results to one file
+            vector_weight: Weight for vector results in hybrid mode (default: 0.5)
+            fts_weight: Weight for FTS results in hybrid mode (default: 0.5)
+        Returns:
+            List of (passage, score, metadata) tuples; for non-hybrid modes the metadata
+            holds "combined_score" and a 1-based "<search_mode>_rank"
+        """
+        # an embedding is only needed for the modes that run a vector query
+        query_embedding = None
+        if query_text and search_mode in ["vector", "hybrid"]:
+            query_embedding = (await self._generate_embeddings([query_text], actor))[0]
+        # no query text means no embedding either, so fall back to timestamp mode
+        if query_text is None:
+            search_mode = "timestamp"
+        namespace_name = await self._get_file_passages_namespace_name(organization_id)
+        # always constrain by source: Eq for a single ID, In for several
+        if len(source_ids) == 1:
+            source_filter = ("source_id", "Eq", source_ids[0])
+        else:
+            source_filter = ("source_id", "In", source_ids)
+        # wrap in And only when the optional file filter is present
+        if file_id:
+            final_filter = ("And", [source_filter, ("file_id", "Eq", file_id)])
+        else:
+            final_filter = source_filter
+        try:
+            result = await self._execute_query(
+                namespace_name=namespace_name,
+                search_mode=search_mode,
+                query_embedding=query_embedding,
+                query_text=query_text,
+                top_k=top_k,
+                include_attributes=["text", "organization_id", "source_id", "file_id", "created_at"],
+                filters=final_filter,
+                vector_weight=vector_weight,
+                fts_weight=fts_weight,
+            )
+            if search_mode != "hybrid":
+                # single query (vector, fts, timestamp): attach score and rank as metadata
+                scored = self._process_file_query_results(result, is_fts=(search_mode == "fts"))
+                rank_key = f"{search_mode}_rank"
+                return [
+                    (passage, score, {"combined_score": score, rank_key: position})
+                    for position, (passage, score) in enumerate(scored, start=1)
+                ]
+            # hybrid mode returns a multi-query response: vector results first, FTS second
+            vector_hits = self._process_file_query_results(result.results[0])
+            fts_hits = self._process_file_query_results(result.results[1], is_fts=True)
+            # fuse the two rankings; the fusion helper works on the bare passages
+            return self._reciprocal_rank_fusion(
+                vector_results=[passage for passage, _ in vector_hits],
+                fts_results=[passage for passage, _ in fts_hits],
+                get_id_func=lambda p: p.id,
+                vector_weight=vector_weight,
+                fts_weight=fts_weight,
+                top_k=top_k,
+            )
+        except Exception as e:
+            logger.error(f"Failed to query file passages from Turbopuffer: {e}")
+            raise
+    def _process_file_query_results(self, result, is_fts: bool = False) -> List[Tuple[PydanticPassage, float]]:
+        """Turn the rows of a file-passage query result into (passage, score) pairs.
+        Args:
+            result: Query result exposing a `rows` iterable
+            is_fts: When True the row's "$score" attribute is used as the score;
+                otherwise the score is 1.0 minus the row's "$dist" attribute
+        Returns:
+            One (passage, score) tuple per row, in row order
+        """
+        def _score_of(row) -> float:
+            if is_fts:
+                # FTS score is used directly (higher is better); missing scores count as 0.0
+                return getattr(row, "$score", 0.0)
+            # vector search reports a distance; convert it to a similarity
+            return 1.0 - getattr(row, "$dist", 0.0)
+        def _passage_of(row) -> PydanticPassage:
+            # embeddings are not returned from Turbopuffer, so the passage carries an empty one
+            return PydanticPassage(
+                id=row.id,
+                text=getattr(row, "text", ""),
+                organization_id=getattr(row, "organization_id", None),
+                source_id=getattr(row, "source_id", None),
+                file_id=getattr(row, "file_id", None),
+                created_at=getattr(row, "created_at", None),
+                metadata_={},
+                tags=[],
+                embedding=[],
+                embedding_config=self.default_embedding_config,
+            )
+        return [(_passage_of(row), _score_of(row)) for row in result.rows]
+    @trace_method
+    async def delete_file_passages(self, source_id: str, file_id: str, organization_id: str) -> bool:
+        """Remove every passage of one file from the org-scoped file-passage namespace.
+        Args:
+            source_id: ID of the source the file belongs to
+            file_id: ID of the file whose passages are deleted
+            organization_id: Organization ID used to resolve the namespace
+        Returns:
+            True once the delete has been issued; failures are logged and re-raised
+        """
+        from turbopuffer import AsyncTurbopuffer
+        namespace_name = await self._get_file_passages_namespace_name(organization_id)
+        # match on both IDs so only this file's passages within the source are removed
+        file_scope = ("And", [("source_id", "Eq", source_id), ("file_id", "Eq", file_id)])
+        try:
+            async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
+                outcome = await client.namespace(namespace_name).write(delete_by_filter=file_scope)
+                logger.info(
+                    f"Successfully deleted passages for file {file_id} from source {source_id} (deleted {outcome.rows_affected} rows)"
+                )
+                return True
+        except Exception as e:
+            logger.error(f"Failed to delete file passages from Turbopuffer: {e}")
+            raise
+    @trace_method
+    async def delete_source_passages(self, source_id: str, organization_id: str) -> bool:
+        """Remove every passage of a source from the org-scoped file-passage namespace.
+        Args:
+            source_id: ID of the source whose passages are deleted
+            organization_id: Organization ID used to resolve the namespace
+        Returns:
+            True once the delete has been issued; failures are logged and re-raised
+        """
+        from turbopuffer import AsyncTurbopuffer
+        namespace_name = await self._get_file_passages_namespace_name(organization_id)
+        # a single equality filter covers all files under the source
+        source_scope = ("source_id", "Eq", source_id)
+        try:
+            async with AsyncTurbopuffer(api_key=self.api_key, region=self.region) as client:
+                outcome = await client.namespace(namespace_name).write(delete_by_filter=source_scope)
+                logger.info(f"Successfully deleted all passages for source {source_id} (deleted {outcome.rows_affected} rows)")
+                return True
+        except Exception as e:
+            logger.error(f"Failed to delete source passages from Turbopuffer: {e}")
+            raise

letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl