letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +81 -0
- letta/adapters/letta_llm_request_adapter.py +113 -0
- letta/adapters/letta_llm_stream_adapter.py +171 -0
- letta/agents/agent_loop.py +23 -0
- letta/agents/base_agent.py +4 -1
- letta/agents/base_agent_v2.py +68 -0
- letta/agents/helpers.py +3 -5
- letta/agents/letta_agent.py +23 -12
- letta/agents/letta_agent_v2.py +1221 -0
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +1 -1
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +53 -12
- letta/functions/helpers.py +3 -2
- letta/functions/schema_generator.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +4 -2
- letta/groups/sleeptime_multi_agent_v3.py +233 -0
- letta/helpers/tool_rule_solver.py +4 -0
- letta/helpers/tpuf_client.py +607 -34
- letta/interfaces/anthropic_streaming_interface.py +74 -30
- letta/interfaces/openai_streaming_interface.py +80 -37
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/openai_client.py +45 -4
- letta/orm/agent.py +4 -1
- letta/orm/block.py +2 -0
- letta/orm/blocks_agents.py +1 -0
- letta/orm/group.py +1 -0
- letta/orm/source.py +8 -1
- letta/orm/sources_agents.py +2 -1
- letta/orm/step_metrics.py +10 -0
- letta/orm/tools_agents.py +5 -2
- letta/schemas/block.py +4 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +8 -0
- letta/schemas/letta_message.py +1 -1
- letta/schemas/letta_request.py +2 -2
- letta/schemas/mcp.py +9 -1
- letta/schemas/message.py +42 -2
- letta/schemas/providers/ollama.py +1 -1
- letta/schemas/providers.py +1 -2
- letta/schemas/source.py +6 -0
- letta/schemas/step_metrics.py +2 -0
- letta/server/rest_api/interface.py +34 -2
- letta/server/rest_api/json_parser.py +2 -0
- letta/server/rest_api/redis_stream_manager.py +2 -1
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +132 -170
- letta/server/rest_api/routers/v1/blocks.py +6 -0
- letta/server/rest_api/routers/v1/folders.py +25 -7
- letta/server/rest_api/routers/v1/groups.py +6 -0
- letta/server/rest_api/routers/v1/internal_templates.py +218 -12
- letta/server/rest_api/routers/v1/messages.py +14 -19
- letta/server/rest_api/routers/v1/runs.py +43 -28
- letta/server/rest_api/routers/v1/sources.py +25 -7
- letta/server/rest_api/routers/v1/tools.py +42 -0
- letta/server/rest_api/streaming_response.py +11 -2
- letta/server/server.py +9 -6
- letta/services/agent_manager.py +39 -59
- letta/services/agent_serialization_manager.py +26 -11
- letta/services/archive_manager.py +60 -9
- letta/services/block_manager.py +5 -0
- letta/services/file_processor/embedder/base_embedder.py +5 -0
- letta/services/file_processor/embedder/openai_embedder.py +4 -0
- letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
- letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
- letta/services/file_processor/file_processor.py +9 -7
- letta/services/group_manager.py +74 -11
- letta/services/mcp_manager.py +134 -28
- letta/services/message_manager.py +229 -125
- letta/services/passage_manager.py +2 -1
- letta/services/source_manager.py +23 -1
- letta/services/summarizer/summarizer.py +4 -1
- letta/services/tool_executor/core_tool_executor.py +2 -120
- letta/services/tool_executor/files_tool_executor.py +133 -8
- letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
- letta/services/tool_sandbox/local_sandbox.py +2 -2
- letta/services/tool_sandbox/modal_version_manager.py +2 -1
- letta/settings.py +6 -0
- letta/streaming_utils.py +29 -4
- letta/utils.py +106 -4
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
letta/services/agent_manager.py
CHANGED
@@ -720,7 +720,7 @@ class AgentManager:
|
|
720
720
|
# Only create messages if we initialized with messages
|
721
721
|
if not _init_with_no_messages:
|
722
722
|
await self.message_manager.create_many_messages_async(
|
723
|
-
pydantic_msgs=init_messages, actor=actor,
|
723
|
+
pydantic_msgs=init_messages, actor=actor, project_id=result.project_id, template_id=result.template_id
|
724
724
|
)
|
725
725
|
return result
|
726
726
|
|
@@ -1834,6 +1834,7 @@ class AgentManager:
|
|
1834
1834
|
message_id=curr_system_message.id,
|
1835
1835
|
message_update=MessageUpdate(**temp_message.model_dump()),
|
1836
1836
|
actor=actor,
|
1837
|
+
project_id=agent_state.project_id,
|
1837
1838
|
)
|
1838
1839
|
else:
|
1839
1840
|
curr_system_message = temp_message
|
@@ -1887,7 +1888,9 @@ class AgentManager:
|
|
1887
1888
|
self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser
|
1888
1889
|
) -> PydanticAgentState:
|
1889
1890
|
agent = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
|
1890
|
-
messages = await self.message_manager.create_many_messages_async(
|
1891
|
+
messages = await self.message_manager.create_many_messages_async(
|
1892
|
+
messages, actor=actor, project_id=agent.project_id, template_id=agent.template_id
|
1893
|
+
)
|
1891
1894
|
message_ids = agent.message_ids or []
|
1892
1895
|
message_ids += [m.id for m in messages]
|
1893
1896
|
return await self.set_in_context_messages_async(agent_id=agent_id, message_ids=message_ids, actor=actor)
|
@@ -2655,7 +2658,7 @@ class AgentManager:
|
|
2655
2658
|
embedding_config: Optional[EmbeddingConfig] = None,
|
2656
2659
|
tags: Optional[List[str]] = None,
|
2657
2660
|
tag_match_mode: Optional[TagMatchMode] = None,
|
2658
|
-
) -> List[PydanticPassage]:
|
2661
|
+
) -> List[Tuple[PydanticPassage, float, dict]]:
|
2659
2662
|
"""Lists all passages attached to an agent."""
|
2660
2663
|
# Check if we should use Turbopuffer for vector search
|
2661
2664
|
if embed_query and agent_id and query_text and embedding_config:
|
@@ -2688,7 +2691,6 @@ class AgentManager:
|
|
2688
2691
|
# use hybrid search to combine vector and full-text search
|
2689
2692
|
passages_with_scores = await tpuf_client.query_passages(
|
2690
2693
|
archive_id=archive_ids[0],
|
2691
|
-
query_embedding=query_embedding,
|
2692
2694
|
query_text=query_text, # pass text for potential hybrid search
|
2693
2695
|
search_mode="hybrid", # use hybrid mode for better results
|
2694
2696
|
top_k=limit,
|
@@ -2696,10 +2698,11 @@ class AgentManager:
|
|
2696
2698
|
tag_match_mode=tag_match_mode or TagMatchMode.ANY,
|
2697
2699
|
start_date=start_date,
|
2698
2700
|
end_date=end_date,
|
2701
|
+
actor=actor,
|
2699
2702
|
)
|
2700
2703
|
|
2701
|
-
# Return
|
2702
|
-
return
|
2704
|
+
# Return full tuples with metadata
|
2705
|
+
return passages_with_scores
|
2703
2706
|
else:
|
2704
2707
|
return []
|
2705
2708
|
|
@@ -2750,9 +2753,11 @@ class AgentManager:
|
|
2750
2753
|
if query_tags.intersection(passage_tags):
|
2751
2754
|
filtered_passages.append(passage)
|
2752
2755
|
|
2753
|
-
|
2756
|
+
# Return as tuples with empty metadata for SQL path
|
2757
|
+
return [(p, 0.0, {}) for p in filtered_passages]
|
2754
2758
|
|
2755
|
-
|
2759
|
+
# Return as tuples with empty metadata for SQL path
|
2760
|
+
return [(p, 0.0, {}) for p in pydantic_passages]
|
2756
2761
|
|
2757
2762
|
@enforce_types
|
2758
2763
|
@trace_method
|
@@ -2766,7 +2771,7 @@ class AgentManager:
|
|
2766
2771
|
top_k: Optional[int] = None,
|
2767
2772
|
start_datetime: Optional[str] = None,
|
2768
2773
|
end_datetime: Optional[str] = None,
|
2769
|
-
) ->
|
2774
|
+
) -> List[Dict[str, Any]]:
|
2770
2775
|
"""
|
2771
2776
|
Search archival memory using semantic (embedding-based) search with optional temporal filtering.
|
2772
2777
|
|
@@ -2783,11 +2788,11 @@ class AgentManager:
|
|
2783
2788
|
end_datetime: Filter results before this datetime (ISO 8601 format)
|
2784
2789
|
|
2785
2790
|
Returns:
|
2786
|
-
|
2791
|
+
List of formatted results with relevance metadata
|
2787
2792
|
"""
|
2788
2793
|
# Handle empty or whitespace-only queries
|
2789
2794
|
if not query or not query.strip():
|
2790
|
-
return []
|
2795
|
+
return []
|
2791
2796
|
|
2792
2797
|
# Get the agent to access timezone and embedding config
|
2793
2798
|
agent_state = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
|
@@ -2839,7 +2844,7 @@ class AgentManager:
|
|
2839
2844
|
|
2840
2845
|
# Get results using existing passage query method
|
2841
2846
|
limit = top_k if top_k is not None else RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
|
2842
|
-
|
2847
|
+
passages_with_metadata = await self.query_agent_passages_async(
|
2843
2848
|
actor=actor,
|
2844
2849
|
agent_id=agent_id,
|
2845
2850
|
query_text=query,
|
@@ -2852,11 +2857,11 @@ class AgentManager:
|
|
2852
2857
|
end_date=end_date,
|
2853
2858
|
)
|
2854
2859
|
|
2855
|
-
# Format results to include tags with friendly timestamps
|
2860
|
+
# Format results to include tags with friendly timestamps and relevance metadata
|
2856
2861
|
formatted_results = []
|
2857
|
-
for
|
2862
|
+
for passage, score, metadata in passages_with_metadata:
|
2858
2863
|
# Format timestamp in agent's timezone if available
|
2859
|
-
timestamp =
|
2864
|
+
timestamp = passage.created_at
|
2860
2865
|
if timestamp and agent_state.timezone:
|
2861
2866
|
try:
|
2862
2867
|
# Convert to agent's timezone
|
@@ -2871,9 +2876,26 @@ class AgentManager:
|
|
2871
2876
|
# Use ISO format if no timezone is set
|
2872
2877
|
formatted_timestamp = str(timestamp) if timestamp else "Unknown"
|
2873
2878
|
|
2874
|
-
|
2879
|
+
result_dict = {"timestamp": formatted_timestamp, "content": passage.text, "tags": passage.tags or []}
|
2880
|
+
|
2881
|
+
# Add relevance metadata if available
|
2882
|
+
if metadata:
|
2883
|
+
relevance_info = {
|
2884
|
+
k: v
|
2885
|
+
for k, v in {
|
2886
|
+
"rrf_score": metadata.get("combined_score"),
|
2887
|
+
"vector_rank": metadata.get("vector_rank"),
|
2888
|
+
"fts_rank": metadata.get("fts_rank"),
|
2889
|
+
}.items()
|
2890
|
+
if v is not None
|
2891
|
+
}
|
2892
|
+
|
2893
|
+
if relevance_info: # Only add if we have metadata
|
2894
|
+
result_dict["relevance"] = relevance_info
|
2895
|
+
|
2896
|
+
formatted_results.append(result_dict)
|
2875
2897
|
|
2876
|
-
return formatted_results
|
2898
|
+
return formatted_results
|
2877
2899
|
|
2878
2900
|
@enforce_types
|
2879
2901
|
@trace_method
|
@@ -3698,45 +3720,3 @@ class AgentManager:
|
|
3698
3720
|
num_archival_memories=num_archival_memories,
|
3699
3721
|
num_messages=num_messages,
|
3700
3722
|
)
|
3701
|
-
|
3702
|
-
async def get_or_set_vector_db_namespace_async(
|
3703
|
-
self,
|
3704
|
-
agent_id: str,
|
3705
|
-
organization_id: str,
|
3706
|
-
) -> str:
|
3707
|
-
"""Get the vector database namespace for an agent, creating it if it doesn't exist.
|
3708
|
-
|
3709
|
-
Args:
|
3710
|
-
agent_id: Agent ID to check/store namespace
|
3711
|
-
organization_id: Organization ID for namespace generation
|
3712
|
-
|
3713
|
-
Returns:
|
3714
|
-
The org-scoped namespace name
|
3715
|
-
"""
|
3716
|
-
from sqlalchemy import update
|
3717
|
-
|
3718
|
-
from letta.settings import settings
|
3719
|
-
|
3720
|
-
async with db_registry.async_session() as session:
|
3721
|
-
# check if namespace already exists
|
3722
|
-
result = await session.execute(select(AgentModel._vector_db_namespace).where(AgentModel.id == agent_id))
|
3723
|
-
row = result.fetchone()
|
3724
|
-
|
3725
|
-
if row and row[0]:
|
3726
|
-
return row[0]
|
3727
|
-
|
3728
|
-
# TODO: In the future, we might use agent_id for sharding the namespace
|
3729
|
-
# For now, all messages in an org share the same namespace
|
3730
|
-
|
3731
|
-
# generate org-scoped namespace name
|
3732
|
-
environment = settings.environment
|
3733
|
-
if environment:
|
3734
|
-
namespace_name = f"messages_{organization_id}_{environment.lower()}"
|
3735
|
-
else:
|
3736
|
-
namespace_name = f"messages_{organization_id}"
|
3737
|
-
|
3738
|
-
# update the agent with the namespace (keeps agent-level tracking for future sharding)
|
3739
|
-
await session.execute(update(AgentModel).where(AgentModel.id == agent_id).values(_vector_db_namespace=namespace_name))
|
3740
|
-
await session.commit()
|
3741
|
-
|
3742
|
-
return namespace_name
|
letta/services/agent_serialization_manager.py
CHANGED
@@ -12,6 +12,7 @@ from letta.errors import (
|
|
12
12
|
AgentNotFoundForExportError,
|
13
13
|
)
|
14
14
|
from letta.helpers.pinecone_utils import should_use_pinecone
|
15
|
+
from letta.helpers.tpuf_client import should_use_tpuf
|
15
16
|
from letta.log import get_logger
|
16
17
|
from letta.schemas.agent import AgentState, CreateAgent
|
17
18
|
from letta.schemas.agent_file import (
|
@@ -29,7 +30,7 @@ from letta.schemas.agent_file import (
|
|
29
30
|
)
|
30
31
|
from letta.schemas.block import Block
|
31
32
|
from letta.schemas.embedding_config import EmbeddingConfig
|
32
|
-
from letta.schemas.enums import FileProcessingStatus
|
33
|
+
from letta.schemas.enums import FileProcessingStatus, VectorDBProvider
|
33
34
|
from letta.schemas.file import FileMetadata
|
34
35
|
from letta.schemas.group import Group, GroupCreate
|
35
36
|
from letta.schemas.mcp import MCPServer
|
@@ -52,7 +53,7 @@ from letta.services.message_manager import MessageManager
|
|
52
53
|
from letta.services.source_manager import SourceManager
|
53
54
|
from letta.services.tool_manager import ToolManager
|
54
55
|
from letta.settings import settings
|
55
|
-
from letta.utils import get_latest_alembic_revision
|
56
|
+
from letta.utils import get_latest_alembic_revision, safe_create_task
|
56
57
|
|
57
58
|
logger = get_logger(__name__)
|
58
59
|
|
@@ -90,7 +91,6 @@ class AgentSerializationManager:
|
|
90
91
|
self.file_agent_manager = file_agent_manager
|
91
92
|
self.message_manager = message_manager
|
92
93
|
self.file_parser = MistralFileParser() if settings.mistral_api_key else MarkitdownFileParser()
|
93
|
-
self.using_pinecone = should_use_pinecone()
|
94
94
|
|
95
95
|
# ID mapping state for export
|
96
96
|
self._db_to_file_ids: Dict[str, str] = {}
|
@@ -208,6 +208,10 @@ class AgentSerializationManager:
|
|
208
208
|
)
|
209
209
|
agent_schema.id = agent_file_id
|
210
210
|
|
211
|
+
# wipe the values of tool_exec_environment_variables (they contain secrets)
|
212
|
+
if agent_schema.tool_exec_environment_variables:
|
213
|
+
agent_schema.tool_exec_environment_variables = {key: "" for key in agent_schema.tool_exec_environment_variables}
|
214
|
+
|
211
215
|
if agent_schema.messages:
|
212
216
|
for message in agent_schema.messages:
|
213
217
|
message_file_id = self._map_db_to_file_id(message.id, MessageSchema.__id_prefix__)
|
@@ -588,7 +592,12 @@ class AgentSerializationManager:
|
|
588
592
|
if schema.files and any(f.content for f in schema.files):
|
589
593
|
# Use override embedding config if provided, otherwise use agent's config
|
590
594
|
embedder_config = override_embedding_config if override_embedding_config else schema.agents[0].embedding_config
|
591
|
-
|
595
|
+
# determine which embedder to use - turbopuffer takes precedence
|
596
|
+
if should_use_tpuf():
|
597
|
+
from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
|
598
|
+
|
599
|
+
embedder = TurbopufferEmbedder(embedding_config=embedder_config)
|
600
|
+
elif should_use_pinecone():
|
592
601
|
embedder = PineconeEmbedder(embedding_config=embedder_config)
|
593
602
|
else:
|
594
603
|
embedder = OpenAIEmbedder(embedding_config=embedder_config)
|
@@ -596,7 +605,6 @@ class AgentSerializationManager:
|
|
596
605
|
file_parser=self.file_parser,
|
597
606
|
embedder=embedder,
|
598
607
|
actor=actor,
|
599
|
-
using_pinecone=self.using_pinecone,
|
600
608
|
)
|
601
609
|
|
602
610
|
for file_schema in schema.files:
|
@@ -614,10 +622,11 @@ class AgentSerializationManager:
|
|
614
622
|
|
615
623
|
# Create background task for file processing
|
616
624
|
# TODO: This can be moved to celery or RQ or something
|
617
|
-
task =
|
625
|
+
task = safe_create_task(
|
618
626
|
self._process_file_async(
|
619
627
|
file_metadata=file_metadata, source_id=source_db_id, file_processor=file_processor, actor=actor
|
620
|
-
)
|
628
|
+
),
|
629
|
+
label=f"process_file_{file_metadata.file_name}",
|
621
630
|
)
|
622
631
|
background_tasks.append(task)
|
623
632
|
logger.info(f"Started background processing for file {file_metadata.file_name} (ID: {file_db_id})")
|
@@ -646,9 +655,10 @@ class AgentSerializationManager:
|
|
646
655
|
if agent_data.get("source_ids"):
|
647
656
|
agent_data["source_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["source_ids"]]
|
648
657
|
|
649
|
-
if env_vars:
|
650
|
-
|
651
|
-
|
658
|
+
if env_vars and agent_data.get("tool_exec_environment_variables"):
|
659
|
+
# update environment variable values from the provided env_vars dict
|
660
|
+
for key in agent_data["tool_exec_environment_variables"]:
|
661
|
+
agent_data["tool_exec_environment_variables"][key] = env_vars.get(key, "")
|
652
662
|
|
653
663
|
# Override project_id if provided
|
654
664
|
if project_id:
|
@@ -675,7 +685,12 @@ class AgentSerializationManager:
|
|
675
685
|
# Map file ID to the generated database ID immediately
|
676
686
|
message_file_to_db_ids[message_schema.id] = message_obj.id
|
677
687
|
|
678
|
-
created_messages = await self.message_manager.create_many_messages_async(
|
688
|
+
created_messages = await self.message_manager.create_many_messages_async(
|
689
|
+
pydantic_msgs=messages,
|
690
|
+
actor=actor,
|
691
|
+
project_id=created_agent.project_id,
|
692
|
+
template_id=created_agent.template_id,
|
693
|
+
)
|
679
694
|
imported_count += len(created_messages)
|
680
695
|
|
681
696
|
# Remap in_context_message_ids from file IDs to database IDs
|
letta/services/archive_manager.py
CHANGED
@@ -5,6 +5,7 @@ from sqlalchemy import select
|
|
5
5
|
from letta.helpers.tpuf_client import should_use_tpuf
|
6
6
|
from letta.log import get_logger
|
7
7
|
from letta.orm import ArchivalPassage, Archive as ArchiveModel, ArchivesAgents
|
8
|
+
from letta.otel.tracing import trace_method
|
8
9
|
from letta.schemas.archive import Archive as PydanticArchive
|
9
10
|
from letta.schemas.enums import VectorDBProvider
|
10
11
|
from letta.schemas.user import User as PydanticUser
|
@@ -19,6 +20,7 @@ class ArchiveManager:
|
|
19
20
|
"""Manager class to handle business logic related to Archives."""
|
20
21
|
|
21
22
|
@enforce_types
|
23
|
+
@trace_method
|
22
24
|
def create_archive(
|
23
25
|
self,
|
24
26
|
name: str,
|
@@ -44,6 +46,7 @@ class ArchiveManager:
|
|
44
46
|
raise
|
45
47
|
|
46
48
|
@enforce_types
|
49
|
+
@trace_method
|
47
50
|
async def create_archive_async(
|
48
51
|
self,
|
49
52
|
name: str,
|
@@ -69,6 +72,7 @@ class ArchiveManager:
|
|
69
72
|
raise
|
70
73
|
|
71
74
|
@enforce_types
|
75
|
+
@trace_method
|
72
76
|
async def get_archive_by_id_async(
|
73
77
|
self,
|
74
78
|
archive_id: str,
|
@@ -84,6 +88,7 @@ class ArchiveManager:
|
|
84
88
|
return archive.to_pydantic()
|
85
89
|
|
86
90
|
@enforce_types
|
91
|
+
@trace_method
|
87
92
|
def attach_agent_to_archive(
|
88
93
|
self,
|
89
94
|
agent_id: str,
|
@@ -113,6 +118,7 @@ class ArchiveManager:
|
|
113
118
|
session.commit()
|
114
119
|
|
115
120
|
@enforce_types
|
121
|
+
@trace_method
|
116
122
|
async def attach_agent_to_archive_async(
|
117
123
|
self,
|
118
124
|
agent_id: str,
|
@@ -148,6 +154,7 @@ class ArchiveManager:
|
|
148
154
|
await session.commit()
|
149
155
|
|
150
156
|
@enforce_types
|
157
|
+
@trace_method
|
151
158
|
async def get_default_archive_for_agent_async(
|
152
159
|
self,
|
153
160
|
agent_id: str,
|
@@ -179,6 +186,24 @@ class ArchiveManager:
|
|
179
186
|
return None
|
180
187
|
|
181
188
|
@enforce_types
|
189
|
+
@trace_method
|
190
|
+
async def delete_archive_async(
|
191
|
+
self,
|
192
|
+
archive_id: str,
|
193
|
+
actor: PydanticUser = None,
|
194
|
+
) -> None:
|
195
|
+
"""Delete an archive permanently."""
|
196
|
+
async with db_registry.async_session() as session:
|
197
|
+
archive_model = await ArchiveModel.read_async(
|
198
|
+
db_session=session,
|
199
|
+
identifier=archive_id,
|
200
|
+
actor=actor,
|
201
|
+
)
|
202
|
+
await archive_model.hard_delete_async(session, actor=actor)
|
203
|
+
logger.info(f"Deleted archive {archive_id}")
|
204
|
+
|
205
|
+
@enforce_types
|
206
|
+
@trace_method
|
182
207
|
async def get_or_create_default_archive_for_agent_async(
|
183
208
|
self,
|
184
209
|
agent_id: str,
|
@@ -187,6 +212,8 @@ class ArchiveManager:
|
|
187
212
|
) -> PydanticArchive:
|
188
213
|
"""Get the agent's default archive, creating one if it doesn't exist."""
|
189
214
|
# First check if agent has any archives
|
215
|
+
from sqlalchemy.exc import IntegrityError
|
216
|
+
|
190
217
|
from letta.services.agent_manager import AgentManager
|
191
218
|
|
192
219
|
agent_manager = AgentManager()
|
@@ -215,17 +242,38 @@ class ArchiveManager:
|
|
215
242
|
actor=actor,
|
216
243
|
)
|
217
244
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
245
|
+
try:
|
246
|
+
# Attach the agent to the archive as owner
|
247
|
+
await self.attach_agent_to_archive_async(
|
248
|
+
agent_id=agent_id,
|
249
|
+
archive_id=archive.id,
|
250
|
+
is_owner=True,
|
251
|
+
actor=actor,
|
252
|
+
)
|
253
|
+
return archive
|
254
|
+
except IntegrityError:
|
255
|
+
# race condition: another concurrent request already created and attached an archive
|
256
|
+
# clean up the orphaned archive we just created
|
257
|
+
logger.info(f"Race condition detected for agent {agent_id}, cleaning up orphaned archive {archive.id}")
|
258
|
+
await self.delete_archive_async(archive_id=archive.id, actor=actor)
|
259
|
+
|
260
|
+
# fetch the existing archive that was created by the concurrent request
|
261
|
+
archive_ids = await agent_manager.get_agent_archive_ids_async(
|
262
|
+
agent_id=agent_id,
|
263
|
+
actor=actor,
|
264
|
+
)
|
265
|
+
if archive_ids:
|
266
|
+
archive = await self.get_archive_by_id_async(
|
267
|
+
archive_id=archive_ids[0],
|
268
|
+
actor=actor,
|
269
|
+
)
|
270
|
+
return archive
|
271
|
+
else:
|
272
|
+
# this shouldn't happen, but if it does, re-raise
|
273
|
+
raise
|
227
274
|
|
228
275
|
@enforce_types
|
276
|
+
@trace_method
|
229
277
|
def get_or_create_default_archive_for_agent(
|
230
278
|
self,
|
231
279
|
agent_id: str,
|
@@ -269,6 +317,7 @@ class ArchiveManager:
|
|
269
317
|
return archive_model.to_pydantic()
|
270
318
|
|
271
319
|
@enforce_types
|
320
|
+
@trace_method
|
272
321
|
async def get_agents_for_archive_async(
|
273
322
|
self,
|
274
323
|
archive_id: str,
|
@@ -280,6 +329,7 @@ class ArchiveManager:
|
|
280
329
|
return [row[0] for row in result.fetchall()]
|
281
330
|
|
282
331
|
@enforce_types
|
332
|
+
@trace_method
|
283
333
|
async def get_agent_from_passage_async(
|
284
334
|
self,
|
285
335
|
passage_id: str,
|
@@ -309,6 +359,7 @@ class ArchiveManager:
|
|
309
359
|
return agent_ids[0]
|
310
360
|
|
311
361
|
@enforce_types
|
362
|
+
@trace_method
|
312
363
|
async def get_or_set_vector_db_namespace_async(
|
313
364
|
self,
|
314
365
|
archive_id: str,
|
letta/services/block_manager.py
CHANGED
@@ -188,6 +188,7 @@ class BlockManager:
|
|
188
188
|
connected_to_agents_count_lt: Optional[int] = None,
|
189
189
|
connected_to_agents_count_eq: Optional[List[int]] = None,
|
190
190
|
ascending: bool = True,
|
191
|
+
show_hidden_blocks: Optional[bool] = None,
|
191
192
|
) -> List[PydanticBlock]:
|
192
193
|
"""Async version of get_blocks method. Retrieve blocks based on various optional filters."""
|
193
194
|
from sqlalchemy import select
|
@@ -228,6 +229,10 @@ class BlockManager:
|
|
228
229
|
if value_search:
|
229
230
|
query = query.where(BlockModel.value.ilike(f"%{value_search}%"))
|
230
231
|
|
232
|
+
# Apply hidden filter
|
233
|
+
if not show_hidden_blocks:
|
234
|
+
query = query.where((BlockModel.hidden.is_(None)) | (BlockModel.hidden == False))
|
235
|
+
|
231
236
|
needs_distinct = False
|
232
237
|
|
233
238
|
needs_agent_count_join = any(
|
letta/services/file_processor/embedder/base_embedder.py
CHANGED
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
|
|
2
2
|
from typing import List
|
3
3
|
|
4
4
|
from letta.log import get_logger
|
5
|
+
from letta.schemas.enums import VectorDBProvider
|
5
6
|
from letta.schemas.passage import Passage
|
6
7
|
from letta.schemas.user import User
|
7
8
|
|
@@ -11,6 +12,10 @@ logger = get_logger(__name__)
|
|
11
12
|
class BaseEmbedder(ABC):
|
12
13
|
"""Abstract base class for embedding generation"""
|
13
14
|
|
15
|
+
def __init__(self):
|
16
|
+
# Default to NATIVE, subclasses will override this
|
17
|
+
self.vector_db_type = VectorDBProvider.NATIVE
|
18
|
+
|
14
19
|
@abstractmethod
|
15
20
|
async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
|
16
21
|
"""Generate embeddings for chunks with batching and concurrent processing"""
|
letta/services/file_processor/embedder/openai_embedder.py
CHANGED
@@ -19,6 +19,10 @@ class OpenAIEmbedder(BaseEmbedder):
|
|
19
19
|
"""OpenAI-based embedding generation"""
|
20
20
|
|
21
21
|
def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
|
22
|
+
super().__init__()
|
23
|
+
# OpenAI embedder uses the native vector db (PostgreSQL)
|
24
|
+
# self.vector_db_type already set to VectorDBProvider.NATIVE by parent
|
25
|
+
|
22
26
|
self.default_embedding_config = (
|
23
27
|
EmbeddingConfig.default_config(model_name="text-embedding-3-small", provider="openai")
|
24
28
|
if model_settings.openai_api_key
|
letta/services/file_processor/embedder/pinecone_embedder.py
CHANGED
@@ -4,6 +4,7 @@ from letta.helpers.pinecone_utils import upsert_file_records_to_pinecone_index
|
|
4
4
|
from letta.log import get_logger
|
5
5
|
from letta.otel.tracing import log_event, trace_method
|
6
6
|
from letta.schemas.embedding_config import EmbeddingConfig
|
7
|
+
from letta.schemas.enums import VectorDBProvider
|
7
8
|
from letta.schemas.passage import Passage
|
8
9
|
from letta.schemas.user import User
|
9
10
|
from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
|
@@ -20,6 +21,10 @@ class PineconeEmbedder(BaseEmbedder):
|
|
20
21
|
"""Pinecone-based embedding generation"""
|
21
22
|
|
22
23
|
def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
|
24
|
+
super().__init__()
|
25
|
+
# set the vector db type for pinecone
|
26
|
+
self.vector_db_type = VectorDBProvider.PINECONE
|
27
|
+
|
23
28
|
if not PINECONE_AVAILABLE:
|
24
29
|
raise ImportError("Pinecone package is not installed. Install it with: pip install pinecone")
|
25
30
|
|
@@ -28,7 +33,6 @@ class PineconeEmbedder(BaseEmbedder):
|
|
28
33
|
embedding_config = EmbeddingConfig.default_config(provider="pinecone")
|
29
34
|
|
30
35
|
self.embedding_config = embedding_config
|
31
|
-
super().__init__()
|
32
36
|
|
33
37
|
@trace_method
|
34
38
|
async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
|
letta/services/file_processor/embedder/turbopuffer_embedder.py
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
from typing import List, Optional
|
2
|
+
|
3
|
+
from letta.helpers.tpuf_client import TurbopufferClient
|
4
|
+
from letta.log import get_logger
|
5
|
+
from letta.otel.tracing import log_event, trace_method
|
6
|
+
from letta.schemas.embedding_config import EmbeddingConfig
|
7
|
+
from letta.schemas.enums import VectorDBProvider
|
8
|
+
from letta.schemas.passage import Passage
|
9
|
+
from letta.schemas.user import User
|
10
|
+
from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
|
11
|
+
|
12
|
+
logger = get_logger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
class TurbopufferEmbedder(BaseEmbedder):
|
16
|
+
"""Turbopuffer-based embedding generation and storage"""
|
17
|
+
|
18
|
+
def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
|
19
|
+
super().__init__()
|
20
|
+
# set the vector db type for turbopuffer
|
21
|
+
self.vector_db_type = VectorDBProvider.TPUF
|
22
|
+
# use the default embedding config from TurbopufferClient if not provided
|
23
|
+
self.embedding_config = embedding_config or TurbopufferClient.default_embedding_config
|
24
|
+
self.tpuf_client = TurbopufferClient()
|
25
|
+
|
26
|
+
@trace_method
|
27
|
+
async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
|
28
|
+
"""Generate embeddings and store in Turbopuffer, then return Passage objects"""
|
29
|
+
if not chunks:
|
30
|
+
return []
|
31
|
+
|
32
|
+
logger.info(f"Generating embeddings for {len(chunks)} chunks using Turbopuffer")
|
33
|
+
log_event(
|
34
|
+
"turbopuffer_embedder.generation_started",
|
35
|
+
{
|
36
|
+
"total_chunks": len(chunks),
|
37
|
+
"file_id": file_id,
|
38
|
+
"source_id": source_id,
|
39
|
+
"embedding_model": self.embedding_config.embedding_model,
|
40
|
+
},
|
41
|
+
)
|
42
|
+
|
43
|
+
try:
|
44
|
+
# insert passages to Turbopuffer - it will handle embedding generation internally
|
45
|
+
passages = await self.tpuf_client.insert_file_passages(
|
46
|
+
source_id=source_id,
|
47
|
+
file_id=file_id,
|
48
|
+
text_chunks=chunks,
|
49
|
+
organization_id=actor.organization_id,
|
50
|
+
actor=actor,
|
51
|
+
)
|
52
|
+
|
53
|
+
logger.info(f"Successfully generated and stored {len(passages)} passages in Turbopuffer")
|
54
|
+
log_event(
|
55
|
+
"turbopuffer_embedder.generation_completed",
|
56
|
+
{
|
57
|
+
"passages_created": len(passages),
|
58
|
+
"total_chunks_processed": len(chunks),
|
59
|
+
"file_id": file_id,
|
60
|
+
"source_id": source_id,
|
61
|
+
},
|
62
|
+
)
|
63
|
+
return passages
|
64
|
+
|
65
|
+
except Exception as e:
|
66
|
+
logger.error(f"Failed to generate embeddings with Turbopuffer: {str(e)}")
|
67
|
+
log_event(
|
68
|
+
"turbopuffer_embedder.generation_failed",
|
69
|
+
{"error": str(e), "error_type": type(e).__name__, "file_id": file_id, "source_id": source_id},
|
70
|
+
)
|
71
|
+
raise
|
letta/services/file_processor/file_processor.py
CHANGED
@@ -6,7 +6,7 @@ from letta.log import get_logger
|
|
6
6
|
from letta.otel.context import get_ctx_attributes
|
7
7
|
from letta.otel.tracing import log_event, trace_method
|
8
8
|
from letta.schemas.agent import AgentState
|
9
|
-
from letta.schemas.enums import FileProcessingStatus
|
9
|
+
from letta.schemas.enums import FileProcessingStatus, VectorDBProvider
|
10
10
|
from letta.schemas.file import FileMetadata
|
11
11
|
from letta.schemas.passage import Passage
|
12
12
|
from letta.schemas.user import User
|
@@ -30,7 +30,6 @@ class FileProcessor:
|
|
30
30
|
file_parser: FileParser,
|
31
31
|
embedder: BaseEmbedder,
|
32
32
|
actor: User,
|
33
|
-
using_pinecone: bool,
|
34
33
|
max_file_size: int = 50 * 1024 * 1024, # 50MB default
|
35
34
|
):
|
36
35
|
self.file_parser = file_parser
|
@@ -42,7 +41,8 @@ class FileProcessor:
|
|
42
41
|
self.job_manager = JobManager()
|
43
42
|
self.agent_manager = AgentManager()
|
44
43
|
self.actor = actor
|
45
|
-
|
44
|
+
# get vector db type from the embedder
|
45
|
+
self.vector_db_type = embedder.vector_db_type
|
46
46
|
|
47
47
|
async def _chunk_and_embed_with_fallback(self, file_metadata: FileMetadata, ocr_response, source_id: str) -> List:
|
48
48
|
"""Chunk text and generate embeddings with fallback to default chunker if needed"""
|
@@ -218,7 +218,7 @@ class FileProcessor:
|
|
218
218
|
source_id=source_id,
|
219
219
|
)
|
220
220
|
|
221
|
-
if
|
221
|
+
if self.vector_db_type == VectorDBProvider.NATIVE:
|
222
222
|
all_passages = await self.passage_manager.create_many_source_passages_async(
|
223
223
|
passages=all_passages,
|
224
224
|
file_metadata=file_metadata,
|
@@ -241,7 +241,8 @@ class FileProcessor:
|
|
241
241
|
)
|
242
242
|
|
243
243
|
# update job status
|
244
|
-
|
244
|
+
# pinecone completes slowly, so gets updated later
|
245
|
+
if self.vector_db_type != VectorDBProvider.PINECONE:
|
245
246
|
await self.file_manager.update_file_status(
|
246
247
|
file_id=file_metadata.id,
|
247
248
|
actor=self.actor,
|
@@ -317,14 +318,15 @@ class FileProcessor:
|
|
317
318
|
)
|
318
319
|
|
319
320
|
# Create passages in database (unless using Pinecone)
|
320
|
-
if
|
321
|
+
if self.vector_db_type == VectorDBProvider.NATIVE:
|
321
322
|
all_passages = await self.passage_manager.create_many_source_passages_async(
|
322
323
|
passages=all_passages, file_metadata=file_metadata, actor=self.actor
|
323
324
|
)
|
324
325
|
log_event("file_processor.import_passages_created", {"filename": filename, "total_passages": len(all_passages)})
|
325
326
|
|
326
327
|
# Update file status to completed (valid transition from EMBEDDING)
|
327
|
-
|
328
|
+
# pinecone completes slowly, so gets updated later
|
329
|
+
if self.vector_db_type != VectorDBProvider.PINECONE:
|
328
330
|
await self.file_manager.update_file_status(
|
329
331
|
file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
|
330
332
|
)
|