letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
letta/services/agent_manager.py
@@ -720,7 +720,7 @@ class AgentManager:
         # Only create messages if we initialized with messages
         if not _init_with_no_messages:
             await self.message_manager.create_many_messages_async(
-                pydantic_msgs=init_messages, actor=actor, embedding_config=result.embedding_config
+                pydantic_msgs=init_messages, actor=actor, project_id=result.project_id, template_id=result.template_id
             )
         return result

@@ -1834,6 +1834,7 @@ class AgentManager:
                 message_id=curr_system_message.id,
                 message_update=MessageUpdate(**temp_message.model_dump()),
                 actor=actor,
+                project_id=agent_state.project_id,
             )
         else:
             curr_system_message = temp_message
@@ -1887,7 +1888,9 @@ class AgentManager:
         self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser
     ) -> PydanticAgentState:
         agent = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
-        messages = await self.message_manager.create_many_messages_async(messages, actor=actor, embedding_config=agent.embedding_config)
+        messages = await self.message_manager.create_many_messages_async(
+            messages, actor=actor, project_id=agent.project_id, template_id=agent.template_id
+        )
         message_ids = agent.message_ids or []
         message_ids += [m.id for m in messages]
         return await self.set_in_context_messages_async(agent_id=agent_id, message_ids=message_ids, actor=actor)
@@ -2655,7 +2658,7 @@ class AgentManager:
         embedding_config: Optional[EmbeddingConfig] = None,
         tags: Optional[List[str]] = None,
         tag_match_mode: Optional[TagMatchMode] = None,
-    ) -> List[PydanticPassage]:
+    ) -> List[Tuple[PydanticPassage, float, dict]]:
         """Lists all passages attached to an agent."""
         # Check if we should use Turbopuffer for vector search
         if embed_query and agent_id and query_text and embedding_config:
@@ -2688,7 +2691,6 @@
                 # use hybrid search to combine vector and full-text search
                 passages_with_scores = await tpuf_client.query_passages(
                     archive_id=archive_ids[0],
-                    query_embedding=query_embedding,
                     query_text=query_text,  # pass text for potential hybrid search
                     search_mode="hybrid",  # use hybrid mode for better results
                     top_k=limit,
@@ -2696,10 +2698,11 @@
                     tag_match_mode=tag_match_mode or TagMatchMode.ANY,
                     start_date=start_date,
                     end_date=end_date,
+                    actor=actor,
                 )

-                # Return just the passages (without scores)
-                return [passage for passage, _ in passages_with_scores]
+                # Return full tuples with metadata
+                return passages_with_scores
             else:
                 return []

@@ -2750,9 +2753,11 @@
                 if query_tags.intersection(passage_tags):
                     filtered_passages.append(passage)

-            return filtered_passages
+            # Return as tuples with empty metadata for SQL path
+            return [(p, 0.0, {}) for p in filtered_passages]

-        return pydantic_passages
+        # Return as tuples with empty metadata for SQL path
+        return [(p, 0.0, {}) for p in pydantic_passages]

     @enforce_types
     @trace_method
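
Note: query_agent_passages_async now returns (passage, score, metadata) triples on every path; the Turbopuffer branch passes through scores and rank metadata, while the SQL branch pads with 0.0 and {}. A minimal consumer sketch (argument list abridged, local names illustrative):

    results = await agent_manager.query_agent_passages_async(
        actor=actor,
        agent_id=agent_id,
        query_text="dark mode",
    )
    for passage, score, metadata in results:
        # score is 0.0 and metadata is {} when the results came from the SQL path
        print(passage.text, score, metadata.get("combined_score"))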
@@ -2766,7 +2771,7 @@
         top_k: Optional[int] = None,
         start_datetime: Optional[str] = None,
         end_datetime: Optional[str] = None,
-    ) -> Tuple[List[Dict[str, Any]], int]:
+    ) -> List[Dict[str, Any]]:
         """
         Search archival memory using semantic (embedding-based) search with optional temporal filtering.

@@ -2783,11 +2788,11 @@
             end_datetime: Filter results before this datetime (ISO 8601 format)

         Returns:
-            Tuple of (formatted_results, count)
+            List of formatted results with relevance metadata
         """
         # Handle empty or whitespace-only queries
         if not query or not query.strip():
-            return [], 0
+            return []

         # Get the agent to access timezone and embedding config
         agent_state = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
@@ -2839,7 +2844,7 @@

         # Get results using existing passage query method
         limit = top_k if top_k is not None else RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
-        all_results = await self.query_agent_passages_async(
+        passages_with_metadata = await self.query_agent_passages_async(
            actor=actor,
            agent_id=agent_id,
            query_text=query,
@@ -2852,11 +2857,11 @@
            end_date=end_date,
        )

-        # Format results to include tags with friendly timestamps
+        # Format results to include tags with friendly timestamps and relevance metadata
         formatted_results = []
-        for result in all_results:
+        for passage, score, metadata in passages_with_metadata:
             # Format timestamp in agent's timezone if available
-            timestamp = result.created_at
+            timestamp = passage.created_at
             if timestamp and agent_state.timezone:
                 try:
                     # Convert to agent's timezone
@@ -2871,9 +2876,26 @@
                 # Use ISO format if no timezone is set
                 formatted_timestamp = str(timestamp) if timestamp else "Unknown"

-            formatted_results.append({"timestamp": formatted_timestamp, "content": result.text, "tags": result.tags or []})
+            result_dict = {"timestamp": formatted_timestamp, "content": passage.text, "tags": passage.tags or []}
+
+            # Add relevance metadata if available
+            if metadata:
+                relevance_info = {
+                    k: v
+                    for k, v in {
+                        "rrf_score": metadata.get("combined_score"),
+                        "vector_rank": metadata.get("vector_rank"),
+                        "fts_rank": metadata.get("fts_rank"),
+                    }.items()
+                    if v is not None
+                }
+
+                if relevance_info:  # Only add if we have metadata
+                    result_dict["relevance"] = relevance_info
+
+            formatted_results.append(result_dict)

-        return formatted_results, len(formatted_results)
+        return formatted_results

     @enforce_types
     @trace_method
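
Note: with this change, archival search returns a plain list of dicts rather than a (results, count) tuple. A sketch of the shape a caller can now expect; the field values here are illustrative, and "relevance" appears only when Turbopuffer hybrid search supplies metadata:

    result = {
        "timestamp": "2025-09-10 14:03:22 PDT",  # formatted in the agent's timezone when set
        "content": "User prefers dark mode.",
        "tags": ["preferences"],
        "relevance": {
            "rrf_score": 0.0328,  # metadata["combined_score"] from reciprocal rank fusion
            "vector_rank": 1,     # rank in the vector search results
            "fts_rank": 4,        # rank in the full-text search results
        },
    }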
@@ -3698,45 +3720,3 @@
             num_archival_memories=num_archival_memories,
             num_messages=num_messages,
         )
-
-    async def get_or_set_vector_db_namespace_async(
-        self,
-        agent_id: str,
-        organization_id: str,
-    ) -> str:
-        """Get the vector database namespace for an agent, creating it if it doesn't exist.
-
-        Args:
-            agent_id: Agent ID to check/store namespace
-            organization_id: Organization ID for namespace generation
-
-        Returns:
-            The org-scoped namespace name
-        """
-        from sqlalchemy import update
-
-        from letta.settings import settings
-
-        async with db_registry.async_session() as session:
-            # check if namespace already exists
-            result = await session.execute(select(AgentModel._vector_db_namespace).where(AgentModel.id == agent_id))
-            row = result.fetchone()
-
-            if row and row[0]:
-                return row[0]
-
-            # TODO: In the future, we might use agent_id for sharding the namespace
-            # For now, all messages in an org share the same namespace
-
-            # generate org-scoped namespace name
-            environment = settings.environment
-            if environment:
-                namespace_name = f"messages_{organization_id}_{environment.lower()}"
-            else:
-                namespace_name = f"messages_{organization_id}"
-
-            # update the agent with the namespace (keeps agent-level tracking for future sharding)
-            await session.execute(update(AgentModel).where(AgentModel.id == agent_id).values(_vector_db_namespace=namespace_name))
-            await session.commit()
-
-            return namespace_name
letta/services/agent_serialization_manager.py
@@ -12,6 +12,7 @@ from letta.errors import (
     AgentNotFoundForExportError,
 )
 from letta.helpers.pinecone_utils import should_use_pinecone
+from letta.helpers.tpuf_client import should_use_tpuf
 from letta.log import get_logger
 from letta.schemas.agent import AgentState, CreateAgent
 from letta.schemas.agent_file import (
@@ -29,7 +30,7 @@ from letta.schemas.agent_file import (
 )
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import FileProcessingStatus
+from letta.schemas.enums import FileProcessingStatus, VectorDBProvider
 from letta.schemas.file import FileMetadata
 from letta.schemas.group import Group, GroupCreate
 from letta.schemas.mcp import MCPServer
@@ -52,7 +53,7 @@ from letta.services.message_manager import MessageManager
 from letta.services.source_manager import SourceManager
 from letta.services.tool_manager import ToolManager
 from letta.settings import settings
-from letta.utils import get_latest_alembic_revision
+from letta.utils import get_latest_alembic_revision, safe_create_task

 logger = get_logger(__name__)

@@ -90,7 +91,6 @@ class AgentSerializationManager:
         self.file_agent_manager = file_agent_manager
         self.message_manager = message_manager
         self.file_parser = MistralFileParser() if settings.mistral_api_key else MarkitdownFileParser()
-        self.using_pinecone = should_use_pinecone()

         # ID mapping state for export
         self._db_to_file_ids: Dict[str, str] = {}
@@ -208,6 +208,10 @@
             )
             agent_schema.id = agent_file_id

+            # wipe the values of tool_exec_environment_variables (they contain secrets)
+            if agent_schema.tool_exec_environment_variables:
+                agent_schema.tool_exec_environment_variables = {key: "" for key in agent_schema.tool_exec_environment_variables}
+
             if agent_schema.messages:
                 for message in agent_schema.messages:
                     message_file_id = self._map_db_to_file_id(message.id, MessageSchema.__id_prefix__)
@@ -588,7 +592,12 @@
         if schema.files and any(f.content for f in schema.files):
             # Use override embedding config if provided, otherwise use agent's config
             embedder_config = override_embedding_config if override_embedding_config else schema.agents[0].embedding_config
-            if should_use_pinecone():
+            # determine which embedder to use - turbopuffer takes precedence
+            if should_use_tpuf():
+                from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
+
+                embedder = TurbopufferEmbedder(embedding_config=embedder_config)
+            elif should_use_pinecone():
                 embedder = PineconeEmbedder(embedding_config=embedder_config)
             else:
                 embedder = OpenAIEmbedder(embedding_config=embedder_config)
@@ -596,7 +605,6 @@
                 file_parser=self.file_parser,
                 embedder=embedder,
                 actor=actor,
-                using_pinecone=self.using_pinecone,
             )

             for file_schema in schema.files:
@@ -614,10 +622,11 @@

             # Create background task for file processing
             # TODO: This can be moved to celery or RQ or something
-            task = asyncio.create_task(
+            task = safe_create_task(
                 self._process_file_async(
                     file_metadata=file_metadata, source_id=source_db_id, file_processor=file_processor, actor=actor
-                )
+                ),
+                label=f"process_file_{file_metadata.file_name}",
             )
             background_tasks.append(task)
             logger.info(f"Started background processing for file {file_metadata.file_name} (ID: {file_db_id})")
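
Note: safe_create_task comes from letta/utils.py, which gains 106 lines in this diff; its implementation is not shown here. As a hedged sketch only, such a wrapper is typically a task factory that names the task and logs failures instead of letting unawaited background tasks die silently. The real helper may differ:

    import asyncio
    import logging
    from typing import Any, Coroutine

    logger = logging.getLogger(__name__)

    def safe_create_task(coro: Coroutine[Any, Any, Any], label: str = "task") -> asyncio.Task:
        """Sketch of a typical safe task factory; not the actual letta implementation."""
        async def _wrapped():
            try:
                return await coro
            except Exception:
                # surface background failures in logs rather than losing them
                logger.exception(f"background task {label!r} failed")
                raise
        return asyncio.get_running_loop().create_task(_wrapped(), name=label)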
@@ -646,9 +655,10 @@
             if agent_data.get("source_ids"):
                 agent_data["source_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["source_ids"]]

-            if env_vars:
-                for var in agent_data["tool_exec_environment_variables"]:
-                    var["value"] = env_vars.get(var["key"], "")
+            if env_vars and agent_data.get("tool_exec_environment_variables"):
+                # update environment variable values from the provided env_vars dict
+                for key in agent_data["tool_exec_environment_variables"]:
+                    agent_data["tool_exec_environment_variables"][key] = env_vars.get(key, "")

             # Override project_id if provided
             if project_id:
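
Note: the import path previously iterated tool_exec_environment_variables as a list of row dicts; it now treats it as a flat key-to-value mapping (matching the export-side wipe above) and guards against the key being absent. Illustrative data shapes, with assumed values:

    # before: list of ORM-style rows
    agent_data["tool_exec_environment_variables"] = [{"key": "API_KEY", "value": ""}]

    # after: flat mapping, refilled from the env_vars dict supplied at import time
    agent_data["tool_exec_environment_variables"] = {"API_KEY": ""}
    env_vars = {"API_KEY": "sk-..."}  # placeholder secret
    for key in agent_data["tool_exec_environment_variables"]:
        agent_data["tool_exec_environment_variables"][key] = env_vars.get(key, "")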
@@ -675,7 +685,12 @@
                 # Map file ID to the generated database ID immediately
                 message_file_to_db_ids[message_schema.id] = message_obj.id

-            created_messages = await self.message_manager.create_many_messages_async(pydantic_msgs=messages, actor=actor)
+            created_messages = await self.message_manager.create_many_messages_async(
+                pydantic_msgs=messages,
+                actor=actor,
+                project_id=created_agent.project_id,
+                template_id=created_agent.template_id,
+            )
             imported_count += len(created_messages)

             # Remap in_context_message_ids from file IDs to database IDs
letta/services/archive_manager.py
@@ -5,6 +5,7 @@ from sqlalchemy import select
 from letta.helpers.tpuf_client import should_use_tpuf
 from letta.log import get_logger
 from letta.orm import ArchivalPassage, Archive as ArchiveModel, ArchivesAgents
+from letta.otel.tracing import trace_method
 from letta.schemas.archive import Archive as PydanticArchive
 from letta.schemas.enums import VectorDBProvider
 from letta.schemas.user import User as PydanticUser
@@ -19,6 +20,7 @@ class ArchiveManager:
     """Manager class to handle business logic related to Archives."""

     @enforce_types
+    @trace_method
     def create_archive(
         self,
         name: str,
@@ -44,6 +46,7 @@
             raise

     @enforce_types
+    @trace_method
     async def create_archive_async(
         self,
         name: str,
@@ -69,6 +72,7 @@
             raise

     @enforce_types
+    @trace_method
     async def get_archive_by_id_async(
         self,
         archive_id: str,
@@ -84,6 +88,7 @@
         return archive.to_pydantic()

     @enforce_types
+    @trace_method
     def attach_agent_to_archive(
         self,
         agent_id: str,
@@ -113,6 +118,7 @@
         session.commit()

     @enforce_types
+    @trace_method
     async def attach_agent_to_archive_async(
         self,
         agent_id: str,
@@ -148,6 +154,7 @@
         await session.commit()

     @enforce_types
+    @trace_method
     async def get_default_archive_for_agent_async(
         self,
         agent_id: str,
@@ -179,6 +186,24 @@
         return None

     @enforce_types
+    @trace_method
+    async def delete_archive_async(
+        self,
+        archive_id: str,
+        actor: PydanticUser = None,
+    ) -> None:
+        """Delete an archive permanently."""
+        async with db_registry.async_session() as session:
+            archive_model = await ArchiveModel.read_async(
+                db_session=session,
+                identifier=archive_id,
+                actor=actor,
+            )
+            await archive_model.hard_delete_async(session, actor=actor)
+            logger.info(f"Deleted archive {archive_id}")
+
+    @enforce_types
+    @trace_method
     async def get_or_create_default_archive_for_agent_async(
         self,
         agent_id: str,
@@ -187,6 +212,8 @@
     ) -> PydanticArchive:
         """Get the agent's default archive, creating one if it doesn't exist."""
         # First check if agent has any archives
+        from sqlalchemy.exc import IntegrityError
+
         from letta.services.agent_manager import AgentManager

         agent_manager = AgentManager()
@@ -215,17 +242,38 @@
             actor=actor,
         )

-        # Attach the agent to the archive as owner
-        await self.attach_agent_to_archive_async(
-            agent_id=agent_id,
-            archive_id=archive.id,
-            is_owner=True,
-            actor=actor,
-        )
-
-        return archive
+        try:
+            # Attach the agent to the archive as owner
+            await self.attach_agent_to_archive_async(
+                agent_id=agent_id,
+                archive_id=archive.id,
+                is_owner=True,
+                actor=actor,
+            )
+            return archive
+        except IntegrityError:
+            # race condition: another concurrent request already created and attached an archive
+            # clean up the orphaned archive we just created
+            logger.info(f"Race condition detected for agent {agent_id}, cleaning up orphaned archive {archive.id}")
+            await self.delete_archive_async(archive_id=archive.id, actor=actor)
+
+            # fetch the existing archive that was created by the concurrent request
+            archive_ids = await agent_manager.get_agent_archive_ids_async(
+                agent_id=agent_id,
+                actor=actor,
+            )
+            if archive_ids:
+                archive = await self.get_archive_by_id_async(
+                    archive_id=archive_ids[0],
+                    actor=actor,
+                )
+                return archive
+            else:
+                # this shouldn't happen, but if it does, re-raise
+                raise

     @enforce_types
+    @trace_method
     def get_or_create_default_archive_for_agent(
         self,
         agent_id: str,
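
Note: the try/except relies on a uniqueness constraint on the archives-agents relationship to detect a concurrent winner; the loser deletes its orphaned archive and adopts the existing one. A usage sketch, assuming an initialized ArchiveManager and a valid actor:

    import asyncio

    a1, a2 = await asyncio.gather(
        archive_manager.get_or_create_default_archive_for_agent_async(agent_id=agent_id, actor=actor),
        archive_manager.get_or_create_default_archive_for_agent_async(agent_id=agent_id, actor=actor),
    )
    assert a1.id == a2.id  # the loser of the race adopted the winner's archive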
@@ -269,6 +317,7 @@
         return archive_model.to_pydantic()

     @enforce_types
+    @trace_method
     async def get_agents_for_archive_async(
         self,
         archive_id: str,
@@ -280,6 +329,7 @@
         return [row[0] for row in result.fetchall()]

     @enforce_types
+    @trace_method
     async def get_agent_from_passage_async(
         self,
         passage_id: str,
@@ -309,6 +359,7 @@
         return agent_ids[0]

     @enforce_types
+    @trace_method
     async def get_or_set_vector_db_namespace_async(
         self,
         archive_id: str,
letta/services/block_manager.py
@@ -188,6 +188,7 @@ class BlockManager:
         connected_to_agents_count_lt: Optional[int] = None,
         connected_to_agents_count_eq: Optional[List[int]] = None,
         ascending: bool = True,
+        show_hidden_blocks: Optional[bool] = None,
     ) -> List[PydanticBlock]:
         """Async version of get_blocks method. Retrieve blocks based on various optional filters."""
         from sqlalchemy import select
@@ -228,6 +229,10 @@
         if value_search:
             query = query.where(BlockModel.value.ilike(f"%{value_search}%"))

+        # Apply hidden filter
+        if not show_hidden_blocks:
+            query = query.where((BlockModel.hidden.is_(None)) | (BlockModel.hidden == False))
+
         needs_distinct = False

         needs_agent_count_join = any(
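
Note: because hidden is nullable, filtering with hidden == False alone would drop rows where hidden is NULL; the IS NULL branch keeps pre-existing blocks visible by default. A self-contained sketch of the idiom (the Block model here is a minimal stand-in, not letta's actual ORM class):

    from sqlalchemy import Boolean, Column, String, select
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Block(Base):  # minimal stand-in for letta.orm.block.Block
        __tablename__ = "block"
        id = Column(String, primary_key=True)
        hidden = Column(Boolean, nullable=True)  # NULL for rows created before the column existed

    # NULL-safe "not hidden": matches hidden IS NULL as well as hidden = false
    query = select(Block).where((Block.hidden.is_(None)) | (Block.hidden == False))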
letta/services/file_processor/embedder/base_embedder.py
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
 from typing import List

 from letta.log import get_logger
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.passage import Passage
 from letta.schemas.user import User

@@ -11,6 +12,10 @@ logger = get_logger(__name__)
 class BaseEmbedder(ABC):
     """Abstract base class for embedding generation"""

+    def __init__(self):
+        # Default to NATIVE, subclasses will override this
+        self.vector_db_type = VectorDBProvider.NATIVE
+
     @abstractmethod
     async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
         """Generate embeddings for chunks with batching and concurrent processing"""
letta/services/file_processor/embedder/openai_embedder.py
@@ -19,6 +19,10 @@ class OpenAIEmbedder(BaseEmbedder):
     """OpenAI-based embedding generation"""

     def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
+        super().__init__()
+        # OpenAI embedder uses the native vector db (PostgreSQL)
+        # self.vector_db_type already set to VectorDBProvider.NATIVE by parent
+
         self.default_embedding_config = (
             EmbeddingConfig.default_config(model_name="text-embedding-3-small", provider="openai")
             if model_settings.openai_api_key
letta/services/file_processor/embedder/pinecone_embedder.py
@@ -4,6 +4,7 @@ from letta.helpers.pinecone_utils import upsert_file_records_to_pinecone_index
 from letta.log import get_logger
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
 from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
@@ -20,6 +21,10 @@ class PineconeEmbedder(BaseEmbedder):
     """Pinecone-based embedding generation"""

     def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
+        super().__init__()
+        # set the vector db type for pinecone
+        self.vector_db_type = VectorDBProvider.PINECONE
+
         if not PINECONE_AVAILABLE:
             raise ImportError("Pinecone package is not installed. Install it with: pip install pinecone")

@@ -28,7 +33,6 @@ class PineconeEmbedder(BaseEmbedder):
             embedding_config = EmbeddingConfig.default_config(provider="pinecone")

         self.embedding_config = embedding_config
-        super().__init__()

     @trace_method
     async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
letta/services/file_processor/embedder/turbopuffer_embedder.py (new file)
@@ -0,0 +1,71 @@
+from typing import List, Optional
+
+from letta.helpers.tpuf_client import TurbopufferClient
+from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import VectorDBProvider
+from letta.schemas.passage import Passage
+from letta.schemas.user import User
+from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
+
+logger = get_logger(__name__)
+
+
+class TurbopufferEmbedder(BaseEmbedder):
+    """Turbopuffer-based embedding generation and storage"""
+
+    def __init__(self, embedding_config: Optional[EmbeddingConfig] = None):
+        super().__init__()
+        # set the vector db type for turbopuffer
+        self.vector_db_type = VectorDBProvider.TPUF
+        # use the default embedding config from TurbopufferClient if not provided
+        self.embedding_config = embedding_config or TurbopufferClient.default_embedding_config
+        self.tpuf_client = TurbopufferClient()
+
+    @trace_method
+    async def generate_embedded_passages(self, file_id: str, source_id: str, chunks: List[str], actor: User) -> List[Passage]:
+        """Generate embeddings and store in Turbopuffer, then return Passage objects"""
+        if not chunks:
+            return []
+
+        logger.info(f"Generating embeddings for {len(chunks)} chunks using Turbopuffer")
+        log_event(
+            "turbopuffer_embedder.generation_started",
+            {
+                "total_chunks": len(chunks),
+                "file_id": file_id,
+                "source_id": source_id,
+                "embedding_model": self.embedding_config.embedding_model,
+            },
+        )
+
+        try:
+            # insert passages to Turbopuffer - it will handle embedding generation internally
+            passages = await self.tpuf_client.insert_file_passages(
+                source_id=source_id,
+                file_id=file_id,
+                text_chunks=chunks,
+                organization_id=actor.organization_id,
+                actor=actor,
+            )
+
+            logger.info(f"Successfully generated and stored {len(passages)} passages in Turbopuffer")
+            log_event(
+                "turbopuffer_embedder.generation_completed",
+                {
+                    "passages_created": len(passages),
+                    "total_chunks_processed": len(chunks),
+                    "file_id": file_id,
+                    "source_id": source_id,
+                },
+            )
+            return passages
+
+        except Exception as e:
+            logger.error(f"Failed to generate embeddings with Turbopuffer: {str(e)}")
+            log_event(
+                "turbopuffer_embedder.generation_failed",
+                {"error": str(e), "error_type": type(e).__name__, "file_id": file_id, "source_id": source_id},
+            )
+            raise
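
Note: a wiring sketch showing why FileProcessor no longer needs a using_pinecone flag; it reads vector_db_type off whichever embedder it receives. This mirrors the embedder selection logic in AgentSerializationManager above, and assumes embedder_config, file_parser, and actor are already in scope:

    from letta.schemas.enums import VectorDBProvider
    from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
    from letta.services.file_processor.file_processor import FileProcessor

    embedder = TurbopufferEmbedder(embedding_config=embedder_config)
    processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor)
    assert processor.vector_db_type == VectorDBProvider.TPUF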
letta/services/file_processor/file_processor.py
@@ -6,7 +6,7 @@ from letta.log import get_logger
 from letta.otel.context import get_ctx_attributes
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import FileProcessingStatus
+from letta.schemas.enums import FileProcessingStatus, VectorDBProvider
 from letta.schemas.file import FileMetadata
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
@@ -30,7 +30,6 @@ class FileProcessor:
         file_parser: FileParser,
         embedder: BaseEmbedder,
         actor: User,
-        using_pinecone: bool,
         max_file_size: int = 50 * 1024 * 1024,  # 50MB default
     ):
         self.file_parser = file_parser
@@ -42,7 +41,8 @@ class FileProcessor:
         self.job_manager = JobManager()
         self.agent_manager = AgentManager()
         self.actor = actor
-        self.using_pinecone = using_pinecone
+        # get vector db type from the embedder
+        self.vector_db_type = embedder.vector_db_type

     async def _chunk_and_embed_with_fallback(self, file_metadata: FileMetadata, ocr_response, source_id: str) -> List:
         """Chunk text and generate embeddings with fallback to default chunker if needed"""
@@ -218,7 +218,7 @@ class FileProcessor:
             source_id=source_id,
         )

-        if not self.using_pinecone:
+        if self.vector_db_type == VectorDBProvider.NATIVE:
             all_passages = await self.passage_manager.create_many_source_passages_async(
                 passages=all_passages,
                 file_metadata=file_metadata,
@@ -241,7 +241,8 @@ class FileProcessor:
         )

         # update job status
-        if not self.using_pinecone:
+        # pinecone completes slowly, so gets updated later
+        if self.vector_db_type != VectorDBProvider.PINECONE:
             await self.file_manager.update_file_status(
                 file_id=file_metadata.id,
                 actor=self.actor,
@@ -317,14 +318,15 @@ class FileProcessor:
         )

         # Create passages in database (unless using Pinecone)
-        if not self.using_pinecone:
+        if self.vector_db_type == VectorDBProvider.NATIVE:
             all_passages = await self.passage_manager.create_many_source_passages_async(
                 passages=all_passages, file_metadata=file_metadata, actor=self.actor
             )
             log_event("file_processor.import_passages_created", {"filename": filename, "total_passages": len(all_passages)})

         # Update file status to completed (valid transition from EMBEDDING)
-        if not self.using_pinecone:
+        # pinecone completes slowly, so gets updated later
+        if self.vector_db_type != VectorDBProvider.PINECONE:
             await self.file_manager.update_file_status(
                 file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
             )
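
Note: the net effect of the FileProcessor changes is a three-way dispatch on VectorDBProvider. A compact restatement of the logic above, as a sketch rather than part of the diff:

    from letta.schemas.enums import VectorDBProvider

    def storage_behavior(vector_db_type: VectorDBProvider) -> tuple[bool, bool]:
        """Return (persist_passages_in_sql, mark_file_completed_now).

        NATIVE:   passages stored in the native DB; file completes immediately.
        TPUF:     Turbopuffer stores passages itself; file still completes immediately.
        PINECONE: upserts finish asynchronously, so status is updated later.
        """
        persist_in_sql = vector_db_type == VectorDBProvider.NATIVE
        complete_now = vector_db_type != VectorDBProvider.PINECONE
        return persist_in_sql, complete_now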