letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +111 -0
  3. letta/adapters/letta_llm_stream_adapter.py +169 -0
  4. letta/agents/base_agent.py +4 -1
  5. letta/agents/base_agent_v2.py +68 -0
  6. letta/agents/helpers.py +3 -5
  7. letta/agents/letta_agent.py +23 -12
  8. letta/agents/letta_agent_v2.py +1220 -0
  9. letta/agents/voice_agent.py +2 -1
  10. letta/constants.py +1 -1
  11. letta/errors.py +12 -0
  12. letta/functions/function_sets/base.py +53 -12
  13. letta/functions/schema_generator.py +1 -1
  14. letta/groups/sleeptime_multi_agent_v3.py +231 -0
  15. letta/helpers/tool_rule_solver.py +4 -0
  16. letta/helpers/tpuf_client.py +607 -34
  17. letta/interfaces/anthropic_streaming_interface.py +64 -24
  18. letta/interfaces/openai_streaming_interface.py +80 -37
  19. letta/llm_api/openai_client.py +45 -4
  20. letta/orm/block.py +1 -0
  21. letta/orm/group.py +1 -0
  22. letta/orm/source.py +8 -1
  23. letta/orm/step_metrics.py +10 -0
  24. letta/schemas/block.py +4 -0
  25. letta/schemas/enums.py +1 -0
  26. letta/schemas/group.py +8 -0
  27. letta/schemas/letta_message.py +1 -1
  28. letta/schemas/letta_request.py +2 -2
  29. letta/schemas/mcp.py +9 -1
  30. letta/schemas/message.py +23 -0
  31. letta/schemas/providers/ollama.py +1 -1
  32. letta/schemas/providers.py +1 -2
  33. letta/schemas/source.py +6 -0
  34. letta/schemas/step_metrics.py +2 -0
  35. letta/server/rest_api/routers/v1/__init__.py +2 -0
  36. letta/server/rest_api/routers/v1/agents.py +100 -5
  37. letta/server/rest_api/routers/v1/blocks.py +6 -0
  38. letta/server/rest_api/routers/v1/folders.py +23 -5
  39. letta/server/rest_api/routers/v1/groups.py +6 -0
  40. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  41. letta/server/rest_api/routers/v1/messages.py +14 -19
  42. letta/server/rest_api/routers/v1/runs.py +43 -28
  43. letta/server/rest_api/routers/v1/sources.py +23 -5
  44. letta/server/rest_api/routers/v1/tools.py +42 -0
  45. letta/server/rest_api/streaming_response.py +9 -1
  46. letta/server/server.py +2 -1
  47. letta/services/agent_manager.py +39 -59
  48. letta/services/agent_serialization_manager.py +22 -8
  49. letta/services/archive_manager.py +60 -9
  50. letta/services/block_manager.py +5 -0
  51. letta/services/file_processor/embedder/base_embedder.py +5 -0
  52. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  53. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  54. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  55. letta/services/file_processor/file_processor.py +9 -7
  56. letta/services/group_manager.py +74 -11
  57. letta/services/mcp_manager.py +132 -26
  58. letta/services/message_manager.py +229 -125
  59. letta/services/passage_manager.py +2 -1
  60. letta/services/source_manager.py +23 -1
  61. letta/services/summarizer/summarizer.py +2 -0
  62. letta/services/tool_executor/core_tool_executor.py +2 -120
  63. letta/services/tool_executor/files_tool_executor.py +133 -8
  64. letta/settings.py +6 -0
  65. letta/utils.py +34 -1
  66. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
  67. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
  68. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
  69. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
  70. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,5 @@
1
1
  from datetime import timedelta
2
- from typing import Annotated, List, Optional
2
+ from typing import Annotated, List, Literal, Optional
3
3
 
4
4
  from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
5
5
  from pydantic import Field
@@ -14,7 +14,11 @@ from letta.schemas.openai.chat_completion_response import UsageStatistics
14
14
  from letta.schemas.run import Run
15
15
  from letta.schemas.step import Step
16
16
  from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
17
- from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
17
+ from letta.server.rest_api.streaming_response import (
18
+ StreamingResponseWithStatusCode,
19
+ add_keepalive_to_stream,
20
+ cancellation_aware_stream_wrapper,
21
+ )
18
22
  from letta.server.rest_api.utils import get_letta_server
19
23
  from letta.server.server import SyncServer
20
24
  from letta.settings import settings
@@ -115,33 +119,18 @@ async def list_run_messages(
115
119
  run_id: str,
116
120
  server: "SyncServer" = Depends(get_letta_server),
117
121
  actor_id: Optional[str] = Header(None, alias="user_id"),
118
- before: Optional[str] = Query(None, description="Cursor for pagination"),
119
- after: Optional[str] = Query(None, description="Cursor for pagination"),
122
+ before: Optional[str] = Query(
123
+ None, description="Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order"
124
+ ),
125
+ after: Optional[str] = Query(
126
+ None, description="Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order"
127
+ ),
120
128
  limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
121
- order: str = Query(
122
- "asc", description="Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order."
129
+ order: Literal["asc", "desc"] = Query(
130
+ "asc", description="Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first"
123
131
  ),
124
- role: Optional[MessageRole] = Query(None, description="Filter by role"),
125
132
  ):
126
- """
127
- Get messages associated with a run with filtering options.
128
-
129
- Args:
130
- run_id: ID of the run
131
- before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.
132
- after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
133
- limit: Maximum number of messages to return
134
- order: Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.
135
- role: Filter by role (user/assistant/system/tool)
136
- return_message_object: Whether to return Message objects or LettaMessage objects
137
- user_id: ID of the user making the request
138
-
139
- Returns:
140
- A list of messages associated with the run. Default is List[LettaMessage].
141
- """
142
- if order not in ["asc", "desc"]:
143
- raise HTTPException(status_code=400, detail="Order must be 'asc' or 'desc'")
144
-
133
+ """Get response messages associated with a run."""
145
134
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
146
135
 
147
136
  try:
@@ -152,7 +141,6 @@ async def list_run_messages(
152
141
  before=before,
153
142
  after=after,
154
143
  ascending=(order == "asc"),
155
- role=role,
156
144
  )
157
145
  return messages
158
146
  except NoResultFound as e:
@@ -251,7 +239,26 @@ async def delete_run(
251
239
  200: {
252
240
  "description": "Successful response",
253
241
  "content": {
254
- "text/event-stream": {"description": "Server-Sent Events stream"},
242
+ # Align streaming schema with agents.create_stream so SDKs accept approval messages
243
+ "text/event-stream": {
244
+ "description": "Server-Sent Events stream",
245
+ "schema": {
246
+ "oneOf": [
247
+ {"$ref": "#/components/schemas/SystemMessage"},
248
+ {"$ref": "#/components/schemas/UserMessage"},
249
+ {"$ref": "#/components/schemas/ReasoningMessage"},
250
+ {"$ref": "#/components/schemas/HiddenReasoningMessage"},
251
+ {"$ref": "#/components/schemas/ToolCallMessage"},
252
+ {"$ref": "#/components/schemas/ToolReturnMessage"},
253
+ {"$ref": "#/components/schemas/AssistantMessage"},
254
+ {"$ref": "#/components/schemas/ApprovalRequestMessage"},
255
+ {"$ref": "#/components/schemas/ApprovalResponseMessage"},
256
+ {"$ref": "#/components/schemas/LettaPing"},
257
+ {"$ref": "#/components/schemas/LettaStopReason"},
258
+ {"$ref": "#/components/schemas/LettaUsageStatistics"},
259
+ ]
260
+ },
261
+ },
255
262
  },
256
263
  }
257
264
  },
@@ -296,6 +303,14 @@ async def retrieve_stream(
296
303
  batch_size=request.batch_size,
297
304
  )
298
305
 
306
+ if settings.enable_cancellation_aware_streaming:
307
+ stream = cancellation_aware_stream_wrapper(
308
+ stream_generator=stream,
309
+ job_manager=server.job_manager,
310
+ job_id=run_id,
311
+ actor=actor,
312
+ )
313
+
299
314
  if request.include_pings and settings.enable_keepalive:
300
315
  stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval)
301
316
 
@@ -15,6 +15,7 @@ from letta.helpers.pinecone_utils import (
15
15
  delete_source_records_from_pinecone_index,
16
16
  should_use_pinecone,
17
17
  )
18
+ from letta.helpers.tpuf_client import should_use_tpuf
18
19
  from letta.log import get_logger
19
20
  from letta.otel.tracing import trace_method
20
21
  from letta.schemas.agent import AgentState
@@ -189,7 +190,13 @@ async def delete_source(
189
190
  files = await server.file_manager.list_files(source_id, actor)
190
191
  file_ids = [f.id for f in files]
191
192
 
192
- if should_use_pinecone():
193
+ if should_use_tpuf():
194
+ logger.info(f"Deleting source {source_id} from Turbopuffer")
195
+ from letta.helpers.tpuf_client import TurbopufferClient
196
+
197
+ tpuf_client = TurbopufferClient()
198
+ await tpuf_client.delete_source_passages(source_id=source_id, organization_id=actor.organization_id)
199
+ elif should_use_pinecone():
193
200
  logger.info(f"Deleting source {source_id} from pinecone index")
194
201
  await delete_source_records_from_pinecone_index(source_id=source_id, actor=actor)
195
202
 
@@ -435,7 +442,13 @@ async def delete_file_from_source(
435
442
 
436
443
  await server.remove_file_from_context_windows(source_id=source_id, file_id=deleted_file.id, actor=actor)
437
444
 
438
- if should_use_pinecone():
445
+ if should_use_tpuf():
446
+ logger.info(f"Deleting file {file_id} from Turbopuffer")
447
+ from letta.helpers.tpuf_client import TurbopufferClient
448
+
449
+ tpuf_client = TurbopufferClient()
450
+ await tpuf_client.delete_file_passages(source_id=source_id, file_id=file_id, organization_id=actor.organization_id)
451
+ elif should_use_pinecone():
439
452
  logger.info(f"Deleting file {file_id} from pinecone index")
440
453
  await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
441
454
 
@@ -481,10 +494,15 @@ async def load_file_to_source_cloud(
481
494
  else:
482
495
  file_parser = MarkitdownFileParser()
483
496
 
484
- using_pinecone = should_use_pinecone()
485
- if using_pinecone:
497
+ # determine which embedder to use - turbopuffer takes precedence
498
+ if should_use_tpuf():
499
+ from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
500
+
501
+ embedder = TurbopufferEmbedder(embedding_config=embedding_config)
502
+ elif should_use_pinecone():
486
503
  embedder = PineconeEmbedder(embedding_config=embedding_config)
487
504
  else:
488
505
  embedder = OpenAIEmbedder(embedding_config=embedding_config)
489
- file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor, using_pinecone=using_pinecone)
506
+
507
+ file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor)
490
508
  await file_processor.process(agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata)
@@ -587,6 +587,48 @@ async def list_mcp_tools_by_server(
587
587
  return mcp_tools
588
588
 
589
589
 
590
+ @router.post("/mcp/servers/{mcp_server_name}/resync", operation_id="resync_mcp_server_tools")
591
+ async def resync_mcp_server_tools(
592
+ mcp_server_name: str,
593
+ server: SyncServer = Depends(get_letta_server),
594
+ actor_id: Optional[str] = Header(None, alias="user_id"),
595
+ agent_id: Optional[str] = None,
596
+ ):
597
+ """
598
+ Resync tools for an MCP server by:
599
+ 1. Fetching current tools from the MCP server
600
+ 2. Deleting tools that no longer exist on the server
601
+ 3. Updating schemas for existing tools
602
+ 4. Adding new tools from the server
603
+
604
+ Returns a summary of changes made.
605
+ """
606
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
607
+
608
+ try:
609
+ result = await server.mcp_manager.resync_mcp_server_tools(mcp_server_name=mcp_server_name, actor=actor, agent_id=agent_id)
610
+ return result
611
+ except ValueError as e:
612
+ raise HTTPException(
613
+ status_code=404,
614
+ detail={
615
+ "code": "MCPServerNotFoundError",
616
+ "message": str(e),
617
+ "mcp_server_name": mcp_server_name,
618
+ },
619
+ )
620
+ except Exception as e:
621
+ logger.error(f"Unexpected error refreshing MCP server tools: {e}")
622
+ raise HTTPException(
623
+ status_code=404,
624
+ detail={
625
+ "code": "MCPRefreshError",
626
+ "message": f"Failed to refresh MCP server tools: {str(e)}",
627
+ "mcp_server_name": mcp_server_name,
628
+ },
629
+ )
630
+
631
+
590
632
  @router.post("/mcp/servers/{mcp_server_name}/{mcp_tool_name}", response_model=Tool, operation_id="add_mcp_tool")
591
633
  async def add_mcp_tool(
592
634
  mcp_server_name: str,
@@ -7,10 +7,11 @@ import json
7
7
  from collections.abc import AsyncIterator
8
8
 
9
9
  import anyio
10
+ from fastapi import HTTPException
10
11
  from fastapi.responses import StreamingResponse
11
12
  from starlette.types import Send
12
13
 
13
- from letta.errors import LettaUnexpectedStreamCancellationError
14
+ from letta.errors import LettaUnexpectedStreamCancellationError, PendingApprovalError
14
15
  from letta.log import get_logger
15
16
  from letta.schemas.enums import JobStatus
16
17
  from letta.schemas.letta_ping import LettaPing
@@ -189,6 +190,13 @@ class StreamingResponseWithStatusCode(StreamingResponse):
189
190
  except anyio.ClosedResourceError:
190
191
  logger.info("Client disconnected, but shielded task should continue")
191
192
  self._client_connected = False
193
+ except PendingApprovalError as e:
194
+ # This is an expected error, don't log as error
195
+ logger.info(f"Pending approval conflict in stream response: {e}")
196
+ # Re-raise as HTTPException for proper client handling
197
+ raise HTTPException(
198
+ status_code=409, detail={"code": "PENDING_APPROVAL", "message": str(e), "pending_request_id": e.pending_request_id}
199
+ )
192
200
  except Exception as e:
193
201
  logger.error(f"Error in protected stream response: {e}")
194
202
  raise
letta/server/server.py CHANGED
@@ -1125,7 +1125,8 @@ class SyncServer(Server):
1125
1125
  ascending=ascending,
1126
1126
  limit=limit,
1127
1127
  )
1128
- return records
1128
+ # Extract just the passages (SQL path returns empty metadata)
1129
+ return [passage for passage, _, _ in records]
1129
1130
 
1130
1131
  async def insert_archival_memory_async(
1131
1132
  self, agent_id: str, memory_contents: str, actor: User, tags: Optional[List[str]], created_at: Optional[datetime]
@@ -720,7 +720,7 @@ class AgentManager:
720
720
  # Only create messages if we initialized with messages
721
721
  if not _init_with_no_messages:
722
722
  await self.message_manager.create_many_messages_async(
723
- pydantic_msgs=init_messages, actor=actor, embedding_config=result.embedding_config
723
+ pydantic_msgs=init_messages, actor=actor, project_id=result.project_id, template_id=result.template_id
724
724
  )
725
725
  return result
726
726
 
@@ -1834,6 +1834,7 @@ class AgentManager:
1834
1834
  message_id=curr_system_message.id,
1835
1835
  message_update=MessageUpdate(**temp_message.model_dump()),
1836
1836
  actor=actor,
1837
+ project_id=agent_state.project_id,
1837
1838
  )
1838
1839
  else:
1839
1840
  curr_system_message = temp_message
@@ -1887,7 +1888,9 @@ class AgentManager:
1887
1888
  self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser
1888
1889
  ) -> PydanticAgentState:
1889
1890
  agent = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
1890
- messages = await self.message_manager.create_many_messages_async(messages, actor=actor, embedding_config=agent.embedding_config)
1891
+ messages = await self.message_manager.create_many_messages_async(
1892
+ messages, actor=actor, project_id=agent.project_id, template_id=agent.template_id
1893
+ )
1891
1894
  message_ids = agent.message_ids or []
1892
1895
  message_ids += [m.id for m in messages]
1893
1896
  return await self.set_in_context_messages_async(agent_id=agent_id, message_ids=message_ids, actor=actor)
@@ -2655,7 +2658,7 @@ class AgentManager:
2655
2658
  embedding_config: Optional[EmbeddingConfig] = None,
2656
2659
  tags: Optional[List[str]] = None,
2657
2660
  tag_match_mode: Optional[TagMatchMode] = None,
2658
- ) -> List[PydanticPassage]:
2661
+ ) -> List[Tuple[PydanticPassage, float, dict]]:
2659
2662
  """Lists all passages attached to an agent."""
2660
2663
  # Check if we should use Turbopuffer for vector search
2661
2664
  if embed_query and agent_id and query_text and embedding_config:
@@ -2688,7 +2691,6 @@ class AgentManager:
2688
2691
  # use hybrid search to combine vector and full-text search
2689
2692
  passages_with_scores = await tpuf_client.query_passages(
2690
2693
  archive_id=archive_ids[0],
2691
- query_embedding=query_embedding,
2692
2694
  query_text=query_text, # pass text for potential hybrid search
2693
2695
  search_mode="hybrid", # use hybrid mode for better results
2694
2696
  top_k=limit,
@@ -2696,10 +2698,11 @@ class AgentManager:
2696
2698
  tag_match_mode=tag_match_mode or TagMatchMode.ANY,
2697
2699
  start_date=start_date,
2698
2700
  end_date=end_date,
2701
+ actor=actor,
2699
2702
  )
2700
2703
 
2701
- # Return just the passages (without scores)
2702
- return [passage for passage, _ in passages_with_scores]
2704
+ # Return full tuples with metadata
2705
+ return passages_with_scores
2703
2706
  else:
2704
2707
  return []
2705
2708
 
@@ -2750,9 +2753,11 @@ class AgentManager:
2750
2753
  if query_tags.intersection(passage_tags):
2751
2754
  filtered_passages.append(passage)
2752
2755
 
2753
- return filtered_passages
2756
+ # Return as tuples with empty metadata for SQL path
2757
+ return [(p, 0.0, {}) for p in filtered_passages]
2754
2758
 
2755
- return pydantic_passages
2759
+ # Return as tuples with empty metadata for SQL path
2760
+ return [(p, 0.0, {}) for p in pydantic_passages]
2756
2761
 
2757
2762
  @enforce_types
2758
2763
  @trace_method
@@ -2766,7 +2771,7 @@ class AgentManager:
2766
2771
  top_k: Optional[int] = None,
2767
2772
  start_datetime: Optional[str] = None,
2768
2773
  end_datetime: Optional[str] = None,
2769
- ) -> Tuple[List[Dict[str, Any]], int]:
2774
+ ) -> List[Dict[str, Any]]:
2770
2775
  """
2771
2776
  Search archival memory using semantic (embedding-based) search with optional temporal filtering.
2772
2777
 
@@ -2783,11 +2788,11 @@ class AgentManager:
2783
2788
  end_datetime: Filter results before this datetime (ISO 8601 format)
2784
2789
 
2785
2790
  Returns:
2786
- Tuple of (formatted_results, count)
2791
+ List of formatted results with relevance metadata
2787
2792
  """
2788
2793
  # Handle empty or whitespace-only queries
2789
2794
  if not query or not query.strip():
2790
- return [], 0
2795
+ return []
2791
2796
 
2792
2797
  # Get the agent to access timezone and embedding config
2793
2798
  agent_state = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
@@ -2839,7 +2844,7 @@ class AgentManager:
2839
2844
 
2840
2845
  # Get results using existing passage query method
2841
2846
  limit = top_k if top_k is not None else RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
2842
- all_results = await self.query_agent_passages_async(
2847
+ passages_with_metadata = await self.query_agent_passages_async(
2843
2848
  actor=actor,
2844
2849
  agent_id=agent_id,
2845
2850
  query_text=query,
@@ -2852,11 +2857,11 @@ class AgentManager:
2852
2857
  end_date=end_date,
2853
2858
  )
2854
2859
 
2855
- # Format results to include tags with friendly timestamps
2860
+ # Format results to include tags with friendly timestamps and relevance metadata
2856
2861
  formatted_results = []
2857
- for result in all_results:
2862
+ for passage, score, metadata in passages_with_metadata:
2858
2863
  # Format timestamp in agent's timezone if available
2859
- timestamp = result.created_at
2864
+ timestamp = passage.created_at
2860
2865
  if timestamp and agent_state.timezone:
2861
2866
  try:
2862
2867
  # Convert to agent's timezone
@@ -2871,9 +2876,26 @@ class AgentManager:
2871
2876
  # Use ISO format if no timezone is set
2872
2877
  formatted_timestamp = str(timestamp) if timestamp else "Unknown"
2873
2878
 
2874
- formatted_results.append({"timestamp": formatted_timestamp, "content": result.text, "tags": result.tags or []})
2879
+ result_dict = {"timestamp": formatted_timestamp, "content": passage.text, "tags": passage.tags or []}
2880
+
2881
+ # Add relevance metadata if available
2882
+ if metadata:
2883
+ relevance_info = {
2884
+ k: v
2885
+ for k, v in {
2886
+ "rrf_score": metadata.get("combined_score"),
2887
+ "vector_rank": metadata.get("vector_rank"),
2888
+ "fts_rank": metadata.get("fts_rank"),
2889
+ }.items()
2890
+ if v is not None
2891
+ }
2892
+
2893
+ if relevance_info: # Only add if we have metadata
2894
+ result_dict["relevance"] = relevance_info
2895
+
2896
+ formatted_results.append(result_dict)
2875
2897
 
2876
- return formatted_results, len(formatted_results)
2898
+ return formatted_results
2877
2899
 
2878
2900
  @enforce_types
2879
2901
  @trace_method
@@ -3698,45 +3720,3 @@ class AgentManager:
3698
3720
  num_archival_memories=num_archival_memories,
3699
3721
  num_messages=num_messages,
3700
3722
  )
3701
-
3702
- async def get_or_set_vector_db_namespace_async(
3703
- self,
3704
- agent_id: str,
3705
- organization_id: str,
3706
- ) -> str:
3707
- """Get the vector database namespace for an agent, creating it if it doesn't exist.
3708
-
3709
- Args:
3710
- agent_id: Agent ID to check/store namespace
3711
- organization_id: Organization ID for namespace generation
3712
-
3713
- Returns:
3714
- The org-scoped namespace name
3715
- """
3716
- from sqlalchemy import update
3717
-
3718
- from letta.settings import settings
3719
-
3720
- async with db_registry.async_session() as session:
3721
- # check if namespace already exists
3722
- result = await session.execute(select(AgentModel._vector_db_namespace).where(AgentModel.id == agent_id))
3723
- row = result.fetchone()
3724
-
3725
- if row and row[0]:
3726
- return row[0]
3727
-
3728
- # TODO: In the future, we might use agent_id for sharding the namespace
3729
- # For now, all messages in an org share the same namespace
3730
-
3731
- # generate org-scoped namespace name
3732
- environment = settings.environment
3733
- if environment:
3734
- namespace_name = f"messages_{organization_id}_{environment.lower()}"
3735
- else:
3736
- namespace_name = f"messages_{organization_id}"
3737
-
3738
- # update the agent with the namespace (keeps agent-level tracking for future sharding)
3739
- await session.execute(update(AgentModel).where(AgentModel.id == agent_id).values(_vector_db_namespace=namespace_name))
3740
- await session.commit()
3741
-
3742
- return namespace_name
@@ -12,6 +12,7 @@ from letta.errors import (
12
12
  AgentNotFoundForExportError,
13
13
  )
14
14
  from letta.helpers.pinecone_utils import should_use_pinecone
15
+ from letta.helpers.tpuf_client import should_use_tpuf
15
16
  from letta.log import get_logger
16
17
  from letta.schemas.agent import AgentState, CreateAgent
17
18
  from letta.schemas.agent_file import (
@@ -29,7 +30,7 @@ from letta.schemas.agent_file import (
29
30
  )
30
31
  from letta.schemas.block import Block
31
32
  from letta.schemas.embedding_config import EmbeddingConfig
32
- from letta.schemas.enums import FileProcessingStatus
33
+ from letta.schemas.enums import FileProcessingStatus, VectorDBProvider
33
34
  from letta.schemas.file import FileMetadata
34
35
  from letta.schemas.group import Group, GroupCreate
35
36
  from letta.schemas.mcp import MCPServer
@@ -90,7 +91,6 @@ class AgentSerializationManager:
90
91
  self.file_agent_manager = file_agent_manager
91
92
  self.message_manager = message_manager
92
93
  self.file_parser = MistralFileParser() if settings.mistral_api_key else MarkitdownFileParser()
93
- self.using_pinecone = should_use_pinecone()
94
94
 
95
95
  # ID mapping state for export
96
96
  self._db_to_file_ids: Dict[str, str] = {}
@@ -208,6 +208,10 @@ class AgentSerializationManager:
208
208
  )
209
209
  agent_schema.id = agent_file_id
210
210
 
211
+ # wipe the values of tool_exec_environment_variables (they contain secrets)
212
+ if agent_schema.tool_exec_environment_variables:
213
+ agent_schema.tool_exec_environment_variables = {key: "" for key in agent_schema.tool_exec_environment_variables}
214
+
211
215
  if agent_schema.messages:
212
216
  for message in agent_schema.messages:
213
217
  message_file_id = self._map_db_to_file_id(message.id, MessageSchema.__id_prefix__)
@@ -588,7 +592,12 @@ class AgentSerializationManager:
588
592
  if schema.files and any(f.content for f in schema.files):
589
593
  # Use override embedding config if provided, otherwise use agent's config
590
594
  embedder_config = override_embedding_config if override_embedding_config else schema.agents[0].embedding_config
591
- if should_use_pinecone():
595
+ # determine which embedder to use - turbopuffer takes precedence
596
+ if should_use_tpuf():
597
+ from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
598
+
599
+ embedder = TurbopufferEmbedder(embedding_config=embedder_config)
600
+ elif should_use_pinecone():
592
601
  embedder = PineconeEmbedder(embedding_config=embedder_config)
593
602
  else:
594
603
  embedder = OpenAIEmbedder(embedding_config=embedder_config)
@@ -596,7 +605,6 @@ class AgentSerializationManager:
596
605
  file_parser=self.file_parser,
597
606
  embedder=embedder,
598
607
  actor=actor,
599
- using_pinecone=self.using_pinecone,
600
608
  )
601
609
 
602
610
  for file_schema in schema.files:
@@ -646,9 +654,10 @@ class AgentSerializationManager:
646
654
  if agent_data.get("source_ids"):
647
655
  agent_data["source_ids"] = [file_to_db_ids[file_id] for file_id in agent_data["source_ids"]]
648
656
 
649
- if env_vars:
650
- for var in agent_data["tool_exec_environment_variables"]:
651
- var["value"] = env_vars.get(var["key"], "")
657
+ if env_vars and agent_data.get("tool_exec_environment_variables"):
658
+ # update environment variable values from the provided env_vars dict
659
+ for key in agent_data["tool_exec_environment_variables"]:
660
+ agent_data["tool_exec_environment_variables"][key] = env_vars.get(key, "")
652
661
 
653
662
  # Override project_id if provided
654
663
  if project_id:
@@ -675,7 +684,12 @@ class AgentSerializationManager:
675
684
  # Map file ID to the generated database ID immediately
676
685
  message_file_to_db_ids[message_schema.id] = message_obj.id
677
686
 
678
- created_messages = await self.message_manager.create_many_messages_async(pydantic_msgs=messages, actor=actor)
687
+ created_messages = await self.message_manager.create_many_messages_async(
688
+ pydantic_msgs=messages,
689
+ actor=actor,
690
+ project_id=created_agent.project_id,
691
+ template_id=created_agent.template_id,
692
+ )
679
693
  imported_count += len(created_messages)
680
694
 
681
695
  # Remap in_context_message_ids from file IDs to database IDs