letta-nightly 0.11.7.dev20250908104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +111 -0
  3. letta/adapters/letta_llm_stream_adapter.py +169 -0
  4. letta/agents/base_agent.py +4 -1
  5. letta/agents/base_agent_v2.py +68 -0
  6. letta/agents/helpers.py +3 -5
  7. letta/agents/letta_agent.py +23 -12
  8. letta/agents/letta_agent_v2.py +1220 -0
  9. letta/agents/voice_agent.py +2 -1
  10. letta/constants.py +1 -1
  11. letta/errors.py +12 -0
  12. letta/functions/function_sets/base.py +53 -12
  13. letta/functions/schema_generator.py +1 -1
  14. letta/groups/sleeptime_multi_agent_v3.py +231 -0
  15. letta/helpers/tool_rule_solver.py +4 -0
  16. letta/helpers/tpuf_client.py +607 -34
  17. letta/interfaces/anthropic_streaming_interface.py +64 -24
  18. letta/interfaces/openai_streaming_interface.py +80 -37
  19. letta/llm_api/openai_client.py +45 -4
  20. letta/orm/block.py +1 -0
  21. letta/orm/group.py +1 -0
  22. letta/orm/source.py +8 -1
  23. letta/orm/step_metrics.py +10 -0
  24. letta/schemas/block.py +4 -0
  25. letta/schemas/enums.py +1 -0
  26. letta/schemas/group.py +8 -0
  27. letta/schemas/letta_message.py +1 -1
  28. letta/schemas/letta_request.py +2 -2
  29. letta/schemas/mcp.py +9 -1
  30. letta/schemas/message.py +23 -0
  31. letta/schemas/providers/ollama.py +1 -1
  32. letta/schemas/providers.py +1 -2
  33. letta/schemas/source.py +6 -0
  34. letta/schemas/step_metrics.py +2 -0
  35. letta/server/rest_api/routers/v1/__init__.py +2 -0
  36. letta/server/rest_api/routers/v1/agents.py +100 -5
  37. letta/server/rest_api/routers/v1/blocks.py +6 -0
  38. letta/server/rest_api/routers/v1/folders.py +23 -5
  39. letta/server/rest_api/routers/v1/groups.py +6 -0
  40. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  41. letta/server/rest_api/routers/v1/messages.py +14 -19
  42. letta/server/rest_api/routers/v1/runs.py +43 -28
  43. letta/server/rest_api/routers/v1/sources.py +23 -5
  44. letta/server/rest_api/routers/v1/tools.py +42 -0
  45. letta/server/rest_api/streaming_response.py +9 -1
  46. letta/server/server.py +2 -1
  47. letta/services/agent_manager.py +39 -59
  48. letta/services/agent_serialization_manager.py +22 -8
  49. letta/services/archive_manager.py +60 -9
  50. letta/services/block_manager.py +5 -0
  51. letta/services/file_processor/embedder/base_embedder.py +5 -0
  52. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  53. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  54. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  55. letta/services/file_processor/file_processor.py +9 -7
  56. letta/services/group_manager.py +74 -11
  57. letta/services/mcp_manager.py +132 -26
  58. letta/services/message_manager.py +229 -125
  59. letta/services/passage_manager.py +2 -1
  60. letta/services/source_manager.py +23 -1
  61. letta/services/summarizer/summarizer.py +2 -0
  62. letta/services/tool_executor/core_tool_executor.py +2 -120
  63. letta/services/tool_executor/files_tool_executor.py +133 -8
  64. letta/settings.py +6 -0
  65. letta/utils.py +34 -1
  66. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
  67. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
  68. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
  69. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
  70. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
@@ -11,17 +11,16 @@ from letta.orm.agent import Agent as AgentModel
 from letta.orm.errors import NoResultFound
 from letta.orm.message import Message as MessageModel
 from letta.otel.tracing import trace_method
-from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import LettaMessageUpdateUnion
 from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType, TextContent
-from letta.schemas.message import Message as PydanticMessage, MessageUpdate
+from letta.schemas.message import Message as PydanticMessage, MessageSearchResult, MessageUpdate
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
 from letta.services.file_manager import FileManager
 from letta.services.helpers.agent_manager_helper import validate_agent_exists_async
 from letta.settings import DatabaseChoice, settings
-from letta.utils import enforce_types
+from letta.utils import enforce_types, fire_and_forget
 
 logger = get_logger(__name__)
 
@@ -101,7 +100,7 @@ class MessageManager:
                 args = json.loads(tool_call.function.arguments)
                 actual_message = args.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
 
-                return json.dumps({"thinking": content_str, "message": actual_message})
+                return json.dumps({"thinking": content_str, "content": actual_message})
             except (json.JSONDecodeError, KeyError):
                 # fallback if parsing fails
                 pass
@@ -314,8 +313,9 @@ class MessageManager:
         self,
         pydantic_msgs: List[PydanticMessage],
         actor: PydanticUser,
-        embedding_config: Optional[EmbeddingConfig] = None,
         strict_mode: bool = False,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
     ) -> List[PydanticMessage]:
         """
         Create multiple messages in a single database transaction asynchronously.
@@ -323,7 +323,9 @@
         Args:
             pydantic_msgs: List of Pydantic message models to create
             actor: User performing the action
-            embedding_config: Optional embedding configuration to enable message embedding in Turbopuffer
+            strict_mode: If True, wait for embedding to complete; if False, run in background
+            project_id: Optional project ID for the messages (for Turbopuffer indexing)
+            template_id: Optional template ID for the messages (for Turbopuffer indexing)
 
         Returns:
             List of created Pydantic message models
@@ -362,60 +364,81 @@
             result = [msg.to_pydantic() for msg in created_messages]
             await session.commit()
 
-            # embed messages in turbopuffer if enabled and embedding_config provided
-            from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
-
-            if should_use_tpuf_for_messages() and embedding_config and result:
-                try:
-                    # extract agent_id from the first message (all should have same agent_id)
-                    agent_id = result[0].agent_id
-                    if agent_id:
-                        # extract text content from each message
-                        message_texts = []
-                        message_ids = []
-                        roles = []
-                        created_ats = []
-                        # combine assistant+tool messages before embedding
-                        combined_messages = self._combine_assistant_tool_messages(result)
-
-                        for msg in combined_messages:
-                            text = self._extract_message_text(msg).strip()
-                            if text:  # only embed messages with text content (role filtering is handled in _extract_message_text)
-                                message_texts.append(text)
-                                message_ids.append(msg.id)
-                                roles.append(msg.role)
-                                created_ats.append(msg.created_at)
-
-                        if message_texts:
-                            # generate embeddings using provided config
-                            from letta.llm_api.llm_client import LLMClient
-
-                            embedding_client = LLMClient.create(
-                                provider_type=embedding_config.embedding_endpoint_type,
-                                actor=actor,
-                            )
-                            embeddings = await embedding_client.request_embeddings(message_texts, embedding_config)
-
-                            # insert to turbopuffer
-                            tpuf_client = TurbopufferClient()
-                            await tpuf_client.insert_messages(
-                                agent_id=agent_id,
-                                message_texts=message_texts,
-                                embeddings=embeddings,
-                                message_ids=message_ids,
-                                organization_id=actor.organization_id,
-                                roles=roles,
-                                created_ats=created_ats,
-                            )
-                            logger.info(f"Successfully embedded {len(message_texts)} messages for agent {agent_id}")
-                except Exception as e:
-                    logger.error(f"Failed to embed messages in Turbopuffer: {e}")
+            # embed messages in turbopuffer if enabled
+            from letta.helpers.tpuf_client import should_use_tpuf_for_messages
 
+            if should_use_tpuf_for_messages() and result:
+                # extract agent_id from the first message (all should have same agent_id)
+                agent_id = result[0].agent_id
+                if agent_id:
                     if strict_mode:
-                        raise  # Re-raise the exception in strict mode
+                        # wait for embedding to complete
+                        await self._embed_messages_background(result, actor, agent_id, project_id, template_id)
+                    else:
+                        # fire and forget - run embedding in background
+                        fire_and_forget(
+                            self._embed_messages_background(result, actor, agent_id, project_id, template_id),
+                            task_name=f"embed_messages_for_agent_{agent_id}",
+                        )
 
             return result
 
+    async def _embed_messages_background(
+        self,
+        messages: List[PydanticMessage],
+        actor: PydanticUser,
+        agent_id: str,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
+    ) -> None:
+        """Background task to embed and store messages in Turbopuffer.
+
+        Args:
+            messages: List of messages to embed
+            actor: User performing the action
+            agent_id: Agent ID for the messages
+            project_id: Optional project ID for the messages
+            template_id: Optional template ID for the messages
+        """
+        try:
+            from letta.helpers.tpuf_client import TurbopufferClient
+
+            # extract text content from each message
+            message_texts = []
+            message_ids = []
+            roles = []
+            created_ats = []
+
+            # combine assistant+tool messages before embedding
+            combined_messages = self._combine_assistant_tool_messages(messages)
+
+            for msg in combined_messages:
+                text = self._extract_message_text(msg).strip()
+                if text:  # only embed messages with text content (role filtering is handled in _extract_message_text)
+                    message_texts.append(text)
+                    message_ids.append(msg.id)
+                    roles.append(msg.role)
+                    created_ats.append(msg.created_at)
+
+            if message_texts:
+                # insert to turbopuffer - TurbopufferClient will generate embeddings internally
+                tpuf_client = TurbopufferClient()
+                await tpuf_client.insert_messages(
+                    agent_id=agent_id,
+                    message_texts=message_texts,
+                    message_ids=message_ids,
+                    organization_id=actor.organization_id,
+                    actor=actor,
+                    roles=roles,
+                    created_ats=created_ats,
+                    project_id=project_id,
+                    template_id=template_id,
+                )
+                logger.info(f"Successfully embedded {len(message_texts)} messages for agent {agent_id}")
+        except Exception as e:
+            logger.error(f"Failed to embed messages in Turbopuffer for agent {agent_id}: {e}")
+            # don't re-raise the exception in background mode - just log it
+
 
     @enforce_types
     @trace_method
     def update_message_by_letta_message(
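
A note on the pattern introduced above: with strict_mode left False, Turbopuffer indexing no longer blocks message creation; the _embed_messages_background coroutine is handed to fire_and_forget, newly imported from letta.utils (which grows by 34 lines in this release but whose body is not shown in this diff). The sketch below is only an illustration of how such a helper is commonly built on asyncio.create_task, assuming nothing beyond the call shape visible above, fire_and_forget(coro, task_name=...); the real implementation may differ.

# Illustrative sketch only -- NOT the letta.utils implementation.
# Assumes the call shape seen above: fire_and_forget(coro, task_name=...).
import asyncio
import logging
from typing import Coroutine, Optional

logger = logging.getLogger(__name__)

# hold strong references so pending background tasks are not garbage-collected
_background_tasks: set[asyncio.Task] = set()


def fire_and_forget(coro: Coroutine, task_name: Optional[str] = None) -> asyncio.Task:
    """Schedule a coroutine on the running loop; log failures instead of raising."""
    task = asyncio.create_task(coro, name=task_name)
    _background_tasks.add(task)

    def _on_done(t: asyncio.Task) -> None:
        _background_tasks.discard(t)
        if not t.cancelled() and t.exception() is not None:
            logger.error(f"Background task {task_name or t.get_name()} failed: {t.exception()}")

    task.add_done_callback(_on_done)
    return task

Logging rather than re-raising in the done callback mirrors the "don't re-raise in background mode" behavior of _embed_messages_background above.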
@@ -519,12 +542,21 @@ class MessageManager:
         message_id: str,
         message_update: MessageUpdate,
         actor: PydanticUser,
-        embedding_config: Optional[EmbeddingConfig] = None,
         strict_mode: bool = False,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
     ) -> PydanticMessage:
         """
         Updates an existing record in the database with values from the provided record object.
         Async version of the function above.
+
+        Args:
+            message_id: ID of the message to update
+            message_update: Update data for the message
+            actor: User performing the action
+            strict_mode: If True, wait for embedding update to complete; if False, run in background
+            project_id: Optional project ID for the message (for Turbopuffer indexing)
+            template_id: Optional template ID for the message (for Turbopuffer indexing)
         """
         async with db_registry.async_session() as session:
             # Fetch existing message from database
@@ -540,48 +572,62 @@ class MessageManager:
             await session.commit()
 
             # update message in turbopuffer if enabled (delete and re-insert)
-            from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
-
-            if should_use_tpuf_for_messages() and embedding_config and pydantic_message.agent_id:
-                try:
-                    # extract text content from updated message
-                    text = self._extract_message_text(pydantic_message)
+            from letta.helpers.tpuf_client import should_use_tpuf_for_messages
 
-                    # only update in turbopuffer if there's text content (role filtering is handled in _extract_message_text)
-                    if text:
-                        tpuf_client = TurbopufferClient()
+            if should_use_tpuf_for_messages() and pydantic_message.agent_id:
+                # extract text content from updated message
+                text = self._extract_message_text(pydantic_message)
 
-                        # delete old message from turbopuffer
-                        await tpuf_client.delete_messages(
-                            agent_id=pydantic_message.agent_id, organization_id=actor.organization_id, message_ids=[message_id]
+                # only update in turbopuffer if there's text content
+                if text:
+                    if strict_mode:
+                        # wait for embedding update to complete
+                        await self._update_message_embedding_background(pydantic_message, text, actor, project_id, template_id)
+                    else:
+                        # fire and forget - run embedding update in background
+                        fire_and_forget(
+                            self._update_message_embedding_background(pydantic_message, text, actor, project_id, template_id),
+                            task_name=f"update_message_embedding_{message_id}",
                         )
 
-                        # generate new embedding
-                        from letta.llm_api.llm_client import LLMClient
+            return pydantic_message
 
-                        embedding_client = LLMClient.create(
-                            provider_type=embedding_config.embedding_endpoint_type,
-                            actor=actor,
-                        )
-                        embeddings = await embedding_client.request_embeddings([text], embedding_config)
-
-                        # re-insert with updated content
-                        await tpuf_client.insert_messages(
-                            agent_id=pydantic_message.agent_id,
-                            message_texts=[text],
-                            embeddings=embeddings,
-                            message_ids=[message_id],
-                            organization_id=actor.organization_id,
-                            roles=[pydantic_message.role],
-                            created_ats=[pydantic_message.created_at],
-                        )
-                        logger.info(f"Successfully updated message {message_id} in Turbopuffer")
-                except Exception as e:
-                    logger.error(f"Failed to update message in Turbopuffer: {e}")
-                    if strict_mode:
-                        raise  # Re-raise the exception in strict mode
+    async def _update_message_embedding_background(
+        self, message: PydanticMessage, text: str, actor: PydanticUser, project_id: Optional[str] = None, template_id: Optional[str] = None
+    ) -> None:
+        """Background task to update a message's embedding in Turbopuffer.
 
-            return pydantic_message
+        Args:
+            message: The updated message
+            text: Extracted text content from the message
+            actor: User performing the action
+            project_id: Optional project ID for the message
+            template_id: Optional template ID for the message
+        """
+        try:
+            from letta.helpers.tpuf_client import TurbopufferClient
+
+            tpuf_client = TurbopufferClient()
+
+            # delete old message from turbopuffer
+            await tpuf_client.delete_messages(agent_id=message.agent_id, organization_id=actor.organization_id, message_ids=[message.id])
+
+            # re-insert with updated content - TurbopufferClient will generate embeddings internally
+            await tpuf_client.insert_messages(
+                agent_id=message.agent_id,
+                message_texts=[text],
+                message_ids=[message.id],
+                organization_id=actor.organization_id,
+                actor=actor,
+                roles=[message.role],
+                created_ats=[message.created_at],
+                project_id=project_id,
+                template_id=template_id,
+            )
+            logger.info(f"Successfully updated message {message.id} in Turbopuffer")
+        except Exception as e:
+            logger.error(f"Failed to update message {message.id} in Turbopuffer: {e}")
+            # don't re-raise the exception in background mode - just log it
 
     def _update_message_by_id_impl(
         self, message_id: str, message_update: MessageUpdate, actor: PydanticUser, message: MessageModel
@@ -1058,13 +1104,13 @@ class MessageManager:
         agent_id: str,
         actor: PydanticUser,
         query_text: Optional[str] = None,
-        query_embedding: Optional[List[float]] = None,
         search_mode: str = "hybrid",
         roles: Optional[List[MessageRole]] = None,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
         limit: int = 50,
         start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
-        embedding_config: Optional[EmbeddingConfig] = None,
     ) -> List[Tuple[PydanticMessage, dict]]:
         """
         Search messages using Turbopuffer if enabled, otherwise fall back to SQL search.
@@ -1072,14 +1118,14 @@
         Args:
             agent_id: ID of the agent whose messages to search
             actor: User performing the search
-            query_text: Text query for full-text search
-            query_embedding: Optional pre-computed embedding for vector search
+            query_text: Text query (used for embedding in vector/hybrid modes, and FTS in fts/hybrid modes)
             search_mode: "vector", "fts", "hybrid", or "timestamp" (default: "hybrid")
             roles: Optional list of message roles to filter by
+            project_id: Optional project ID to filter messages by
+            template_id: Optional template ID to filter messages by
            limit: Maximum number of results to return
            start_date: Optional filter for messages created after this date
-            end_date: Optional filter for messages created before this date
-            embedding_config: Optional embedding configuration for generating query embedding
+            end_date: Optional filter for messages created on or before this date (inclusive)
 
         Returns:
             List of tuples (message, metadata) where metadata contains relevance scores
@@ -1089,40 +1135,18 @@
         # check if we should use turbopuffer
         if should_use_tpuf_for_messages():
             try:
-                # generate embedding if needed and not provided
-                if search_mode in ["vector", "hybrid"] and query_embedding is None and query_text:
-                    if not embedding_config:
-                        # fall back to SQL search if no embedding config
-                        logger.warning("No embedding config provided for vector search, falling back to SQL")
-                        return await self.list_messages_for_agent_async(
-                            agent_id=agent_id,
-                            actor=actor,
-                            query_text=query_text,
-                            roles=roles,
-                            limit=limit,
-                            ascending=False,
-                        )
-
-                    # generate embedding from query text
-                    from letta.llm_api.llm_client import LLMClient
-
-                    embedding_client = LLMClient.create(
-                        provider_type=embedding_config.embedding_endpoint_type,
-                        actor=actor,
-                    )
-                    embeddings = await embedding_client.request_embeddings([query_text], embedding_config)
-                    query_embedding = embeddings[0]
-
-                # use turbopuffer for search
+                # use turbopuffer for search - TurbopufferClient will generate embeddings internally
                 tpuf_client = TurbopufferClient()
-                results = await tpuf_client.query_messages(
+                results = await tpuf_client.query_messages_by_agent_id(
                     agent_id=agent_id,
                     organization_id=actor.organization_id,
-                    query_embedding=query_embedding,
+                    actor=actor,
                    query_text=query_text,
                    search_mode=search_mode,
                    top_k=limit,
                    roles=roles,
+                    project_id=project_id,
+                    template_id=template_id,
                    start_date=start_date,
                    end_date=end_date,
                )
@@ -1194,3 +1218,83 @@ class MessageManager:
            }
            message_tuples.append((message, metadata))
        return message_tuples
+
+    async def search_messages_org_async(
+        self,
+        actor: PydanticUser,
+        query_text: Optional[str] = None,
+        search_mode: str = "hybrid",
+        roles: Optional[List[MessageRole]] = None,
+        project_id: Optional[str] = None,
+        template_id: Optional[str] = None,
+        limit: int = 50,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+    ) -> List[MessageSearchResult]:
+        """
+        Search messages across entire organization using Turbopuffer.
+
+        Args:
+            actor: User performing the search (must have org access)
+            query_text: Text query for full-text search
+            search_mode: "vector", "fts", or "hybrid" (default: "hybrid")
+            roles: Optional list of message roles to filter by
+            project_id: Optional project ID to filter messages by
+            template_id: Optional template ID to filter messages by
+            limit: Maximum number of results to return
+            start_date: Optional filter for messages created after this date
+            end_date: Optional filter for messages created on or before this date (inclusive)
+
+        Returns:
+            List of MessageSearchResult objects with scoring details
+
+        Raises:
+            ValueError: If message embedding or Turbopuffer is not enabled
+        """
+        from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
+
+        # check if turbopuffer is enabled
+        # TODO: extend to non-Turbopuffer in the future.
+        if not should_use_tpuf_for_messages():
+            raise ValueError("Message search requires message embedding, OpenAI, and Turbopuffer to be enabled.")
+
+        # use turbopuffer for search - TurbopufferClient will generate embeddings internally
+        tpuf_client = TurbopufferClient()
+        results = await tpuf_client.query_messages_by_org_id(
+            organization_id=actor.organization_id,
+            actor=actor,
+            query_text=query_text,
+            search_mode=search_mode,
+            top_k=limit,
+            roles=roles,
+            project_id=project_id,
+            template_id=template_id,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # convert results to MessageSearchResult objects
+        if not results:
+            return []
+
+        # create message mapping
+        message_ids = []
+        embedded_text = {}
+        for msg_dict, _, _ in results:
+            message_ids.append(msg_dict["id"])
+            embedded_text[msg_dict["id"]] = msg_dict["text"]
+        messages = await self.get_messages_by_ids_async(message_ids=message_ids, actor=actor)
+        message_mapping = {message.id: message for message in messages}
+
+        # create search results using list comprehension
+        return [
+            MessageSearchResult(
+                embedded_text=embedded_text[msg_id],
+                message=message_mapping[msg_id],
+                fts_rank=metadata.get("fts_rank"),
+                vector_rank=metadata.get("vector_rank"),
+                rrf_score=rrf_score,
+            )
+            for msg_dict, rrf_score, metadata in results
+            if (msg_id := msg_dict.get("id")) in message_mapping
+        ]
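
The hunk above adds search_messages_org_async, an organization-wide counterpart to the per-agent search that returns MessageSearchResult objects instead of (message, metadata) tuples; it is presumably surfaced through the router changes listed in this release. A hedged usage sketch, assuming a Turbopuffer-enabled deployment and a message_manager plus actor obtained from the surrounding application context (both names are placeholders here):

# Hypothetical caller of the new org-wide search; `message_manager` and `actor`
# are assumed to come from the surrounding application (e.g. a route handler).
from datetime import datetime, timedelta, timezone

from letta.schemas.enums import MessageRole


async def find_recent_billing_messages(message_manager, actor):
    # hybrid mode combines full-text and vector rankings; rrf_score reflects the fused rank
    results = await message_manager.search_messages_org_async(
        actor=actor,
        query_text="billing error",
        search_mode="hybrid",
        roles=[MessageRole.user, MessageRole.assistant],
        limit=20,
        start_date=datetime.now(timezone.utc) - timedelta(days=7),
    )
    for r in results:
        # each result carries the matched text plus ranking metadata
        print(r.rrf_score, r.fts_rank, r.vector_rank, r.message.id)
    return results

The result fields (embedded_text, message, fts_rank, vector_rank, rrf_score) are exactly those populated by the list comprehension above.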
@@ -623,12 +623,13 @@ class PassageManager:
             passage_texts = [p.text for p in passages]
 
             # Insert to Turbopuffer with the same IDs as SQL
+            # TurbopufferClient will generate embeddings internally using default config
             await tpuf_client.insert_archival_memories(
                 archive_id=archive.id,
                 text_chunks=passage_texts,
-                embeddings=embeddings,
                 passage_ids=passage_ids,  # Use same IDs as SQL
                 organization_id=actor.organization_id,
+                actor=actor,
                 tags=tags,
                 created_at=passages[0].created_at if passages else None,
             )
@@ -3,12 +3,15 @@ from typing import List, Optional, Union
 
 from sqlalchemy import and_, exists, select
 
+from letta.helpers.pinecone_utils import should_use_pinecone
+from letta.helpers.tpuf_client import should_use_tpuf
 from letta.orm import Agent as AgentModel
 from letta.orm.errors import NoResultFound
 from letta.orm.source import Source as SourceModel
 from letta.orm.sources_agents import SourcesAgents
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState as PydanticAgentState
+from letta.schemas.enums import VectorDBProvider
 from letta.schemas.source import Source as PydanticSource, SourceUpdate
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
@@ -16,6 +19,18 @@ from letta.utils import enforce_types, printd
 
 
 class SourceManager:
+    def _get_vector_db_provider(self) -> VectorDBProvider:
+        """
+        determine which vector db provider to use based on configuration.
+        turbopuffer takes precedence when available.
+        """
+        if should_use_tpuf():
+            return VectorDBProvider.TPUF
+        elif should_use_pinecone():
+            return VectorDBProvider.PINECONE
+        else:
+            return VectorDBProvider.NATIVE
+
     """Manager class to handle business logic related to Sources."""
 
     @trace_method
@@ -50,9 +65,12 @@ class SourceManager:
         if db_source:
             return db_source
         else:
+            vector_db_provider = self._get_vector_db_provider()
+
             async with db_registry.async_session() as session:
                 # Provide default embedding config if not given
                 source.organization_id = actor.organization_id
+                source.vector_db_provider = vector_db_provider
                 source = SourceModel(**source.model_dump(to_orm=True, exclude_none=True))
                 await source.create_async(session, actor=actor)
                 return source.to_pydantic()
@@ -91,6 +109,10 @@ class SourceManager:
         Returns:
             List of created/updated sources
         """
+        vector_db_provider = self._get_vector_db_provider()
+        for pydantic_source in pydantic_sources:
+            pydantic_source.vector_db_provider = vector_db_provider
+
         if not pydantic_sources:
             return []
 
@@ -164,7 +186,7 @@ class SourceManager:
                 # update existing source
                 from letta.schemas.source import SourceUpdate
 
-                update_data = source.model_dump(exclude={"id"}, exclude_none=True)
+                update_data = source.model_dump(exclude={"id", "vector_db_provider"}, exclude_none=True)
                 updated_source = await self.update_source(existing_source.id, SourceUpdate(**update_data), actor)
                 sources.append(updated_source)
             else:
@@ -195,6 +195,8 @@ class Summarizer:
             await self.message_manager.create_many_messages_async(
                 pydantic_msgs=[summary_message_obj],
                 actor=self.actor,
+                project_id=agent_state.project_id,
+                template_id=agent_state.template_id,
             )
 
             updated_in_context_messages = all_in_context_messages[assistant_message_index:]