letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,21 @@
1
1
  import json
2
2
  import uuid
3
- from typing import List, Optional, Sequence
3
+ from datetime import datetime
4
+ from typing import List, Optional, Sequence, Tuple
4
5
 
5
6
  from sqlalchemy import delete, exists, func, select, text
6
7
 
8
+ from letta.constants import CONVERSATION_SEARCH_TOOL_NAME, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
7
9
  from letta.log import get_logger
8
10
  from letta.orm.agent import Agent as AgentModel
9
11
  from letta.orm.errors import NoResultFound
10
12
  from letta.orm.message import Message as MessageModel
11
13
  from letta.otel.tracing import trace_method
14
+ from letta.schemas.embedding_config import EmbeddingConfig
12
15
  from letta.schemas.enums import MessageRole
13
16
  from letta.schemas.letta_message import LettaMessageUpdateUnion
14
- from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType
15
- from letta.schemas.message import Message as PydanticMessage
16
- from letta.schemas.message import MessageUpdate
17
+ from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType, TextContent
18
+ from letta.schemas.message import Message as PydanticMessage, MessageUpdate
17
19
  from letta.schemas.user import User as PydanticUser
18
20
  from letta.server.db import db_registry
19
21
  from letta.services.file_manager import FileManager
@@ -31,6 +33,188 @@ class MessageManager:
31
33
  """Initialize the MessageManager."""
32
34
  self.file_manager = FileManager()
33
35
 
36
+ def _extract_message_text(self, message: PydanticMessage) -> str:
37
+ """Extract text content from a message's complex content structure.
38
+
39
+ Only extracts text from searchable message roles (assistant, user, tool).
40
+ Returns JSON format for all message types for consistency.
41
+
42
+ Args:
43
+ message: The message to extract text from
44
+
45
+ Returns:
46
+ JSON string with message content, or empty string for non-searchable roles
47
+ """
48
+ # only extract text from searchable roles
49
+ if message.role not in [MessageRole.assistant, MessageRole.user, MessageRole.tool]:
50
+ return ""
51
+
52
+ # skip tool messages related to send_message and conversation_search entirely
53
+ if message.role == MessageRole.tool and message.name in [DEFAULT_MESSAGE_TOOL, CONVERSATION_SEARCH_TOOL_NAME]:
54
+ return ""
55
+
56
+ if not message.content:
57
+ return ""
58
+
59
+ # extract raw content text
60
+ if isinstance(message.content, str):
61
+ content_str = message.content
62
+ else:
63
+ text_parts = []
64
+ for content_item in message.content:
65
+ text = content_item.to_text()
66
+ if text:
67
+ text_parts.append(text)
68
+ content_str = " ".join(text_parts)
69
+
70
+ # skip heartbeat messages entirely
71
+ try:
72
+ if content_str.strip().startswith("{"):
73
+ parsed_content = json.loads(content_str)
74
+ if isinstance(parsed_content, dict) and parsed_content.get("type") == "heartbeat":
75
+ return ""
76
+ except (json.JSONDecodeError, ValueError):
77
+ pass
78
+
79
+ # format everything as JSON
80
+ if message.role == MessageRole.user:
81
+ # check if content_str is already valid JSON to avoid double nesting
82
+ try:
83
+ # if it's already valid JSON, return as-is
84
+ json.loads(content_str)
85
+ return content_str
86
+ except (json.JSONDecodeError, ValueError):
87
+ # if not valid JSON, wrap it
88
+ return json.dumps({"content": content_str})
89
+
90
+ elif message.role == MessageRole.assistant and message.tool_calls:
91
+ # skip assistant messages that call conversation_search
92
+ for tool_call in message.tool_calls:
93
+ if tool_call.function.name == CONVERSATION_SEARCH_TOOL_NAME:
94
+ return ""
95
+
96
+ # check if any tool call is send_message
97
+ for tool_call in message.tool_calls:
98
+ if tool_call.function.name == DEFAULT_MESSAGE_TOOL:
99
+ # extract the actual message from tool call arguments
100
+ try:
101
+ args = json.loads(tool_call.function.arguments)
102
+ actual_message = args.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
103
+
104
+ return json.dumps({"thinking": content_str, "message": actual_message})
105
+ except (json.JSONDecodeError, KeyError):
106
+ # fallback if parsing fails
107
+ pass
108
+
109
+ # default for other messages (tool responses, assistant without send_message)
110
+ # check if content_str is already valid JSON to avoid double nesting
111
+ if message.role == MessageRole.assistant:
112
+ try:
113
+ # if it's already valid JSON, return as-is
114
+ json.loads(content_str)
115
+ return content_str
116
+ except (json.JSONDecodeError, ValueError):
117
+ # if not valid JSON, wrap it
118
+ return json.dumps({"content": content_str})
119
+ else:
120
+ # for tool messages and others, wrap in content
121
+ return json.dumps({"content": content_str})
122
+
123
+ def _combine_assistant_tool_messages(self, messages: List[PydanticMessage]) -> List[PydanticMessage]:
124
+ """Combine assistant messages with their corresponding tool results when IDs match.
125
+
126
+ Args:
127
+ messages: List of messages to process
128
+
129
+ Returns:
130
+ List of messages with assistant+tool combinations merged
131
+ """
132
+ from letta.constants import DEFAULT_MESSAGE_TOOL
133
+
134
+ combined_messages = []
135
+ i = 0
136
+
137
+ while i < len(messages):
138
+ current_msg = messages[i]
139
+
140
+ # skip heartbeat messages
141
+ if self._extract_message_text(current_msg) == "":
142
+ i += 1
143
+ continue
144
+
145
+ # if this is an assistant message with tool calls, look for matching tool response
146
+ if current_msg.role == MessageRole.assistant and current_msg.tool_calls and i + 1 < len(messages):
147
+ next_msg = messages[i + 1]
148
+
149
+ # check if next message is a tool response that matches
150
+ if (
151
+ next_msg.role == MessageRole.tool
152
+ and next_msg.tool_call_id
153
+ and any(tc.id == next_msg.tool_call_id for tc in current_msg.tool_calls)
154
+ ):
155
+ # combine the messages - get raw content to avoid double-processing
156
+ assistant_text = current_msg.content[0].text if current_msg.content else ""
157
+
158
+ # for non-send_message tools, include tool result
159
+ if next_msg.name != DEFAULT_MESSAGE_TOOL:
160
+ tool_result_text = next_msg.content[0].text if next_msg.content else ""
161
+
162
+ # get the tool call that matches this result (we know it exists from the condition above)
163
+ matching_tool_call = next((tc for tc in current_msg.tool_calls if tc.id == next_msg.tool_call_id), None)
164
+
165
+ # format tool call with parameters
166
+ try:
167
+ args = json.loads(matching_tool_call.function.arguments)
168
+ if args:
169
+ # format parameters nicely
170
+ param_strs = [f"{k}={repr(v)}" for k, v in args.items()]
171
+ tool_call_str = f"{matching_tool_call.function.name}({', '.join(param_strs)})"
172
+ else:
173
+ tool_call_str = f"{matching_tool_call.function.name}()"
174
+ except (json.JSONDecodeError, KeyError):
175
+ tool_call_str = f"{matching_tool_call.function.name}()"
176
+
177
+ # format tool result cleanly
178
+ try:
179
+ if tool_result_text.strip().startswith("{"):
180
+ parsed_result = json.loads(tool_result_text)
181
+ if isinstance(parsed_result, dict):
182
+ # extract key information from tool result
183
+ if "message" in parsed_result:
184
+ tool_result_summary = parsed_result["message"]
185
+ elif "status" in parsed_result:
186
+ tool_result_summary = f"Status: {parsed_result['status']}"
187
+ else:
188
+ tool_result_summary = tool_result_text
189
+ else:
190
+ tool_result_summary = tool_result_text
191
+ else:
192
+ tool_result_summary = tool_result_text
193
+ except (json.JSONDecodeError, ValueError):
194
+ tool_result_summary = tool_result_text
195
+
196
+ combined_data = {"thinking": assistant_text, "tool_call": tool_call_str, "tool_result": tool_result_summary}
197
+ combined_text = json.dumps(combined_data)
198
+ else:
199
+ combined_text = assistant_text
200
+
201
+ # create a new combined message
202
+ from letta.schemas.letta_message_content import TextContent
203
+
204
+ combined_message = current_msg.model_copy()
205
+ combined_message.content = [TextContent(text=combined_text)]
206
+ combined_messages.append(combined_message)
207
+
208
+ # skip the tool message since we combined it
209
+ i += 2
210
+ continue
211
+
212
+ # if no combination, add the message as-is
213
+ combined_messages.append(current_msg)
214
+ i += 1
215
+
216
+ return combined_messages
217
+
34
218
  @enforce_types
35
219
  @trace_method
36
220
  def get_message_by_id(self, message_id: str, actor: PydanticUser) -> Optional[PydanticMessage]:
@@ -126,13 +310,20 @@ class MessageManager:
126
310
 
127
311
  @enforce_types
128
312
  @trace_method
129
- async def create_many_messages_async(self, pydantic_msgs: List[PydanticMessage], actor: PydanticUser) -> List[PydanticMessage]:
313
+ async def create_many_messages_async(
314
+ self,
315
+ pydantic_msgs: List[PydanticMessage],
316
+ actor: PydanticUser,
317
+ embedding_config: Optional[EmbeddingConfig] = None,
318
+ strict_mode: bool = False,
319
+ ) -> List[PydanticMessage]:
130
320
  """
131
321
  Create multiple messages in a single database transaction asynchronously.
132
322
 
133
323
  Args:
134
324
  pydantic_msgs: List of Pydantic message models to create
135
325
  actor: User performing the action
326
+ embedding_config: Optional embedding configuration to enable message embedding in Turbopuffer
136
327
 
137
328
  Returns:
138
329
  List of created Pydantic message models
@@ -170,6 +361,59 @@ class MessageManager:
170
361
  created_messages = await MessageModel.batch_create_async(orm_messages, session, actor=actor, no_commit=True, no_refresh=True)
171
362
  result = [msg.to_pydantic() for msg in created_messages]
172
363
  await session.commit()
364
+
365
+ # embed messages in turbopuffer if enabled and embedding_config provided
366
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
367
+
368
+ if should_use_tpuf_for_messages() and embedding_config and result:
369
+ try:
370
+ # extract agent_id from the first message (all should have same agent_id)
371
+ agent_id = result[0].agent_id
372
+ if agent_id:
373
+ # extract text content from each message
374
+ message_texts = []
375
+ message_ids = []
376
+ roles = []
377
+ created_ats = []
378
+ # combine assistant+tool messages before embedding
379
+ combined_messages = self._combine_assistant_tool_messages(result)
380
+
381
+ for msg in combined_messages:
382
+ text = self._extract_message_text(msg).strip()
383
+ if text: # only embed messages with text content (role filtering is handled in _extract_message_text)
384
+ message_texts.append(text)
385
+ message_ids.append(msg.id)
386
+ roles.append(msg.role)
387
+ created_ats.append(msg.created_at)
388
+
389
+ if message_texts:
390
+ # generate embeddings using provided config
391
+ from letta.llm_api.llm_client import LLMClient
392
+
393
+ embedding_client = LLMClient.create(
394
+ provider_type=embedding_config.embedding_endpoint_type,
395
+ actor=actor,
396
+ )
397
+ embeddings = await embedding_client.request_embeddings(message_texts, embedding_config)
398
+
399
+ # insert to turbopuffer
400
+ tpuf_client = TurbopufferClient()
401
+ await tpuf_client.insert_messages(
402
+ agent_id=agent_id,
403
+ message_texts=message_texts,
404
+ embeddings=embeddings,
405
+ message_ids=message_ids,
406
+ organization_id=actor.organization_id,
407
+ roles=roles,
408
+ created_ats=created_ats,
409
+ )
410
+ logger.info(f"Successfully embedded {len(message_texts)} messages for agent {agent_id}")
411
+ except Exception as e:
412
+ logger.error(f"Failed to embed messages in Turbopuffer: {e}")
413
+
414
+ if strict_mode:
415
+ raise # Re-raise the exception in strict mode
416
+
173
417
  return result
174
418
 
175
419
  @enforce_types
@@ -185,9 +429,9 @@ class MessageManager:
185
429
  # modify the tool call for send_message
186
430
  # TODO: fix this if we add parallel tool calls
187
431
  # TODO: note this only works if the AssistantMessage is generated by the standard send_message
188
- assert (
189
- message.tool_calls[0].function.name == "send_message"
190
- ), f"Expected the first tool call to be send_message, but got {message.tool_calls[0].function.name}"
432
+ assert message.tool_calls[0].function.name == "send_message", (
433
+ f"Expected the first tool call to be send_message, but got {message.tool_calls[0].function.name}"
434
+ )
191
435
  original_args = json.loads(message.tool_calls[0].function.arguments)
192
436
  original_args["message"] = letta_message_update.content # override the assistant message
193
437
  update_tool_call = message.tool_calls[0].__deepcopy__()
@@ -224,9 +468,9 @@ class MessageManager:
224
468
  # modify the tool call for send_message
225
469
  # TODO: fix this if we add parallel tool calls
226
470
  # TODO: note this only works if the AssistantMessage is generated by the standard send_message
227
- assert (
228
- message.tool_calls[0].function.name == "send_message"
229
- ), f"Expected the first tool call to be send_message, but got {message.tool_calls[0].function.name}"
471
+ assert message.tool_calls[0].function.name == "send_message", (
472
+ f"Expected the first tool call to be send_message, but got {message.tool_calls[0].function.name}"
473
+ )
230
474
  original_args = json.loads(message.tool_calls[0].function.arguments)
231
475
  original_args["message"] = letta_message_update.content # override the assistant message
232
476
  update_tool_call = message.tool_calls[0].__deepcopy__()
@@ -270,7 +514,14 @@ class MessageManager:
270
514
 
271
515
  @enforce_types
272
516
  @trace_method
273
- async def update_message_by_id_async(self, message_id: str, message_update: MessageUpdate, actor: PydanticUser) -> PydanticMessage:
517
+ async def update_message_by_id_async(
518
+ self,
519
+ message_id: str,
520
+ message_update: MessageUpdate,
521
+ actor: PydanticUser,
522
+ embedding_config: Optional[EmbeddingConfig] = None,
523
+ strict_mode: bool = False,
524
+ ) -> PydanticMessage:
274
525
  """
275
526
  Updates an existing record in the database with values from the provided record object.
276
527
  Async version of the function above.
@@ -287,6 +538,49 @@ class MessageManager:
287
538
  await message.update_async(db_session=session, actor=actor, no_commit=True, no_refresh=True)
288
539
  pydantic_message = message.to_pydantic()
289
540
  await session.commit()
541
+
542
+ # update message in turbopuffer if enabled (delete and re-insert)
543
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
544
+
545
+ if should_use_tpuf_for_messages() and embedding_config and pydantic_message.agent_id:
546
+ try:
547
+ # extract text content from updated message
548
+ text = self._extract_message_text(pydantic_message)
549
+
550
+ # only update in turbopuffer if there's text content (role filtering is handled in _extract_message_text)
551
+ if text:
552
+ tpuf_client = TurbopufferClient()
553
+
554
+ # delete old message from turbopuffer
555
+ await tpuf_client.delete_messages(
556
+ agent_id=pydantic_message.agent_id, organization_id=actor.organization_id, message_ids=[message_id]
557
+ )
558
+
559
+ # generate new embedding
560
+ from letta.llm_api.llm_client import LLMClient
561
+
562
+ embedding_client = LLMClient.create(
563
+ provider_type=embedding_config.embedding_endpoint_type,
564
+ actor=actor,
565
+ )
566
+ embeddings = await embedding_client.request_embeddings([text], embedding_config)
567
+
568
+ # re-insert with updated content
569
+ await tpuf_client.insert_messages(
570
+ agent_id=pydantic_message.agent_id,
571
+ message_texts=[text],
572
+ embeddings=embeddings,
573
+ message_ids=[message_id],
574
+ organization_id=actor.organization_id,
575
+ roles=[pydantic_message.role],
576
+ created_ats=[pydantic_message.created_at],
577
+ )
578
+ logger.info(f"Successfully updated message {message_id} in Turbopuffer")
579
+ except Exception as e:
580
+ logger.error(f"Failed to update message in Turbopuffer: {e}")
581
+ if strict_mode:
582
+ raise # Re-raise the exception in strict mode
583
+
290
584
  return pydantic_message
291
585
 
292
586
  def _update_message_by_id_impl(
@@ -326,6 +620,41 @@ class MessageManager:
326
620
  actor=actor,
327
621
  )
328
622
  msg.hard_delete(session, actor=actor)
623
+ # Note: Turbopuffer deletion requires async, use delete_message_by_id_async for full deletion
624
+ except NoResultFound:
625
+ raise ValueError(f"Message with id {message_id} not found.")
626
+
627
+ @enforce_types
628
+ @trace_method
629
+ async def delete_message_by_id_async(self, message_id: str, actor: PydanticUser, strict_mode: bool = False) -> bool:
630
+ """Delete a message (async version with turbopuffer support)."""
631
+ async with db_registry.async_session() as session:
632
+ try:
633
+ msg = await MessageModel.read_async(
634
+ db_session=session,
635
+ identifier=message_id,
636
+ actor=actor,
637
+ )
638
+ agent_id = msg.agent_id
639
+ await msg.hard_delete_async(session, actor=actor)
640
+
641
+ # delete from turbopuffer if enabled
642
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
643
+
644
+ if should_use_tpuf_for_messages() and agent_id:
645
+ try:
646
+ tpuf_client = TurbopufferClient()
647
+ await tpuf_client.delete_messages(
648
+ agent_id=agent_id, organization_id=actor.organization_id, message_ids=[message_id]
649
+ )
650
+ logger.info(f"Successfully deleted message {message_id} from Turbopuffer")
651
+ except Exception as e:
652
+ logger.error(f"Failed to delete message from Turbopuffer: {e}")
653
+ if strict_mode:
654
+ raise # Re-raise the exception in strict mode
655
+
656
+ return True
657
+
329
658
  except NoResultFound:
330
659
  raise ValueError(f"Message with id {message_id} not found.")
331
660
 
@@ -626,7 +955,9 @@ class MessageManager:
626
955
 
627
956
  @enforce_types
628
957
  @trace_method
629
- async def delete_all_messages_for_agent_async(self, agent_id: str, actor: PydanticUser, exclude_ids: Optional[List[str]] = None) -> int:
958
+ async def delete_all_messages_for_agent_async(
959
+ self, agent_id: str, actor: PydanticUser, exclude_ids: Optional[List[str]] = None, strict_mode: bool = False
960
+ ) -> int:
630
961
  """
631
962
  Efficiently deletes all messages associated with a given agent_id,
632
963
  while enforcing permission checks and avoiding any ORM‑level loads.
@@ -650,12 +981,31 @@ class MessageManager:
650
981
  # 4) commit once
651
982
  await session.commit()
652
983
 
653
- # 5) return the number of rows deleted
984
+ # 5) delete from turbopuffer if enabled
985
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
986
+
987
+ if should_use_tpuf_for_messages():
988
+ try:
989
+ tpuf_client = TurbopufferClient()
990
+ if exclude_ids:
991
+ # if we're excluding some IDs, we can't use delete_all
992
+ # would need to query all messages first then delete specific ones
993
+ # for now, log a warning
994
+ logger.warning(f"Turbopuffer deletion with exclude_ids not fully supported, using delete_all for agent {agent_id}")
995
+ # delete all messages for the agent from turbopuffer
996
+ await tpuf_client.delete_all_messages(agent_id, actor.organization_id)
997
+ logger.info(f"Successfully deleted all messages for agent {agent_id} from Turbopuffer")
998
+ except Exception as e:
999
+ logger.error(f"Failed to delete messages from Turbopuffer: {e}")
1000
+ if strict_mode:
1001
+ raise # Re-raise the exception in strict mode
1002
+
1003
+ # 6) return the number of rows deleted
654
1004
  return result.rowcount
655
1005
 
656
1006
  @enforce_types
657
1007
  @trace_method
658
- async def delete_messages_by_ids_async(self, message_ids: List[str], actor: PydanticUser) -> int:
1008
+ async def delete_messages_by_ids_async(self, message_ids: List[str], actor: PydanticUser, strict_mode: bool = False) -> int:
659
1009
  """
660
1010
  Efficiently deletes messages by their specific IDs,
661
1011
  while enforcing permission checks.
@@ -664,6 +1014,20 @@ class MessageManager:
664
1014
  return 0
665
1015
 
666
1016
  async with db_registry.async_session() as session:
1017
+ # get agent_ids BEFORE deleting (for turbopuffer)
1018
+ agent_ids = []
1019
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
1020
+
1021
+ if should_use_tpuf_for_messages():
1022
+ agent_query = (
1023
+ select(MessageModel.agent_id)
1024
+ .where(MessageModel.id.in_(message_ids))
1025
+ .where(MessageModel.organization_id == actor.organization_id)
1026
+ .distinct()
1027
+ )
1028
+ agent_result = await session.execute(agent_query)
1029
+ agent_ids = [row[0] for row in agent_result.fetchall() if row[0]]
1030
+
667
1031
  # issue a CORE DELETE against the mapped class for specific message IDs
668
1032
  stmt = delete(MessageModel).where(MessageModel.id.in_(message_ids)).where(MessageModel.organization_id == actor.organization_id)
669
1033
  result = await session.execute(stmt)
@@ -671,5 +1035,162 @@ class MessageManager:
671
1035
  # commit once
672
1036
  await session.commit()
673
1037
 
1038
+ # delete from turbopuffer if enabled
1039
+ if should_use_tpuf_for_messages() and agent_ids:
1040
+ try:
1041
+ tpuf_client = TurbopufferClient()
1042
+ # delete from each affected agent's namespace
1043
+ for agent_id in agent_ids:
1044
+ await tpuf_client.delete_messages(agent_id=agent_id, organization_id=actor.organization_id, message_ids=message_ids)
1045
+ logger.info(f"Successfully deleted {len(message_ids)} messages from Turbopuffer")
1046
+ except Exception as e:
1047
+ logger.error(f"Failed to delete messages from Turbopuffer: {e}")
1048
+ if strict_mode:
1049
+ raise # Re-raise the exception in strict mode
1050
+
674
1051
  # return the number of rows deleted
675
1052
  return result.rowcount
1053
+
1054
+ @enforce_types
1055
+ @trace_method
1056
+ async def search_messages_async(
1057
+ self,
1058
+ agent_id: str,
1059
+ actor: PydanticUser,
1060
+ query_text: Optional[str] = None,
1061
+ query_embedding: Optional[List[float]] = None,
1062
+ search_mode: str = "hybrid",
1063
+ roles: Optional[List[MessageRole]] = None,
1064
+ limit: int = 50,
1065
+ start_date: Optional[datetime] = None,
1066
+ end_date: Optional[datetime] = None,
1067
+ embedding_config: Optional[EmbeddingConfig] = None,
1068
+ ) -> List[Tuple[PydanticMessage, dict]]:
1069
+ """
1070
+ Search messages using Turbopuffer if enabled, otherwise fall back to SQL search.
1071
+
1072
+ Args:
1073
+ agent_id: ID of the agent whose messages to search
1074
+ actor: User performing the search
1075
+ query_text: Text query for full-text search
1076
+ query_embedding: Optional pre-computed embedding for vector search
1077
+ search_mode: "vector", "fts", "hybrid", or "timestamp" (default: "hybrid")
1078
+ roles: Optional list of message roles to filter by
1079
+ limit: Maximum number of results to return
1080
+ start_date: Optional filter for messages created after this date
1081
+ end_date: Optional filter for messages created before this date
1082
+ embedding_config: Optional embedding configuration for generating query embedding
1083
+
1084
+ Returns:
1085
+ List of tuples (message, metadata) where metadata contains relevance scores
1086
+ """
1087
+ from letta.helpers.tpuf_client import TurbopufferClient, should_use_tpuf_for_messages
1088
+
1089
+ # check if we should use turbopuffer
1090
+ if should_use_tpuf_for_messages():
1091
+ try:
1092
+ # generate embedding if needed and not provided
1093
+ if search_mode in ["vector", "hybrid"] and query_embedding is None and query_text:
1094
+ if not embedding_config:
1095
+ # fall back to SQL search if no embedding config
1096
+ logger.warning("No embedding config provided for vector search, falling back to SQL")
1097
+ return await self.list_messages_for_agent_async(
1098
+ agent_id=agent_id,
1099
+ actor=actor,
1100
+ query_text=query_text,
1101
+ roles=roles,
1102
+ limit=limit,
1103
+ ascending=False,
1104
+ )
1105
+
1106
+ # generate embedding from query text
1107
+ from letta.llm_api.llm_client import LLMClient
1108
+
1109
+ embedding_client = LLMClient.create(
1110
+ provider_type=embedding_config.embedding_endpoint_type,
1111
+ actor=actor,
1112
+ )
1113
+ embeddings = await embedding_client.request_embeddings([query_text], embedding_config)
1114
+ query_embedding = embeddings[0]
1115
+
1116
+ # use turbopuffer for search
1117
+ tpuf_client = TurbopufferClient()
1118
+ results = await tpuf_client.query_messages(
1119
+ agent_id=agent_id,
1120
+ organization_id=actor.organization_id,
1121
+ query_embedding=query_embedding,
1122
+ query_text=query_text,
1123
+ search_mode=search_mode,
1124
+ top_k=limit,
1125
+ roles=roles,
1126
+ start_date=start_date,
1127
+ end_date=end_date,
1128
+ )
1129
+
1130
+ # create message-like objects using turbopuffer data (which already has properly extracted text)
1131
+ if results:
1132
+ # create simplified message objects from turbopuffer data
1133
+ from letta.schemas.letta_message_content import TextContent
1134
+ from letta.schemas.message import Message as PydanticMessage
1135
+
1136
+ message_tuples = []
1137
+ for msg_dict, score, metadata in results:
1138
+ # create a message object with the properly extracted text from turbopuffer
1139
+ message = PydanticMessage(
1140
+ id=msg_dict["id"],
1141
+ agent_id=agent_id,
1142
+ role=MessageRole(msg_dict["role"]),
1143
+ content=[TextContent(text=msg_dict["text"])],
1144
+ created_at=msg_dict["created_at"],
1145
+ updated_at=msg_dict["created_at"], # use created_at as fallback
1146
+ created_by_id=actor.id,
1147
+ last_updated_by_id=actor.id,
1148
+ )
1149
+ # Return tuple of (message, metadata)
1150
+ message_tuples.append((message, metadata))
1151
+
1152
+ return message_tuples
1153
+ else:
1154
+ return []
1155
+
1156
+ except Exception as e:
1157
+ logger.error(f"Failed to search messages with Turbopuffer, falling back to SQL: {e}")
1158
+ # fall back to SQL search
1159
+ messages = await self.list_messages_for_agent_async(
1160
+ agent_id=agent_id,
1161
+ actor=actor,
1162
+ query_text=query_text,
1163
+ roles=roles,
1164
+ limit=limit,
1165
+ ascending=False,
1166
+ )
1167
+ combined_messages = self._combine_assistant_tool_messages(messages)
1168
+ # Add basic metadata for SQL fallback
1169
+ message_tuples = []
1170
+ for message in combined_messages:
1171
+ metadata = {
1172
+ "search_mode": "sql_fallback",
1173
+ "combined_score": None, # SQL doesn't provide scores
1174
+ }
1175
+ message_tuples.append((message, metadata))
1176
+ return message_tuples
1177
+ else:
1178
+ # use sql-based search
1179
+ messages = await self.list_messages_for_agent_async(
1180
+ agent_id=agent_id,
1181
+ actor=actor,
1182
+ query_text=query_text,
1183
+ roles=roles,
1184
+ limit=limit,
1185
+ ascending=False,
1186
+ )
1187
+ combined_messages = self._combine_assistant_tool_messages(messages)
1188
+ # Add basic metadata for SQL search
1189
+ message_tuples = []
1190
+ for message in combined_messages:
1191
+ metadata = {
1192
+ "search_mode": "sql",
1193
+ "combined_score": None, # SQL doesn't provide scores
1194
+ }
1195
+ message_tuples.append((message, metadata))
1196
+ return message_tuples
@@ -4,8 +4,7 @@ from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
4
4
  from letta.orm.errors import NoResultFound
5
5
  from letta.orm.organization import Organization as OrganizationModel
6
6
  from letta.otel.tracing import trace_method
7
- from letta.schemas.organization import Organization as PydanticOrganization
8
- from letta.schemas.organization import OrganizationUpdate
7
+ from letta.schemas.organization import Organization as PydanticOrganization, OrganizationUpdate
9
8
  from letta.server.db import db_registry
10
9
  from letta.utils import enforce_types
11
10