letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
--- a/letta/schemas/tool_rule.py
+++ b/letta/schemas/tool_rule.py
@@ -20,6 +20,16 @@ class BaseToolRule(LettaBase):
         description="Optional Jinja2 template for generating agent prompt about this tool rule. Template can use variables like 'tool_name' and rule-specific attributes.",
     )
 
+    def __hash__(self):
+        """Base hash using tool_name and type."""
+        return hash((self.tool_name, self.type))
+
+    def __eq__(self, other):
+        """Base equality using tool_name and type."""
+        if not isinstance(other, BaseToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> set[str]:
         raise NotImplementedError
 
@@ -54,6 +64,16 @@ class ChildToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ChildToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools
@@ -71,6 +91,16 @@ class ParentToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ParentToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools - set(self.children)
@@ -90,6 +120,24 @@ class ConditionalToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including all configuration fields."""
+        # convert dict to sorted tuple of items for consistent hashing
+        mapping_items = tuple(sorted(self.child_output_mapping.items()))
+        return hash((self.tool_name, self.type, self.default_child, mapping_items, self.require_output_mapping))
+
+    def __eq__(self, other):
+        """Equality including all configuration fields."""
+        if not isinstance(other, ConditionalToolRule):
+            return False
+        return (
+            self.tool_name == other.tool_name
+            and self.type == other.type
+            and self.default_child == other.default_child
+            and self.child_output_mapping == other.child_output_mapping
+            and self.require_output_mapping == other.require_output_mapping
+        )
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Determine valid tools based on function output mapping."""
         if not tool_call_history or tool_call_history[-1] != self.tool_name:
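ConditionalToolRule's child_output_mapping is a dict, which is unhashable, so the rule above flattens it into a sorted tuple of items first. A standalone illustration of that idiom (the mapping values here are made up):

    # a dict cannot be hashed, but a sorted tuple of its items can,
    # and sorting makes the result independent of insertion order
    a = {"true": "send_message", "false": "archival_memory_search"}
    b = {"false": "archival_memory_search", "true": "send_message"}
    assert tuple(sorted(a.items())) == tuple(sorted(b.items()))
    print(hash(tuple(sorted(a.items()))))  # usable inside __hash__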
@@ -203,6 +251,16 @@ class MaxCountPerStepToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including max_count_limit."""
+        return hash((self.tool_name, self.type, self.max_count_limit))
+
+    def __eq__(self, other):
+        """Equality including max_count_limit."""
+        if not isinstance(other, MaxCountPerStepToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and self.max_count_limit == other.max_count_limit
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Restricts the tool if it has been called max_count_limit times in the current step."""
         count = tool_call_history.count(self.tool_name)
@@ -214,6 +272,18 @@ class MaxCountPerStepToolRule(BaseToolRule):
         return available_tools
 
 
+class RequiresApprovalToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration which requires approval before the tool can be invoked.
+    """
+
+    type: Literal[ToolRuleType.requires_approval] = ToolRuleType.requires_approval
+
+    def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
+        """Does not enforce any restrictions on which tools are valid"""
+        return available_tools
+
+
 ToolRule = Annotated[
     Union[
         ChildToolRule,
@@ -224,6 +294,7 @@ ToolRule = Annotated[
         RequiredBeforeExitToolRule,
         MaxCountPerStepToolRule,
         ParentToolRule,
+        RequiresApprovalToolRule,
     ],
     Field(discriminator="type"),
 ]
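With matching __hash__ and __eq__ on every variant, structurally identical rules now compare equal and collapse in sets, which presumably supports the deduplication plumbing touched elsewhere in this release (tool_rule_solver.py, agent_manager.py). A hedged sketch; keyword construction is assumed from the Pydantic field names shown above:

    from letta.schemas.tool_rule import ChildToolRule  # module path per this diff

    # children differ only in order; both hash and equality sort them first
    rule_a = ChildToolRule(tool_name="plan", children=["search", "summarize"])
    rule_b = ChildToolRule(tool_name="plan", children=["summarize", "search"])
    assert rule_a == rule_b
    assert len({rule_a, rule_b}) == 1  # deduplicated in a set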
--- a/letta/serialize_schemas/marshmallow_agent.py
+++ b/letta/serialize_schemas/marshmallow_agent.py
@@ -5,8 +5,7 @@ from sqlalchemy import func
 from sqlalchemy.orm import sessionmaker
 
 import letta
-from letta.orm import Agent
-from letta.orm import Message as MessageModel
+from letta.orm import Agent, Message as MessageModel
 from letta.schemas.agent import AgentState as PydanticAgentState
 from letta.schemas.user import User
 from letta.serialize_schemas.marshmallow_agent_environment_variable import SerializedAgentEnvironmentVariableSchema

--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -261,7 +261,7 @@ def create_application() -> "FastAPI":
 
     @app.exception_handler(BedrockPermissionError)
     async def bedrock_permission_error_handler(request, exc: BedrockPermissionError):
-        logger.error(f"Bedrock permission denied.")
+        logger.error("Bedrock permission denied.")
         if SENTRY_ENABLED:
             sentry_sdk.capture_exception(exc)
 
@@ -433,10 +433,10 @@ def start_server(
     if IS_WINDOWS:
         # Windows doesn't those the fancy unicode characters
         print(f"Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(f"View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
+        print("View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
     else:
         print(f"▶ Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(f"▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
+        print("▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
 
     if importlib.util.find_spec("granian") is not None and settings.use_granian:
         # Experimental Granian engine

--- a/letta/server/rest_api/auth/index.py
+++ b/letta/server/rest_api/auth/index.py
@@ -22,7 +22,6 @@ class AuthRequest(BaseModel):
 
 
 def setup_auth_router(server: SyncServer, interface: QueuingInterface, password: str) -> APIRouter:
-
     @router.post("/auth", tags=["auth"], response_model=AuthResponse)
     def authenticate_user(request: AuthRequest) -> AuthResponse:
         """

--- a/letta/server/rest_api/interface.py
+++ b/letta/server/rest_api/interface.py
@@ -377,9 +377,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     ):
         """Add an item to the deque"""
         assert self._active, "Generator is inactive"
-        assert (
-            isinstance(item, LettaMessage) or isinstance(item, LegacyLettaMessage) or isinstance(item, MessageStreamStatus)
-        ), f"Wrong type: {type(item)}"
+        assert isinstance(item, LettaMessage) or isinstance(item, LegacyLettaMessage) or isinstance(item, MessageStreamStatus), (
+            f"Wrong type: {type(item)}"
+        )
 
         self._chunks.append(item)
         self._event.set()  # Signal that new data is available
@@ -731,13 +731,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         # If we have main_json, we should output a ToolCallMessage
         elif updates_main_json:
-
             # If there's something in the function_name buffer, we should release it first
             # NOTE: we could output it as part of a chunk that has both name and args,
             # however the frontend may expect name first, then args, so to be
             # safe we'll output name first in a separate chunk
             if self.function_name_buffer:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
                     processed_chunk = None
@@ -778,7 +776,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # If there was nothing in the name buffer, we can proceed to
             # output the arguments chunk as a ToolCallMessage
             else:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and (
                     self.last_flushed_function_name is not None
@@ -860,7 +857,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # clear buffers
                     self.function_id_buffer = None
                 else:
-
                     # There may be a buffer from a previous chunk, for example
                     # if the previous chunk had arguments but we needed to flush name
                     if self.function_args_buffer:
@@ -997,7 +993,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Otherwise, do simple chunks of ToolCallMessage
 
         else:
-
             tool_call_delta = {}
             if tool_call.id:
                 tool_call_delta["id"] = tool_call.id
@@ -1073,7 +1068,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             tool_call = message_delta.tool_calls[0]
 
             if tool_call.function:
-
                 # Track the function name while streaming
                 # If we were previously on a 'send_message', we need to 'toggle' into 'content' mode
                 if tool_call.function.name:
@@ -1154,7 +1148,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
-
             # create a fake "chunk" of a stream
             # processed_chunk = {
             #     "internal_monologue": msg,
@@ -1268,7 +1261,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 print(f"Failed to parse function message: {e}")
 
         else:
-
             try:
                 func_args = parse_json(function_call.function.arguments)
             except:
--- a/letta/server/rest_api/redis_stream_manager.py
+++ b/letta/server/rest_api/redis_stream_manager.py
@@ -140,9 +140,7 @@ class RedisSSEStreamWriter:
 
         self.last_flush[run_id] = time.time()
 
-        logger.debug(
-            f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, " f"seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}"
-        )
+        logger.debug(f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}")
 
         if chunks[-1].get("complete") == "true":
             self._cleanup_run(run_id)
@@ -227,7 +225,8 @@ async def create_background_stream_processor(
     except Exception as e:
         logger.error(f"Error processing stream for run {run_id}: {e}")
         # Write error chunk
-        error_chunk = {"error": {"message": str(e)}}
+        # error_chunk = {"error": {"message": str(e)}}
+        error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
        await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
    finally:
        if should_stop_writer:
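The error chunk written to the stream changes from a nested {"error": {"message": ...}} object to a flat object with an explicit code. A small sketch of the SSE frame a client now receives on failure (the error text here is illustrative):

    import json

    error_chunk = {"error": "stream processing failed", "code": "INTERNAL_SERVER_ERROR"}
    frame = f"event: error\ndata: {json.dumps(error_chunk)}\n\n"
    print(frame)
    # event: error
    # data: {"error": "stream processing failed", "code": "INTERNAL_SERVER_ERROR"}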
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -2,7 +2,7 @@ import asyncio
 import json
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
 
 from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Request, UploadFile, status
 from fastapi.responses import JSONResponse
@@ -32,9 +32,15 @@ from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
 from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
 from letta.schemas.letta_request import LettaAsyncRequest, LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
+from letta.schemas.memory import (
+    ArchivalMemorySearchResponse,
+    ArchivalMemorySearchResult,
+    ContextWindowOverview,
+    CreateArchivalMemory,
+    Memory,
+)
 from letta.schemas.message import MessageCreate
-from letta.schemas.passage import Passage, PassageUpdate
+from letta.schemas.passage import Passage
 from letta.schemas.run import Run
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
@@ -155,8 +161,8 @@ async def export_agent_serialized(
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
     use_legacy_format: bool = Query(
-        True,
-        description="If true, exports using the legacy single-agent format. If false, exports using the new multi-entity format.",
+        False,
+        description="If true, exports using the legacy single-agent format (v1). If false, exports using the new multi-entity format (v2).",
     ),
     # do not remove, used to autogeneration of spec
     # TODO: Think of a better way to export AgentFileSchema
@@ -252,6 +258,7 @@ async def import_agent(
     project_id: str | None = None,
     strip_messages: bool = False,
     env_vars: Optional[dict[str, Any]] = None,
+    override_embedding_handle: Optional[str] = None,
 ) -> List[str]:
     """
     Import an agent using the new AgentFileSchema format.
@@ -262,12 +269,19 @@ async def import_agent(
         raise HTTPException(status_code=422, detail=f"Invalid agent file schema: {e!s}")
 
     try:
+        if override_embedding_handle:
+            embedding_config_override = await server.get_cached_embedding_config_async(actor=actor, handle=override_embedding_handle)
+        else:
+            embedding_config_override = None
+
         import_result = await server.agent_serialization_manager.import_file(
             schema=agent_schema,
             actor=actor,
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             env_vars=env_vars,
+            override_embedding_config=embedding_config_override,
+            project_id=project_id,
         )
 
         if not import_result.success:
@@ -296,11 +310,16 @@ async def import_agent_serialized(
     file: UploadFile = File(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
+    x_override_embedding_model: str | None = Header(None, alias="x-override-embedding-model"),
     append_copy_suffix: bool = Form(True, description='If set to True, appends "_copy" to the end of the agent name.'),
     override_existing_tools: bool = Form(
         True,
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
+    override_embedding_handle: Optional[str] = Form(
+        None,
+        description="Override import with specific embedding handle.",
+    ),
     project_id: str | None = Form(None, description="The project ID to associate the uploaded agent with."),
     strip_messages: bool = Form(
         False,
@@ -333,6 +352,9 @@ async def import_agent_serialized(
         if not isinstance(env_vars, dict):
             raise HTTPException(status_code=400, detail="env_vars_json must be a valid JSON string")
 
+    # Prioritize header over form data for override_embedding_handle
+    final_override_embedding_handle = x_override_embedding_model or override_embedding_handle
+
     # Check if the JSON is AgentFileSchema or AgentSchema
     # TODO: This is kind of hacky, but should work as long as dont' change the schema
     if "agents" in agent_json and isinstance(agent_json.get("agents"), list):
@@ -346,6 +368,7 @@ async def import_agent_serialized(
             project_id=project_id,
             strip_messages=strip_messages,
             env_vars=env_vars,
+            override_embedding_handle=final_override_embedding_handle,
         )
     else:
         # This is a legacy AgentSchema
@@ -464,6 +487,25 @@ async def detach_tool(
     return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
 
 
+@router.patch("/{agent_id}/tools/approval/{tool_name}", response_model=AgentState, operation_id="modify_approval")
+async def modify_approval(
+    agent_id: str,
+    tool_name: str,
+    requires_approval: bool,
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: str | None = Header(None, alias="user_id"),
+):
+    """
+    Attach a tool to an agent.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+    await server.agent_manager.modify_approvals_async(
+        agent_id=agent_id, tool_name=tool_name, requires_approval=requires_approval, actor=actor
+    )
+    # TODO: Unfortunately we need this to preserve our current API behavior
+    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+
+
 @router.patch("/{agent_id}/sources/attach/{source_id}", response_model=AgentState, operation_id="attach_source_to_agent")
 async def attach_source(
     agent_id: str,
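A hedged usage sketch for the new approval toggle; the /v1/agents mount and default local port 8283 are assumptions, and requires_approval, being a bare function parameter, is sent as a query parameter:

    import requests

    # agent and tool ids are hypothetical
    resp = requests.patch(
        "http://localhost:8283/v1/agents/agent-123/tools/approval/web_search",
        params={"requires_approval": True},
        headers={"user_id": "user-123"},
    )
    resp.raise_for_status()
    agent_state = resp.json()  # updated AgentState; its tool_rules presumably now reflect the change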
@@ -937,22 +979,62 @@ async def create_passage(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
-    return await server.insert_archival_memory_async(agent_id=agent_id, memory_contents=request.text, actor=actor)
+    return await server.insert_archival_memory_async(
+        agent_id=agent_id, memory_contents=request.text, actor=actor, tags=request.tags, created_at=request.created_at
+    )
 
 
-@router.patch("/{agent_id}/archival-memory/{memory_id}", response_model=list[Passage], operation_id="modify_passage")
-def modify_passage(
+@router.get("/{agent_id}/archival-memory/search", response_model=ArchivalMemorySearchResponse, operation_id="search_archival_memory")
+async def search_archival_memory(
     agent_id: str,
-    memory_id: str,
-    passage: PassageUpdate = Body(...),
+    query: str = Query(..., description="String to search for using semantic similarity"),
+    tags: Optional[List[str]] = Query(None, description="Optional list of tags to filter search results"),
+    tag_match_mode: Literal["any", "all"] = Query(
+        "any", description="How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags"
+    ),
+    top_k: Optional[int] = Query(None, description="Maximum number of results to return. Uses system default if not specified"),
+    start_datetime: Optional[datetime] = Query(None, description="Filter results to passages created after this datetime"),
+    end_datetime: Optional[datetime] = Query(None, description="Filter results to passages created before this datetime"),
     server: "SyncServer" = Depends(get_letta_server),
-    actor_id: str | None = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+    actor_id: str | None = Header(None, alias="user_id"),
 ):
     """
-    Modify a memory in the agent's archival memory store.
+    Search archival memory using semantic (embedding-based) search with optional temporal filtering.
+
+    This endpoint allows manual triggering of archival memory searches, enabling users to query
+    an agent's archival memory store directly via the API. The search uses the same functionality
+    as the agent's archival_memory_search tool but is accessible for external API usage.
     """
-    actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    return server.modify_archival_memory(agent_id=agent_id, memory_id=memory_id, passage=passage, actor=actor)
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    try:
+        # convert datetime to string in ISO 8601 format
+        start_datetime = start_datetime.isoformat() if start_datetime else None
+        end_datetime = end_datetime.isoformat() if end_datetime else None
+
+        # Use the shared agent manager method
+        formatted_results, count = await server.agent_manager.search_agent_archival_memory_async(
+            agent_id=agent_id,
+            actor=actor,
+            query=query,
+            tags=tags,
+            tag_match_mode=tag_match_mode,
+            top_k=top_k,
+            start_datetime=start_datetime,
+            end_datetime=end_datetime,
+        )
+
+        # Convert to proper response schema
+        search_results = [ArchivalMemorySearchResult(**result) for result in formatted_results]
+
+        return ArchivalMemorySearchResponse(results=search_results, count=count)
+
+    except NoResultFound as e:
+        raise HTTPException(status_code=404, detail=f"Agent with id={agent_id} not found for user_id={actor.id}.")
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal server error during archival memory search: {str(e)}")
 
 
 # TODO(ethan): query or path parameter for memory_id?
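A hedged usage sketch for the new read-only search route, again assuming the /v1/agents mount and default local port; parameter names come straight from the signature above:

    import requests

    resp = requests.get(
        "http://localhost:8283/v1/agents/agent-123/archival-memory/search",  # agent id is hypothetical
        params={
            "query": "project deadlines",
            "tags": ["work", "planning"],  # sent as repeated query params
            "tag_match_mode": "any",       # or "all" to require every tag
            "top_k": 5,
        },
        headers={"user_id": "user-123"},
    )
    resp.raise_for_status()
    body = resp.json()  # ArchivalMemorySearchResponse: {"results": [...], "count": ...}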
@@ -1049,6 +1131,8 @@ async def send_message(
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
     """
+    if len(request.messages) == 0:
+        raise ValueError("Messages must not be empty")
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
@@ -1067,6 +1151,7 @@ async def send_message(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     # Create a new run for execution tracking
@@ -1197,6 +1282,9 @@ async def send_message_streaming(
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
+    # TODO (cliandy): clean this up
+    redis_client = await get_redis_client()
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
@@ -1212,8 +1300,9 @@ async def send_message_streaming(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
-    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"]
+    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
 
     # Create a new job for execution tracking
     if settings.track_agent_run:
@@ -1236,14 +1325,11 @@ async def send_message_streaming(
             ),
             actor=actor,
         )
+        job_update_metadata = None
+        await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
     else:
         run = None
 
-    job_update_metadata = None
-    # TODO (cliandy): clean this up
-    redis_client = await get_redis_client()
-    await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
-
     try:
         if agent_eligible and model_compatible:
             if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1281,6 +1367,23 @@ async def send_message_streaming(
                 ),
             )
 
+            if request.stream_tokens and model_compatible_token_streaming:
+                raw_stream = agent_loop.step_stream(
+                    input_messages=request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+            else:
+                raw_stream = agent_loop.step_stream_no_tokens(
+                    request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+
             from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
 
             if request.background and settings.track_agent_run:
@@ -1294,23 +1397,6 @@ async def send_message_streaming(
                     ),
                 )
 
-                if request.stream_tokens and model_compatible_token_streaming:
-                    raw_stream = agent_loop.step_stream(
-                        input_messages=request.messages,
-                        max_steps=request.max_steps,
-                        use_assistant_message=request.use_assistant_message,
-                        request_start_timestamp_ns=request_start_timestamp_ns,
-                        include_return_message_types=request.include_return_message_types,
-                    )
-                else:
-                    raw_stream = agent_loop.step_stream_no_tokens(
-                        request.messages,
-                        max_steps=request.max_steps,
-                        use_assistant_message=request.use_assistant_message,
-                        request_start_timestamp_ns=request_start_timestamp_ns,
-                        include_return_message_types=request.include_return_message_types,
-                    )
-
                 asyncio.create_task(
                     create_background_stream_processor(
                         stream_generator=raw_stream,
@@ -1319,55 +1405,21 @@ async def send_message_streaming(
                     )
                 )
 
-                stream = redis_sse_stream_generator(
+                raw_stream = redis_sse_stream_generator(
                     redis_client=redis_client,
                     run_id=run.id,
                 )
 
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval)
-
-                return StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
-
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+            # Conditionally wrap with keepalive based on request parameter
+            if request.include_pings and settings.enable_keepalive:
+                stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
             else:
-                raw_stream = agent_loop.step_stream_no_tokens(
-                    request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+                stream = raw_stream
+
+            result = StreamingResponseWithStatusCode(
+                stream,
+                media_type="text/event-stream",
+            )
         else:
             result = await server.send_message_to_agent(
                 agent_id=agent_id,
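Net effect of the three hunks above: stream construction is hoisted out of the background branch, the background path merely swaps raw_stream for a Redis-backed generator, and a single keepalive wrap plus StreamingResponseWithStatusCode now serves both paths. A hedged client-side sketch of consuming the resulting SSE response; the /v1/agents/{agent_id}/messages/stream route, port, and request body shape are assumptions, not shown in this diff:

    import requests

    with requests.post(
        "http://localhost:8283/v1/agents/agent-123/messages/stream",  # hypothetical agent id
        json={
            "messages": [{"role": "user", "content": "hello"}],
            "stream_tokens": True,   # token streaming; per this diff now also honored for deepseek
            "include_pings": True,   # server may interleave keepalive ping events
        },
        headers={"user_id": "user-123"},
        stream=True,
    ) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if line.startswith("data:"):
                print(line[len("data:"):].strip())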
@@ -1382,11 +1434,13 @@ async def send_message_streaming(
                 request_start_timestamp_ns=request_start_timestamp_ns,
                 include_return_message_types=request.include_return_message_types,
             )
-        job_status = JobStatus.running
+        if settings.track_agent_run:
+            job_status = JobStatus.running
         return result
     except Exception as e:
-        job_update_metadata = {"error": str(e)}
-        job_status = JobStatus.failed
+        if settings.track_agent_run:
+            job_update_metadata = {"error": str(e)}
+            job_status = JobStatus.failed
         raise
     finally:
         if settings.track_agent_run:
@@ -1469,7 +1523,10 @@ async def _process_message_background(
         "google_vertex",
         "bedrock",
         "ollama",
+        "azure",
+        "xai",
         "groq",
+        "deepseek",
     ]
     if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1660,6 +1717,7 @@ async def preview_raw_payload(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
@@ -1731,6 +1789,7 @@ async def summarize_agent_conversation(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
--- a/letta/server/rest_api/routers/v1/blocks.py
+++ b/letta/server/rest_api/routers/v1/blocks.py
@@ -34,7 +34,7 @@ async def list_blocks(
     ),
     label_search: Optional[str] = Query(
         None,
-        description=("Search blocks by label. If provided, returns blocks that match this label. " "This is a full-text search on labels."),
+        description=("Search blocks by label. If provided, returns blocks that match this label. This is a full-text search on labels."),
     ),
     description_search: Optional[str] = Query(
         None,