letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/schemas/tool_rule.py
CHANGED
@@ -20,6 +20,16 @@ class BaseToolRule(LettaBase):
         description="Optional Jinja2 template for generating agent prompt about this tool rule. Template can use variables like 'tool_name' and rule-specific attributes.",
     )
 
+    def __hash__(self):
+        """Base hash using tool_name and type."""
+        return hash((self.tool_name, self.type))
+
+    def __eq__(self, other):
+        """Base equality using tool_name and type."""
+        if not isinstance(other, BaseToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> set[str]:
         raise NotImplementedError
 
@@ -54,6 +64,16 @@ class ChildToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ChildToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools
@@ -71,6 +91,16 @@ class ParentToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ParentToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools - set(self.children)
@@ -90,6 +120,24 @@ class ConditionalToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including all configuration fields."""
+        # convert dict to sorted tuple of items for consistent hashing
+        mapping_items = tuple(sorted(self.child_output_mapping.items()))
+        return hash((self.tool_name, self.type, self.default_child, mapping_items, self.require_output_mapping))
+
+    def __eq__(self, other):
+        """Equality including all configuration fields."""
+        if not isinstance(other, ConditionalToolRule):
+            return False
+        return (
+            self.tool_name == other.tool_name
+            and self.type == other.type
+            and self.default_child == other.default_child
+            and self.child_output_mapping == other.child_output_mapping
+            and self.require_output_mapping == other.require_output_mapping
+        )
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Determine valid tools based on function output mapping."""
         if not tool_call_history or tool_call_history[-1] != self.tool_name:
@@ -203,6 +251,16 @@ class MaxCountPerStepToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including max_count_limit."""
+        return hash((self.tool_name, self.type, self.max_count_limit))
+
+    def __eq__(self, other):
+        """Equality including max_count_limit."""
+        if not isinstance(other, MaxCountPerStepToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and self.max_count_limit == other.max_count_limit
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Restricts the tool if it has been called max_count_limit times in the current step."""
         count = tool_call_history.count(self.tool_name)
@@ -214,6 +272,18 @@ class MaxCountPerStepToolRule(BaseToolRule):
         return available_tools
 
 
+class RequiresApprovalToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration which requires approval before the tool can be invoked.
+    """
+
+    type: Literal[ToolRuleType.requires_approval] = ToolRuleType.requires_approval
+
+    def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
+        """Does not enforce any restrictions on which tools are valid"""
+        return available_tools
+
+
 ToolRule = Annotated[
     Union[
         ChildToolRule,
@@ -224,6 +294,7 @@ ToolRule = Annotated[
         RequiredBeforeExitToolRule,
         MaxCountPerStepToolRule,
         ParentToolRule,
+        RequiresApprovalToolRule,
     ],
    Field(discriminator="type"),
 ]
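Worth noting for consumers of this schema: the new __hash__/__eq__ pairs make tool rules hashable and comparable by value, with child lists compared order-insensitively because both sides sort them. A minimal sketch of the resulting behavior (illustrative only; it assumes ChildToolRule can be constructed from just tool_name and children, with type filled in by the discriminator default):

    from letta.schemas.tool_rule import ChildToolRule

    a = ChildToolRule(tool_name="plan", children=["search", "summarize"])
    b = ChildToolRule(tool_name="plan", children=["summarize", "search"])  # same children, different order

    # equality and hashing ignore child ordering, so value-identical rules collapse
    assert a == b
    assert len({a, b}) == 1  # deduplicates when rule lists are merged into a set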
letta/serialize_schemas/marshmallow_agent.py
CHANGED
@@ -5,8 +5,7 @@ from sqlalchemy import func
 from sqlalchemy.orm import sessionmaker
 
 import letta
-from letta.orm import Agent
-from letta.orm import Message as MessageModel
+from letta.orm import Agent, Message as MessageModel
 from letta.schemas.agent import AgentState as PydanticAgentState
 from letta.schemas.user import User
 from letta.serialize_schemas.marshmallow_agent_environment_variable import SerializedAgentEnvironmentVariableSchema
letta/server/rest_api/app.py
CHANGED
@@ -261,7 +261,7 @@ def create_application() -> "FastAPI":
 
     @app.exception_handler(BedrockPermissionError)
     async def bedrock_permission_error_handler(request, exc: BedrockPermissionError):
-        logger.error(
+        logger.error("Bedrock permission denied.")
         if SENTRY_ENABLED:
             sentry_sdk.capture_exception(exc)
 
@@ -433,10 +433,10 @@ def start_server(
     if IS_WINDOWS:
         # Windows doesn't those the fancy unicode characters
         print(f"Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(
+        print("View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
     else:
         print(f"▶ Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(
+        print("▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
 
     if importlib.util.find_spec("granian") is not None and settings.use_granian:
         # Experimental Granian engine
letta/server/rest_api/auth/index.py
CHANGED
@@ -22,7 +22,6 @@ class AuthRequest(BaseModel):
 
 
 def setup_auth_router(server: SyncServer, interface: QueuingInterface, password: str) -> APIRouter:
-
     @router.post("/auth", tags=["auth"], response_model=AuthResponse)
     def authenticate_user(request: AuthRequest) -> AuthResponse:
         """
letta/server/rest_api/interface.py
CHANGED
@@ -377,9 +377,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         ):
             """Add an item to the deque"""
             assert self._active, "Generator is inactive"
-            assert (
-
-            )
+            assert isinstance(item, LettaMessage) or isinstance(item, LegacyLettaMessage) or isinstance(item, MessageStreamStatus), (
+                f"Wrong type: {type(item)}"
+            )
 
             self._chunks.append(item)
             self._event.set()  # Signal that new data is available
@@ -731,13 +731,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         # If we have main_json, we should output a ToolCallMessage
         elif updates_main_json:
-
             # If there's something in the function_name buffer, we should release it first
             # NOTE: we could output it as part of a chunk that has both name and args,
             # however the frontend may expect name first, then args, so to be
             # safe we'll output name first in a separate chunk
             if self.function_name_buffer:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
                     processed_chunk = None
@@ -778,7 +776,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # If there was nothing in the name buffer, we can proceed to
             # output the arguments chunk as a ToolCallMessage
             else:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and (
                     self.last_flushed_function_name is not None
@@ -860,7 +857,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 # clear buffers
                 self.function_id_buffer = None
             else:
-
                 # There may be a buffer from a previous chunk, for example
                 # if the previous chunk had arguments but we needed to flush name
                 if self.function_args_buffer:
@@ -997,7 +993,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Otherwise, do simple chunks of ToolCallMessage
 
         else:
-
             tool_call_delta = {}
             if tool_call.id:
                 tool_call_delta["id"] = tool_call.id
@@ -1073,7 +1068,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             tool_call = message_delta.tool_calls[0]
 
             if tool_call.function:
-
                 # Track the function name while streaming
                 # If we were previously on a 'send_message', we need to 'toggle' into 'content' mode
                 if tool_call.function.name:
@@ -1154,7 +1148,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
-
             # create a fake "chunk" of a stream
             # processed_chunk = {
             #     "internal_monologue": msg,
@@ -1268,7 +1261,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 print(f"Failed to parse function message: {e}")
 
         else:
-
             try:
                 func_args = parse_json(function_call.function.arguments)
             except:
letta/server/rest_api/redis_stream_manager.py
CHANGED
@@ -140,9 +140,7 @@ class RedisSSEStreamWriter:
 
         self.last_flush[run_id] = time.time()
 
-        logger.debug(
-            f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, " f"seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}"
-        )
+        logger.debug(f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}")
 
         if chunks[-1].get("complete") == "true":
             self._cleanup_run(run_id)
@@ -227,7 +225,8 @@ async def create_background_stream_processor(
         except Exception as e:
             logger.error(f"Error processing stream for run {run_id}: {e}")
             # Write error chunk
-            error_chunk = {"error": {"message": str(e)}}
+            # error_chunk = {"error": {"message": str(e)}}
+            error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
             await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
         finally:
             if should_stop_writer:
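The error frame written to the Redis stream changes shape here: consumers now receive a flat object with error and code keys rather than the nested {"error": {"message": ...}}. A small sketch of the frame as it appears on the wire (illustrative values only, not code from the package):

    import json

    # the frame create_background_stream_processor writes on failure
    error_chunk = {"error": "boom", "code": "INTERNAL_SERVER_ERROR"}
    frame = f"event: error\ndata: {json.dumps(error_chunk)}\n\n"

    print(frame)
    # event: error
    # data: {"error": "boom", "code": "INTERNAL_SERVER_ERROR"}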
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -2,7 +2,7 @@ import asyncio
 import json
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
 
 from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Request, UploadFile, status
 from fastapi.responses import JSONResponse
@@ -32,9 +32,15 @@ from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
 from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
 from letta.schemas.letta_request import LettaAsyncRequest, LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.memory import
+from letta.schemas.memory import (
+    ArchivalMemorySearchResponse,
+    ArchivalMemorySearchResult,
+    ContextWindowOverview,
+    CreateArchivalMemory,
+    Memory,
+)
 from letta.schemas.message import MessageCreate
-from letta.schemas.passage import Passage
+from letta.schemas.passage import Passage
 from letta.schemas.run import Run
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
@@ -155,8 +161,8 @@ async def export_agent_serialized(
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
     use_legacy_format: bool = Query(
-
-        description="If true, exports using the legacy single-agent format. If false, exports using the new multi-entity format.",
+        False,
+        description="If true, exports using the legacy single-agent format (v1). If false, exports using the new multi-entity format (v2).",
     ),
     # do not remove, used to autogeneration of spec
     # TODO: Think of a better way to export AgentFileSchema
@@ -252,6 +258,7 @@ async def import_agent(
     project_id: str | None = None,
     strip_messages: bool = False,
     env_vars: Optional[dict[str, Any]] = None,
+    override_embedding_handle: Optional[str] = None,
 ) -> List[str]:
     """
     Import an agent using the new AgentFileSchema format.
@@ -262,12 +269,19 @@ async def import_agent(
         raise HTTPException(status_code=422, detail=f"Invalid agent file schema: {e!s}")
 
     try:
+        if override_embedding_handle:
+            embedding_config_override = await server.get_cached_embedding_config_async(actor=actor, handle=override_embedding_handle)
+        else:
+            embedding_config_override = None
+
         import_result = await server.agent_serialization_manager.import_file(
             schema=agent_schema,
             actor=actor,
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             env_vars=env_vars,
+            override_embedding_config=embedding_config_override,
+            project_id=project_id,
         )
 
         if not import_result.success:
@@ -296,11 +310,16 @@ async def import_agent_serialized(
     file: UploadFile = File(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
+    x_override_embedding_model: str | None = Header(None, alias="x-override-embedding-model"),
     append_copy_suffix: bool = Form(True, description='If set to True, appends "_copy" to the end of the agent name.'),
     override_existing_tools: bool = Form(
         True,
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
+    override_embedding_handle: Optional[str] = Form(
+        None,
+        description="Override import with specific embedding handle.",
+    ),
     project_id: str | None = Form(None, description="The project ID to associate the uploaded agent with."),
     strip_messages: bool = Form(
         False,
@@ -333,6 +352,9 @@ async def import_agent_serialized(
         if not isinstance(env_vars, dict):
             raise HTTPException(status_code=400, detail="env_vars_json must be a valid JSON string")
 
+    # Prioritize header over form data for override_embedding_handle
+    final_override_embedding_handle = x_override_embedding_model or override_embedding_handle
+
     # Check if the JSON is AgentFileSchema or AgentSchema
     # TODO: This is kind of hacky, but should work as long as dont' change the schema
     if "agents" in agent_json and isinstance(agent_json.get("agents"), list):
@@ -346,6 +368,7 @@ async def import_agent_serialized(
             project_id=project_id,
             strip_messages=strip_messages,
             env_vars=env_vars,
+            override_embedding_handle=final_override_embedding_handle,
         )
     else:
         # This is a legacy AgentSchema
@@ -464,6 +487,25 @@ async def detach_tool(
     return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
 
 
+@router.patch("/{agent_id}/tools/approval/{tool_name}", response_model=AgentState, operation_id="modify_approval")
+async def modify_approval(
+    agent_id: str,
+    tool_name: str,
+    requires_approval: bool,
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: str | None = Header(None, alias="user_id"),
+):
+    """
+    Attach a tool to an agent.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+    await server.agent_manager.modify_approvals_async(
+        agent_id=agent_id, tool_name=tool_name, requires_approval=requires_approval, actor=actor
+    )
+    # TODO: Unfortunately we need this to preserve our current API behavior
+    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+
+
 @router.patch("/{agent_id}/sources/attach/{source_id}", response_model=AgentState, operation_id="attach_source_to_agent")
 async def attach_source(
     agent_id: str,
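The new modify_approval route flips per-tool approval on an agent and returns the updated AgentState; requires_approval is a plain scalar parameter, so FastAPI reads it from the query string. A hypothetical call, assuming the router is mounted under /v1/agents on a local server (the URL, port, and IDs are placeholders, not from this diff):

    import httpx

    # flag the "web_search" tool so the agent must get approval before invoking it
    resp = httpx.patch(
        "http://localhost:8283/v1/agents/agent-123/tools/approval/web_search",
        params={"requires_approval": True},
        headers={"user_id": "user-1"},  # actor-resolution header used by the route
    )
    agent_state = resp.json()  # the refreshed AgentState, per the response_model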
@@ -937,22 +979,62 @@ async def create_passage(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
-    return await server.insert_archival_memory_async(
+    return await server.insert_archival_memory_async(
+        agent_id=agent_id, memory_contents=request.text, actor=actor, tags=request.tags, created_at=request.created_at
+    )
 
 
-@router.
-def
+@router.get("/{agent_id}/archival-memory/search", response_model=ArchivalMemorySearchResponse, operation_id="search_archival_memory")
+async def search_archival_memory(
     agent_id: str,
-
-
+    query: str = Query(..., description="String to search for using semantic similarity"),
+    tags: Optional[List[str]] = Query(None, description="Optional list of tags to filter search results"),
+    tag_match_mode: Literal["any", "all"] = Query(
+        "any", description="How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags"
+    ),
+    top_k: Optional[int] = Query(None, description="Maximum number of results to return. Uses system default if not specified"),
+    start_datetime: Optional[datetime] = Query(None, description="Filter results to passages created after this datetime"),
+    end_datetime: Optional[datetime] = Query(None, description="Filter results to passages created before this datetime"),
     server: "SyncServer" = Depends(get_letta_server),
-    actor_id: str | None = Header(None, alias="user_id"),
+    actor_id: str | None = Header(None, alias="user_id"),
 ):
     """
-
+    Search archival memory using semantic (embedding-based) search with optional temporal filtering.
+
+    This endpoint allows manual triggering of archival memory searches, enabling users to query
+    an agent's archival memory store directly via the API. The search uses the same functionality
+    as the agent's archival_memory_search tool but is accessible for external API usage.
     """
-    actor = server.user_manager.
-
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    try:
+        # convert datetime to string in ISO 8601 format
+        start_datetime = start_datetime.isoformat() if start_datetime else None
+        end_datetime = end_datetime.isoformat() if end_datetime else None
+
+        # Use the shared agent manager method
+        formatted_results, count = await server.agent_manager.search_agent_archival_memory_async(
+            agent_id=agent_id,
+            actor=actor,
+            query=query,
+            tags=tags,
+            tag_match_mode=tag_match_mode,
+            top_k=top_k,
+            start_datetime=start_datetime,
+            end_datetime=end_datetime,
+        )
+
+        # Convert to proper response schema
+        search_results = [ArchivalMemorySearchResult(**result) for result in formatted_results]
+
+        return ArchivalMemorySearchResponse(results=search_results, count=count)
+
+    except NoResultFound as e:
+        raise HTTPException(status_code=404, detail=f"Agent with id={agent_id} not found for user_id={actor.id}.")
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal server error during archival memory search: {str(e)}")
 
 
 # TODO(ethan): query or path parameter for memory_id?
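Because search_archival_memory is a plain GET, it can be exercised without a client SDK. A hypothetical request against a local server, again assuming the /v1/agents mount point (all values are placeholders):

    import httpx

    resp = httpx.get(
        "http://localhost:8283/v1/agents/agent-123/archival-memory/search",
        params={
            "query": "project kickoff notes",
            "tags": ["meetings"],      # list values become repeated query params
            "tag_match_mode": "any",   # or "all" to require every tag
            "top_k": 5,
        },
        headers={"user_id": "user-1"},
    )
    body = resp.json()  # {"results": [...], "count": <int>} per ArchivalMemorySearchResponse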
@@ -1049,6 +1131,8 @@ async def send_message(
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
     """
+    if len(request.messages) == 0:
+        raise ValueError("Messages must not be empty")
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
@@ -1067,6 +1151,7 @@ async def send_message(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     # Create a new run for execution tracking
@@ -1197,6 +1282,9 @@ async def send_message_streaming(
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
+    # TODO (cliandy): clean this up
+    redis_client = await get_redis_client()
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
@@ -1212,8 +1300,9 @@ async def send_message_streaming(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
-    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"]
+    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
 
     # Create a new job for execution tracking
     if settings.track_agent_run:
@@ -1236,14 +1325,11 @@ async def send_message_streaming(
             ),
             actor=actor,
         )
+        job_update_metadata = None
+        await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
     else:
         run = None
 
-    job_update_metadata = None
-    # TODO (cliandy): clean this up
-    redis_client = await get_redis_client()
-    await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
-
     try:
         if agent_eligible and model_compatible:
             if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1281,6 +1367,23 @@ async def send_message_streaming(
                 ),
             )
 
+            if request.stream_tokens and model_compatible_token_streaming:
+                raw_stream = agent_loop.step_stream(
+                    input_messages=request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+            else:
+                raw_stream = agent_loop.step_stream_no_tokens(
+                    request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+
             from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
 
             if request.background and settings.track_agent_run:
@@ -1294,23 +1397,6 @@ async def send_message_streaming(
                 ),
             )
 
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-            else:
-                raw_stream = agent_loop.step_stream_no_tokens(
-                    request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-
                 asyncio.create_task(
                     create_background_stream_processor(
                         stream_generator=raw_stream,
@@ -1319,55 +1405,21 @@ async def send_message_streaming(
                     )
                 )
 
-
+                raw_stream = redis_sse_stream_generator(
                     redis_client=redis_client,
                     run_id=run.id,
                 )
 
-
-
-
-                return StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
-
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+            # Conditionally wrap with keepalive based on request parameter
+            if request.include_pings and settings.enable_keepalive:
+                stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
             else:
-
-
-
-
-
-
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+                stream = raw_stream
+
+            result = StreamingResponseWithStatusCode(
+                stream,
+                media_type="text/event-stream",
+            )
     else:
         result = await server.send_message_to_agent(
             agent_id=agent_id,
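The net effect of the streaming refactor above is that raw_stream is now built once, then either fanned out to Redis for background runs or wrapped with keepalive pings and returned directly. A hypothetical client-side consumer of the resulting SSE response (the route path, payload shape, and port are assumptions for illustration; ping events only appear when include_pings is set and keepalives are enabled server-side):

    import httpx

    with httpx.stream(
        "POST",
        "http://localhost:8283/v1/agents/agent-123/messages/stream",  # assumed route
        json={
            "messages": [{"role": "user", "content": "hello"}],
            "stream_tokens": True,   # token streaming only for compatible endpoints (anthropic, openai, bedrock, deepseek)
            "include_pings": True,   # opt in to keepalive ping events
        },
        headers={"user_id": "user-1"},
        timeout=None,
    ) as resp:
        # each SSE frame arrives as "event: ..." / "data: ..." lines
        for line in resp.iter_lines():
            if line.startswith("data:"):
                print(line)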
@@ -1382,11 +1434,13 @@ async def send_message_streaming(
             request_start_timestamp_ns=request_start_timestamp_ns,
             include_return_message_types=request.include_return_message_types,
         )
-
+        if settings.track_agent_run:
+            job_status = JobStatus.running
         return result
     except Exception as e:
-
-
+        if settings.track_agent_run:
+            job_update_metadata = {"error": str(e)}
+            job_status = JobStatus.failed
         raise
     finally:
         if settings.track_agent_run:
@@ -1469,7 +1523,10 @@ async def _process_message_background(
         "google_vertex",
         "bedrock",
         "ollama",
+        "azure",
+        "xai",
         "groq",
+        "deepseek",
     ]
     if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1660,6 +1717,7 @@ async def preview_raw_payload(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
@@ -1731,6 +1789,7 @@ async def summarize_agent_conversation(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
letta/server/rest_api/routers/v1/blocks.py
CHANGED
@@ -34,7 +34,7 @@ async def list_blocks(
     ),
     label_search: Optional[str] = Query(
         None,
-        description=("Search blocks by label. If provided, returns blocks that match this label.
+        description=("Search blocks by label. If provided, returns blocks that match this label. This is a full-text search on labels."),
     ),
     description_search: Optional[str] = Query(
         None,