letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/schemas/tool_rule.py
CHANGED
@@ -20,6 +20,16 @@ class BaseToolRule(LettaBase):
         description="Optional Jinja2 template for generating agent prompt about this tool rule. Template can use variables like 'tool_name' and rule-specific attributes.",
     )
 
+    def __hash__(self):
+        """Base hash using tool_name and type."""
+        return hash((self.tool_name, self.type))
+
+    def __eq__(self, other):
+        """Base equality using tool_name and type."""
+        if not isinstance(other, BaseToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> set[str]:
         raise NotImplementedError
 
@@ -54,6 +64,16 @@ class ChildToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ChildToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools
@@ -71,6 +91,16 @@ class ParentToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including children list (sorted for consistency)."""
+        return hash((self.tool_name, self.type, tuple(sorted(self.children))))
+
+    def __eq__(self, other):
+        """Equality including children list."""
+        if not isinstance(other, ParentToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and sorted(self.children) == sorted(other.children)
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         last_tool = tool_call_history[-1] if tool_call_history else None
         return set(self.children) if last_tool == self.tool_name else available_tools - set(self.children)
@@ -90,6 +120,24 @@ class ConditionalToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including all configuration fields."""
+        # convert dict to sorted tuple of items for consistent hashing
+        mapping_items = tuple(sorted(self.child_output_mapping.items()))
+        return hash((self.tool_name, self.type, self.default_child, mapping_items, self.require_output_mapping))
+
+    def __eq__(self, other):
+        """Equality including all configuration fields."""
+        if not isinstance(other, ConditionalToolRule):
+            return False
+        return (
+            self.tool_name == other.tool_name
+            and self.type == other.type
+            and self.default_child == other.default_child
+            and self.child_output_mapping == other.child_output_mapping
+            and self.require_output_mapping == other.require_output_mapping
+        )
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Determine valid tools based on function output mapping."""
         if not tool_call_history or tool_call_history[-1] != self.tool_name:
@@ -203,6 +251,16 @@ class MaxCountPerStepToolRule(BaseToolRule):
         description="Optional Jinja2 template for generating agent prompt about this tool rule.",
     )
 
+    def __hash__(self):
+        """Hash including max_count_limit."""
+        return hash((self.tool_name, self.type, self.max_count_limit))
+
+    def __eq__(self, other):
+        """Equality including max_count_limit."""
+        if not isinstance(other, MaxCountPerStepToolRule):
+            return False
+        return self.tool_name == other.tool_name and self.type == other.type and self.max_count_limit == other.max_count_limit
+
     def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
         """Restricts the tool if it has been called max_count_limit times in the current step."""
         count = tool_call_history.count(self.tool_name)
@@ -214,6 +272,18 @@ class MaxCountPerStepToolRule(BaseToolRule):
         return available_tools
 
 
+class RequiresApprovalToolRule(BaseToolRule):
+    """
+    Represents a tool rule configuration which requires approval before the tool can be invoked.
+    """
+
+    type: Literal[ToolRuleType.requires_approval] = ToolRuleType.requires_approval
+
+    def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> Set[str]:
+        """Does not enforce any restrictions on which tools are valid"""
+        return available_tools
+
+
 ToolRule = Annotated[
     Union[
         ChildToolRule,
@@ -224,6 +294,7 @@ ToolRule = Annotated[
         RequiredBeforeExitToolRule,
         MaxCountPerStepToolRule,
         ParentToolRule,
+        RequiresApprovalToolRule,
     ],
    Field(discriminator="type"),
 ]
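Worth noting for consumers of this schema: the new __hash__/__eq__ pairs make tool rules hashable and comparable by value, with child lists compared order-insensitively because both sides sort them. A minimal sketch of the resulting behavior (illustrative only; it assumes ChildToolRule can be constructed from just tool_name and children, with type filled in by the discriminator default):

    from letta.schemas.tool_rule import ChildToolRule

    a = ChildToolRule(tool_name="plan", children=["search", "summarize"])
    b = ChildToolRule(tool_name="plan", children=["summarize", "search"])  # same children, different order

    # equality and hashing ignore child ordering, so value-identical rules collapse
    assert a == b
    assert len({a, b}) == 1  # deduplicates when rule lists are merged into a set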
letta/serialize_schemas/marshmallow_agent.py
CHANGED
@@ -5,8 +5,7 @@ from sqlalchemy import func
 from sqlalchemy.orm import sessionmaker
 
 import letta
-from letta.orm import Agent
-from letta.orm import Message as MessageModel
+from letta.orm import Agent, Message as MessageModel
 from letta.schemas.agent import AgentState as PydanticAgentState
 from letta.schemas.user import User
 from letta.serialize_schemas.marshmallow_agent_environment_variable import SerializedAgentEnvironmentVariableSchema
letta/server/rest_api/app.py
CHANGED
@@ -261,7 +261,7 @@ def create_application() -> "FastAPI":
 
     @app.exception_handler(BedrockPermissionError)
     async def bedrock_permission_error_handler(request, exc: BedrockPermissionError):
-        logger.error(
+        logger.error("Bedrock permission denied.")
         if SENTRY_ENABLED:
             sentry_sdk.capture_exception(exc)
 
@@ -433,10 +433,10 @@ def start_server(
     if IS_WINDOWS:
         # Windows doesn't those the fancy unicode characters
         print(f"Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(
+        print("View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
     else:
         print(f"▶ Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
-        print(
+        print("▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
 
     if importlib.util.find_spec("granian") is not None and settings.use_granian:
         # Experimental Granian engine
letta/server/rest_api/auth/index.py
CHANGED
@@ -22,7 +22,6 @@ class AuthRequest(BaseModel):
 
 
 def setup_auth_router(server: SyncServer, interface: QueuingInterface, password: str) -> APIRouter:
-
     @router.post("/auth", tags=["auth"], response_model=AuthResponse)
     def authenticate_user(request: AuthRequest) -> AuthResponse:
         """
letta/server/rest_api/interface.py
CHANGED
@@ -377,9 +377,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         ):
             """Add an item to the deque"""
             assert self._active, "Generator is inactive"
-            assert (
-
-            )
+            assert isinstance(item, LettaMessage) or isinstance(item, LegacyLettaMessage) or isinstance(item, MessageStreamStatus), (
+                f"Wrong type: {type(item)}"
+            )
 
             self._chunks.append(item)
             self._event.set()  # Signal that new data is available
@@ -731,13 +731,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         # If we have main_json, we should output a ToolCallMessage
         elif updates_main_json:
-
             # If there's something in the function_name buffer, we should release it first
             # NOTE: we could output it as part of a chunk that has both name and args,
             # however the frontend may expect name first, then args, so to be
             # safe we'll output name first in a separate chunk
             if self.function_name_buffer:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
                     processed_chunk = None
@@ -778,7 +776,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # If there was nothing in the name buffer, we can proceed to
             # output the arguments chunk as a ToolCallMessage
             else:
-
                 # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                 if self.use_assistant_message and (
                     self.last_flushed_function_name is not None
@@ -860,7 +857,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 # clear buffers
                 self.function_id_buffer = None
             else:
-
                 # There may be a buffer from a previous chunk, for example
                 # if the previous chunk had arguments but we needed to flush name
                 if self.function_args_buffer:
@@ -997,7 +993,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # Otherwise, do simple chunks of ToolCallMessage
 
         else:
-
             tool_call_delta = {}
             if tool_call.id:
                 tool_call_delta["id"] = tool_call.id
@@ -1073,7 +1068,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             tool_call = message_delta.tool_calls[0]
 
             if tool_call.function:
-
                 # Track the function name while streaming
                 # If we were previously on a 'send_message', we need to 'toggle' into 'content' mode
                 if tool_call.function.name:
@@ -1154,7 +1148,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
-
             # create a fake "chunk" of a stream
             # processed_chunk = {
             #     "internal_monologue": msg,
@@ -1268,7 +1261,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 print(f"Failed to parse function message: {e}")
 
         else:
-
             try:
                 func_args = parse_json(function_call.function.arguments)
             except:
letta/server/rest_api/redis_stream_manager.py
CHANGED
@@ -140,9 +140,7 @@ class RedisSSEStreamWriter:
 
         self.last_flush[run_id] = time.time()
 
-        logger.debug(
-            f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, " f"seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}"
-        )
+        logger.debug(f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}")
 
         if chunks[-1].get("complete") == "true":
             self._cleanup_run(run_id)
@@ -227,7 +225,8 @@ async def create_background_stream_processor(
         except Exception as e:
             logger.error(f"Error processing stream for run {run_id}: {e}")
             # Write error chunk
-            error_chunk = {"error": {"message": str(e)}}
+            # error_chunk = {"error": {"message": str(e)}}
+            error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
             await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
         finally:
             if should_stop_writer:
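The error frame written to the Redis stream changes shape here: consumers now receive a flat object with error and code keys rather than the nested {"error": {"message": ...}}. A small sketch of the frame as it appears on the wire (illustrative values only, not code from the package):

    import json

    # the frame create_background_stream_processor writes on failure
    error_chunk = {"error": "boom", "code": "INTERNAL_SERVER_ERROR"}
    frame = f"event: error\ndata: {json.dumps(error_chunk)}\n\n"

    print(frame)
    # event: error
    # data: {"error": "boom", "code": "INTERNAL_SERVER_ERROR"}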
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -2,7 +2,7 @@ import asyncio
 import json
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
 
 from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Request, UploadFile, status
 from fastapi.responses import JSONResponse
@@ -32,9 +32,15 @@ from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
 from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion, MessageType
 from letta.schemas.letta_request import LettaAsyncRequest, LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
-from letta.schemas.memory import
+from letta.schemas.memory import (
+    ArchivalMemorySearchResponse,
+    ArchivalMemorySearchResult,
+    ContextWindowOverview,
+    CreateArchivalMemory,
+    Memory,
+)
 from letta.schemas.message import MessageCreate
-from letta.schemas.passage import Passage
+from letta.schemas.passage import Passage
 from letta.schemas.run import Run
 from letta.schemas.source import Source
 from letta.schemas.tool import Tool
@@ -155,8 +161,8 @@ async def export_agent_serialized(
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
     use_legacy_format: bool = Query(
-
-        description="If true, exports using the legacy single-agent format. If false, exports using the new multi-entity format.",
+        False,
+        description="If true, exports using the legacy single-agent format (v1). If false, exports using the new multi-entity format (v2).",
     ),
     # do not remove, used to autogeneration of spec
     # TODO: Think of a better way to export AgentFileSchema
@@ -252,6 +258,7 @@ async def import_agent(
     project_id: str | None = None,
     strip_messages: bool = False,
     env_vars: Optional[dict[str, Any]] = None,
+    override_embedding_handle: Optional[str] = None,
 ) -> List[str]:
     """
     Import an agent using the new AgentFileSchema format.
@@ -262,12 +269,19 @@ async def import_agent(
         raise HTTPException(status_code=422, detail=f"Invalid agent file schema: {e!s}")
 
     try:
+        if override_embedding_handle:
+            embedding_config_override = await server.get_cached_embedding_config_async(actor=actor, handle=override_embedding_handle)
+        else:
+            embedding_config_override = None
+
         import_result = await server.agent_serialization_manager.import_file(
             schema=agent_schema,
             actor=actor,
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             env_vars=env_vars,
+            override_embedding_config=embedding_config_override,
+            project_id=project_id,
         )
 
         if not import_result.success:
@@ -296,11 +310,16 @@ async def import_agent_serialized(
     file: UploadFile = File(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: str | None = Header(None, alias="user_id"),
+    x_override_embedding_model: str | None = Header(None, alias="x-override-embedding-model"),
     append_copy_suffix: bool = Form(True, description='If set to True, appends "_copy" to the end of the agent name.'),
     override_existing_tools: bool = Form(
         True,
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
+    override_embedding_handle: Optional[str] = Form(
+        None,
+        description="Override import with specific embedding handle.",
+    ),
     project_id: str | None = Form(None, description="The project ID to associate the uploaded agent with."),
     strip_messages: bool = Form(
         False,
@@ -333,6 +352,9 @@ async def import_agent_serialized(
         if not isinstance(env_vars, dict):
             raise HTTPException(status_code=400, detail="env_vars_json must be a valid JSON string")
 
+    # Prioritize header over form data for override_embedding_handle
+    final_override_embedding_handle = x_override_embedding_model or override_embedding_handle
+
     # Check if the JSON is AgentFileSchema or AgentSchema
     # TODO: This is kind of hacky, but should work as long as dont' change the schema
     if "agents" in agent_json and isinstance(agent_json.get("agents"), list):
@@ -346,6 +368,7 @@ async def import_agent_serialized(
             project_id=project_id,
             strip_messages=strip_messages,
             env_vars=env_vars,
+            override_embedding_handle=final_override_embedding_handle,
         )
     else:
         # This is a legacy AgentSchema
@@ -464,6 +487,25 @@ async def detach_tool(
     return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
 
 
+@router.patch("/{agent_id}/tools/approval/{tool_name}", response_model=AgentState, operation_id="modify_approval")
+async def modify_approval(
+    agent_id: str,
+    tool_name: str,
+    requires_approval: bool,
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: str | None = Header(None, alias="user_id"),
+):
+    """
+    Attach a tool to an agent.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+    await server.agent_manager.modify_approvals_async(
+        agent_id=agent_id, tool_name=tool_name, requires_approval=requires_approval, actor=actor
+    )
+    # TODO: Unfortunately we need this to preserve our current API behavior
+    return await server.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+
+
 @router.patch("/{agent_id}/sources/attach/{source_id}", response_model=AgentState, operation_id="attach_source_to_agent")
 async def attach_source(
     agent_id: str,
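The new modify_approval route flips per-tool approval on an agent and returns the updated AgentState; requires_approval is a plain scalar parameter, so FastAPI reads it from the query string. A hypothetical call, assuming the router is mounted under /v1/agents on a local server (the URL, port, and IDs are placeholders, not from this diff):

    import httpx

    # flag the "web_search" tool so the agent must get approval before invoking it
    resp = httpx.patch(
        "http://localhost:8283/v1/agents/agent-123/tools/approval/web_search",
        params={"requires_approval": True},
        headers={"user_id": "user-1"},  # actor-resolution header used by the route
    )
    agent_state = resp.json()  # the refreshed AgentState, per the response_model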
@@ -937,22 +979,62 @@ async def create_passage(
     """
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
 
-    return await server.insert_archival_memory_async(
+    return await server.insert_archival_memory_async(
+        agent_id=agent_id, memory_contents=request.text, actor=actor, tags=request.tags, created_at=request.created_at
+    )
 
 
-@router.
-def
+@router.get("/{agent_id}/archival-memory/search", response_model=ArchivalMemorySearchResponse, operation_id="search_archival_memory")
+async def search_archival_memory(
     agent_id: str,
-
-
+    query: str = Query(..., description="String to search for using semantic similarity"),
+    tags: Optional[List[str]] = Query(None, description="Optional list of tags to filter search results"),
+    tag_match_mode: Literal["any", "all"] = Query(
+        "any", description="How to match tags - 'any' to match passages with any of the tags, 'all' to match only passages with all tags"
+    ),
+    top_k: Optional[int] = Query(None, description="Maximum number of results to return. Uses system default if not specified"),
+    start_datetime: Optional[datetime] = Query(None, description="Filter results to passages created after this datetime"),
+    end_datetime: Optional[datetime] = Query(None, description="Filter results to passages created before this datetime"),
     server: "SyncServer" = Depends(get_letta_server),
-    actor_id: str | None = Header(None, alias="user_id"),
+    actor_id: str | None = Header(None, alias="user_id"),
 ):
     """
-
+    Search archival memory using semantic (embedding-based) search with optional temporal filtering.
+
+    This endpoint allows manual triggering of archival memory searches, enabling users to query
+    an agent's archival memory store directly via the API. The search uses the same functionality
+    as the agent's archival_memory_search tool but is accessible for external API usage.
     """
-    actor = server.user_manager.
-
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    try:
+        # convert datetime to string in ISO 8601 format
+        start_datetime = start_datetime.isoformat() if start_datetime else None
+        end_datetime = end_datetime.isoformat() if end_datetime else None
+
+        # Use the shared agent manager method
+        formatted_results, count = await server.agent_manager.search_agent_archival_memory_async(
+            agent_id=agent_id,
+            actor=actor,
+            query=query,
+            tags=tags,
+            tag_match_mode=tag_match_mode,
+            top_k=top_k,
+            start_datetime=start_datetime,
+            end_datetime=end_datetime,
+        )
+
+        # Convert to proper response schema
+        search_results = [ArchivalMemorySearchResult(**result) for result in formatted_results]
+
+        return ArchivalMemorySearchResponse(results=search_results, count=count)
+
+    except NoResultFound as e:
+        raise HTTPException(status_code=404, detail=f"Agent with id={agent_id} not found for user_id={actor.id}.")
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal server error during archival memory search: {str(e)}")
 
 
 # TODO(ethan): query or path parameter for memory_id?
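Because search_archival_memory is a plain GET, it can be exercised without a client SDK. A hypothetical request against a local server, again assuming the /v1/agents mount point (all values are placeholders):

    import httpx

    resp = httpx.get(
        "http://localhost:8283/v1/agents/agent-123/archival-memory/search",
        params={
            "query": "project kickoff notes",
            "tags": ["meetings"],      # list values become repeated query params
            "tag_match_mode": "any",   # or "all" to require every tag
            "top_k": 5,
        },
        headers={"user_id": "user-1"},
    )
    body = resp.json()  # {"results": [...], "count": <int>} per ArchivalMemorySearchResponse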
@@ -1049,6 +1131,8 @@ async def send_message(
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
     """
+    if len(request.messages) == 0:
+        raise ValueError("Messages must not be empty")
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
@@ -1067,6 +1151,7 @@ async def send_message(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     # Create a new run for execution tracking
@@ -1197,6 +1282,9 @@ async def send_message_streaming(
     request_start_timestamp_ns = get_utc_timestamp_ns()
     MetricRegistry().user_message_counter.add(1, get_ctx_attributes())
 
+    # TODO (cliandy): clean this up
+    redis_client = await get_redis_client()
+
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     # TODO: This is redundant, remove soon
     agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
@@ -1212,8 +1300,9 @@ async def send_message_streaming(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
-    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"]
+    model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
 
     # Create a new job for execution tracking
     if settings.track_agent_run:
@@ -1236,14 +1325,11 @@ async def send_message_streaming(
             ),
             actor=actor,
         )
+        job_update_metadata = None
+        await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
     else:
         run = None
 
-    job_update_metadata = None
-    # TODO (cliandy): clean this up
-    redis_client = await get_redis_client()
-    await redis_client.set(f"{REDIS_RUN_ID_PREFIX}:{agent_id}", run.id if run else None)
-
     try:
         if agent_eligible and model_compatible:
             if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1281,6 +1367,23 @@ async def send_message_streaming(
                 ),
             )
 
+            if request.stream_tokens and model_compatible_token_streaming:
+                raw_stream = agent_loop.step_stream(
+                    input_messages=request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+            else:
+                raw_stream = agent_loop.step_stream_no_tokens(
+                    request.messages,
+                    max_steps=request.max_steps,
+                    use_assistant_message=request.use_assistant_message,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                    include_return_message_types=request.include_return_message_types,
+                )
+
             from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
 
             if request.background and settings.track_agent_run:
@@ -1294,23 +1397,6 @@ async def send_message_streaming(
                 ),
             )
 
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-            else:
-                raw_stream = agent_loop.step_stream_no_tokens(
-                    request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-
                 asyncio.create_task(
                     create_background_stream_processor(
                         stream_generator=raw_stream,
@@ -1319,55 +1405,21 @@ async def send_message_streaming(
                     )
                 )
 
-
+                raw_stream = redis_sse_stream_generator(
                     redis_client=redis_client,
                     run_id=run.id,
                 )
 
-
-
-
-                return StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
-
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+            # Conditionally wrap with keepalive based on request parameter
+            if request.include_pings and settings.enable_keepalive:
+                stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
             else:
-
-
-
-
-
-
-                )
-                # Conditionally wrap with keepalive based on request parameter
-                if request.include_pings and settings.enable_keepalive:
-                    stream = add_keepalive_to_stream(raw_stream, keepalive_interval=settings.keepalive_interval)
-                else:
-                    stream = raw_stream
-
-                result = StreamingResponseWithStatusCode(
-                    stream,
-                    media_type="text/event-stream",
-                )
+                stream = raw_stream
+
+            result = StreamingResponseWithStatusCode(
+                stream,
+                media_type="text/event-stream",
+            )
     else:
         result = await server.send_message_to_agent(
             agent_id=agent_id,
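The net effect of the streaming refactor above is that raw_stream is now built once, then either fanned out to Redis for background runs or wrapped with keepalive pings and returned directly. A hypothetical client-side consumer of the resulting SSE response (the route path, payload shape, and port are assumptions for illustration; ping events only appear when include_pings is set and keepalives are enabled server-side):

    import httpx

    with httpx.stream(
        "POST",
        "http://localhost:8283/v1/agents/agent-123/messages/stream",  # assumed route
        json={
            "messages": [{"role": "user", "content": "hello"}],
            "stream_tokens": True,   # token streaming only for compatible endpoints (anthropic, openai, bedrock, deepseek)
            "include_pings": True,   # opt in to keepalive ping events
        },
        headers={"user_id": "user-1"},
        timeout=None,
    ) as resp:
        # each SSE frame arrives as "event: ..." / "data: ..." lines
        for line in resp.iter_lines():
            if line.startswith("data:"):
                print(line)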
@@ -1382,11 +1434,13 @@ async def send_message_streaming(
             request_start_timestamp_ns=request_start_timestamp_ns,
             include_return_message_types=request.include_return_message_types,
         )
-
+        if settings.track_agent_run:
+            job_status = JobStatus.running
         return result
     except Exception as e:
-
-
+        if settings.track_agent_run:
+            job_update_metadata = {"error": str(e)}
+            job_status = JobStatus.failed
         raise
     finally:
         if settings.track_agent_run:
@@ -1469,7 +1523,10 @@ async def _process_message_background(
         "google_vertex",
         "bedrock",
         "ollama",
+        "azure",
+        "xai",
         "groq",
+        "deepseek",
     ]
     if agent_eligible and model_compatible:
         if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
@@ -1660,6 +1717,7 @@ async def preview_raw_payload(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
@@ -1731,6 +1789,7 @@ async def summarize_agent_conversation(
         "azure",
         "xai",
         "groq",
+        "deepseek",
     ]
 
     if agent_eligible and model_compatible:
letta/server/rest_api/routers/v1/blocks.py
CHANGED
@@ -34,7 +34,7 @@ async def list_blocks(
     ),
     label_search: Optional[str] = Query(
         None,
-        description=("Search blocks by label. If provided, returns blocks that match this label.
+        description=("Search blocks by label. If provided, returns blocks that match this label. This is a full-text search on labels."),
     ),
     description_search: Optional[str] = Query(
         None,