letta-nightly 0.7.0.dev20250423003112__py3-none-any.whl → 0.7.2.dev20250423222439__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +113 -81
  3. letta/agents/letta_agent.py +2 -2
  4. letta/agents/letta_agent_batch.py +38 -34
  5. letta/client/client.py +10 -2
  6. letta/constants.py +4 -3
  7. letta/functions/function_sets/multi_agent.py +1 -3
  8. letta/functions/helpers.py +3 -3
  9. letta/groups/dynamic_multi_agent.py +58 -59
  10. letta/groups/round_robin_multi_agent.py +43 -49
  11. letta/groups/sleeptime_multi_agent.py +28 -18
  12. letta/groups/supervisor_multi_agent.py +21 -20
  13. letta/helpers/composio_helpers.py +1 -1
  14. letta/helpers/converters.py +29 -0
  15. letta/helpers/datetime_helpers.py +9 -0
  16. letta/helpers/message_helper.py +1 -0
  17. letta/helpers/tool_execution_helper.py +3 -3
  18. letta/jobs/llm_batch_job_polling.py +2 -1
  19. letta/llm_api/anthropic.py +10 -6
  20. letta/llm_api/anthropic_client.py +2 -2
  21. letta/llm_api/cohere.py +2 -2
  22. letta/llm_api/google_ai_client.py +2 -2
  23. letta/llm_api/google_vertex_client.py +2 -2
  24. letta/llm_api/openai.py +11 -4
  25. letta/llm_api/openai_client.py +34 -2
  26. letta/local_llm/chat_completion_proxy.py +2 -2
  27. letta/orm/agent.py +8 -1
  28. letta/orm/custom_columns.py +15 -0
  29. letta/schemas/agent.py +6 -0
  30. letta/schemas/letta_message_content.py +2 -1
  31. letta/schemas/llm_config.py +12 -2
  32. letta/schemas/message.py +18 -0
  33. letta/schemas/openai/chat_completion_response.py +52 -3
  34. letta/schemas/response_format.py +78 -0
  35. letta/schemas/tool_execution_result.py +14 -0
  36. letta/server/rest_api/chat_completions_interface.py +2 -2
  37. letta/server/rest_api/interface.py +3 -2
  38. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +1 -1
  39. letta/server/rest_api/routers/v1/agents.py +4 -4
  40. letta/server/rest_api/routers/v1/groups.py +2 -2
  41. letta/server/rest_api/routers/v1/messages.py +41 -19
  42. letta/server/server.py +24 -57
  43. letta/services/agent_manager.py +6 -1
  44. letta/services/llm_batch_manager.py +28 -26
  45. letta/services/tool_executor/tool_execution_manager.py +37 -28
  46. letta/services/tool_executor/tool_execution_sandbox.py +35 -16
  47. letta/services/tool_executor/tool_executor.py +299 -68
  48. letta/services/tool_sandbox/base.py +3 -2
  49. letta/services/tool_sandbox/e2b_sandbox.py +5 -4
  50. letta/services/tool_sandbox/local_sandbox.py +11 -6
  51. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/METADATA +1 -1
  52. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/RECORD +55 -53
  53. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/LICENSE +0 -0
  54. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/WHEEL +0 -0
  55. {letta_nightly-0.7.0.dev20250423003112.dist-info → letta_nightly-0.7.2.dev20250423222439.dist-info}/entry_points.txt +0 -0
@@ -9,7 +9,7 @@ from letta.interface import AgentInterface
  from letta.orm import User
  from letta.orm.enums import ToolType
  from letta.schemas.letta_message_content import TextContent
- from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.message import MessageCreate
  from letta.schemas.tool import Tool
  from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule
  from letta.schemas.usage import LettaUsageStatistics
@@ -37,17 +37,18 @@ class SupervisorMultiAgent(Agent):

  def step(
  self,
- messages: List[MessageCreate],
+ input_messages: List[MessageCreate],
  chaining: bool = True,
  max_chaining_steps: Optional[int] = None,
  put_inner_thoughts_first: bool = True,
  assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
  **kwargs,
  ) -> LettaUsageStatistics:
+ # Load settings
  token_streaming = self.interface.streaming_mode if hasattr(self.interface, "streaming_mode") else False
  metadata = self.interface.metadata if hasattr(self.interface, "metadata") else None

- # add multi agent tool
+ # Prepare supervisor agent
  if self.tool_manager.get_tool_by_name(tool_name="send_message_to_all_agents_in_group", actor=self.user) is None:
  multi_agent_tool = Tool(
  name=send_message_to_all_agents_in_group.__name__,
@@ -64,7 +65,6 @@ class SupervisorMultiAgent(Agent):
  )
  self.agent_state = self.agent_manager.attach_tool(agent_id=self.agent_state.id, tool_id=multi_agent_tool.id, actor=self.user)

- # override tool rules
  old_tool_rules = self.agent_state.tool_rules
  self.agent_state.tool_rules = [
  InitToolRule(
@@ -79,24 +79,25 @@ class SupervisorMultiAgent(Agent):
  ),
  ]

- supervisor_messages = [
- Message(
- agent_id=self.agent_state.id,
- role="user",
- content=[TextContent(text=message.content)],
- name=None,
- model=None,
- tool_calls=None,
- tool_call_id=None,
- group_id=self.group_id,
- otid=message.otid,
- )
- for message in messages
- ]
+ # Prepare new messages
+ new_messages = []
+ for message in input_messages:
+ if isinstance(message.content, str):
+ message.content = [TextContent(text=message.content)]
+ message.group_id = self.group_id
+ new_messages.append(message)
+
  try:
- supervisor_agent = Agent(agent_state=self.agent_state, interface=self.interface, user=self.user)
+ # Load supervisor agent
+ supervisor_agent = Agent(
+ agent_state=self.agent_state,
+ interface=self.interface,
+ user=self.user,
+ )
+
+ # Perform supervisor step
  usage_stats = supervisor_agent.step(
- messages=supervisor_messages,
+ input_messages=new_messages,
  chaining=chaining,
  max_chaining_steps=max_chaining_steps,
  stream=token_streaming,
@@ -10,7 +10,7 @@ def get_composio_api_key(actor: User, logger: Optional[Logger] = None) -> Option
  api_keys = SandboxConfigManager().list_sandbox_env_vars_by_key(key="COMPOSIO_API_KEY", actor=actor)
  if not api_keys:
  if logger:
- logger.warning(f"No API keys found for Composio. Defaulting to the environment variable...")
+ logger.debug(f"No API keys found for Composio. Defaulting to the environment variable...")
  if tool_settings.composio_api_key:
  return tool_settings.composio_api_key
  else:
@@ -22,6 +22,13 @@ from letta.schemas.letta_message_content import (
  )
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import ToolReturn
+ from letta.schemas.response_format import (
+ JsonObjectResponseFormat,
+ JsonSchemaResponseFormat,
+ ResponseFormatType,
+ ResponseFormatUnion,
+ TextResponseFormat,
+ )
  from letta.schemas.tool_rule import (
  ChildToolRule,
  ConditionalToolRule,
@@ -371,3 +378,25 @@ def deserialize_agent_step_state(data: Optional[Dict]) -> Optional[AgentStepStat
  return None

  return AgentStepState(**data)
+
+
+ # --------------------------
+ # Response Format Serialization
+ # --------------------------
+
+
+ def serialize_response_format(response_format: Optional[ResponseFormatUnion]) -> Optional[Dict[str, Any]]:
+ if not response_format:
+ return None
+ return response_format.model_dump(mode="json")
+
+
+ def deserialize_response_format(data: Optional[Dict]) -> Optional[ResponseFormatUnion]:
+ if not data:
+ return None
+ if data["type"] == ResponseFormatType.text:
+ return TextResponseFormat(**data)
+ if data["type"] == ResponseFormatType.json_schema:
+ return JsonSchemaResponseFormat(**data)
+ if data["type"] == ResponseFormatType.json_object:
+ return JsonObjectResponseFormat(**data)
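
A minimal sketch of how these two converters are meant to round-trip a response format through the new JSON column (illustrative only; the constructor fields of the ResponseFormat schemas are assumptions, since letta/schemas/response_format.py is not shown in this diff):

from letta.helpers.converters import (
    deserialize_response_format,
    serialize_response_format,
)
from letta.schemas.response_format import TextResponseFormat

# Assumption: TextResponseFormat() can be built with defaults, mirroring how
# deserialize_response_format rebuilds it from {"type": ...}.
rf = TextResponseFormat()

stored = serialize_response_format(rf)          # plain dict via model_dump(mode="json")
restored = deserialize_response_format(stored)  # dispatches on stored["type"] back to the subclass

assert isinstance(restored, TextResponseFormat)
assert serialize_response_format(None) is None  # both helpers pass None through unchanged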
@@ -66,6 +66,15 @@ def get_utc_time() -> datetime:
  return datetime.now(timezone.utc)


+ def get_utc_time_int() -> int:
+ return int(get_utc_time().timestamp())
+
+
+ def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
+ """Convert Unix timestamp in seconds to UTC datetime object"""
+ return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
+
+
  def format_datetime(dt):
  return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")

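A short standalone sketch (not part of the package) of why the `created` fields in the hunks below switch to integers: ChatCompletionResponse.created is now kept in Unix seconds, matching OpenAI's wire format, and converted back to an aware datetime only where the streaming interfaces need one:

from datetime import datetime, timezone

def get_utc_time_int() -> int:
    # Same behavior as the helper added above: current UTC time in whole seconds
    return int(datetime.now(timezone.utc).timestamp())

def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
    # Inverse direction, always returning a timezone-aware UTC datetime
    return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)

created = get_utc_time_int()  # e.g. 1713216662
assert timestamp_to_datetime(created).tzinfo == timezone.utc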
@@ -40,4 +40,5 @@ def prepare_input_message_create(
  tool_call_id=None,
  otid=message.otid,
  sender_id=message.sender_id,
+ group_id=message.group_id,
  )
@@ -160,12 +160,12 @@ def execute_external_tool(
  else:
  agent_state_copy = None

- sandbox_run_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
- function_response, updated_agent_state = sandbox_run_result.func_return, sandbox_run_result.agent_state
+ tool_execution_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
+ function_response, updated_agent_state = tool_execution_result.func_return, tool_execution_result.agent_state
  # TODO: Bring this back
  # if allow_agent_state_modifications and updated_agent_state is not None:
  # self.update_memory_if_changed(updated_agent_state.memory)
- return function_response, sandbox_run_result
+ return function_response, tool_execution_result
  except Exception as e:
  # Need to catch error here, or else trunction wont happen
  # TODO: modify to function execution error
@@ -73,7 +73,8 @@ async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: st
  """
  updates = []
  try:
- async for item_result in server.anthropic_async_client.beta.messages.batches.results(batch_resp_id):
+ results = await server.anthropic_async_client.beta.messages.batches.results(batch_resp_id)
+ async for item_result in results:
  # Here, custom_id should be the agent_id
  item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
  updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
@@ -20,7 +20,7 @@ from anthropic.types.beta import (
  )

  from letta.errors import BedrockError, BedrockPermissionError
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
  from letta.llm_api.aws_bedrock import get_bedrock_client
  from letta.llm_api.helpers import add_inner_thoughts_to_functions
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion(
  return ChatCompletionResponse(
  id=response.id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=response.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
  'logprobs': None
  }
  ],
- 'created': datetime.datetime(2025, 1, 24, 0, 18, 55, tzinfo=TzInfo(UTC)),
+ 'created': 1713216662,
  'model': 'gpt-4o-mini-2024-07-18',
  'system_fingerprint': 'fp_bd83329f63',
  'object': 'chat.completion.chunk'
@@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
  return ChatCompletionChunkResponse(
  id=message_id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  output_tokens=completion_chunk_tokens,
  )
@@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream(
  chat_completion_response = ChatCompletionResponse(
  id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
  choices=[],
- created=dummy_message.created_at,
+ created=int(dummy_message.created_at.timestamp()),
  model=chat_completion_request.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream(
  message_type = stream_interface.process_chunk(
  chat_completion_chunk,
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
- message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+ message_date=(
+ timestamp_to_datetime(chat_completion_response.created)
+ if create_message_datetime
+ else timestamp_to_datetime(chat_completion_chunk.created)
+ ),
  # if extended_thinking is on, then reasoning_content will be flowing as chunks
  # TODO handle emitting redacted reasoning content (e.g. as concat?)
  expect_reasoning_content=extended_thinking,
@@ -22,7 +22,7 @@ from letta.errors import (
  LLMServerError,
  LLMUnprocessableEntityError,
  )
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -403,7 +403,7 @@ class AnthropicClient(LLMClientBase):
  chat_completion_response = ChatCompletionResponse(
  id=response.id,
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=response.model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
letta/llm_api/cohere.py CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional, Union

  import requests

- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.local_llm.utils import count_tokens
  from letta.schemas.message import Message
@@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion(
  return ChatCompletionResponse(
  id=response_json["response_id"],
  choices=[choice],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  usage=UsageStatistics(
  prompt_tokens=prompt_tokens,
@@ -6,7 +6,7 @@ import requests
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

  from letta.constants import NON_USER_MSG_PREFIX
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.llm_api.helpers import make_post_request
  from letta.llm_api.llm_client_base import LLMClientBase
@@ -260,7 +260,7 @@ class GoogleAIClient(LLMClientBase):
  id=response_id,
  choices=choices,
  model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response
- created=get_utc_time(),
+ created=get_utc_time_int(),
  usage=usage,
  )
  except KeyError as e:
@@ -4,7 +4,7 @@ from typing import List, Optional
  from google import genai
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig

- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.llm_api.google_ai_client import GoogleAIClient
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
@@ -234,7 +234,7 @@ class GoogleVertexClient(GoogleAIClient):
  id=response_id,
  choices=choices,
  model=self.llm_config.model, # NOTE: Google API doesn't pass back model in the response
- created=get_utc_time(),
+ created=get_utc_time_int(),
  usage=usage,
  )
  except KeyError as e:
letta/llm_api/openai.py CHANGED
@@ -4,7 +4,9 @@ from typing import Generator, List, Optional, Union
  import requests
  from openai import OpenAI

+ from letta.helpers.datetime_helpers import timestamp_to_datetime
  from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
+ from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
@@ -135,7 +137,7 @@ def build_openai_chat_completions_request(
  tool_choice=tool_choice,
  user=str(user_id),
  max_completion_tokens=llm_config.max_tokens,
- temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+ temperature=llm_config.temperature if supports_temperature_param(model) else None,
  reasoning_effort=llm_config.reasoning_effort,
  )
  else:
@@ -237,7 +239,7 @@ def openai_chat_completions_process_stream(
  chat_completion_response = ChatCompletionResponse(
  id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
  choices=[],
- created=dummy_message.created_at, # NOTE: doesn't matter since both will do get_utc_time()
+ created=int(dummy_message.created_at.timestamp()), # NOTE: doesn't matter since both will do get_utc_time()
  model=chat_completion_request.model,
  usage=UsageStatistics(
  completion_tokens=0,
@@ -274,7 +276,11 @@ def openai_chat_completions_process_stream(
  message_type = stream_interface.process_chunk(
  chat_completion_chunk,
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
- message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+ message_date=(
+ timestamp_to_datetime(chat_completion_response.created)
+ if create_message_datetime
+ else timestamp_to_datetime(chat_completion_chunk.created)
+ ),
  expect_reasoning_content=expect_reasoning_content,
  name=name,
  message_index=message_idx,
@@ -489,6 +495,7 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
  # except ValueError as e:
  # warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

- if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+ if not supports_parallel_tool_calling(chat_completion_request.model):
  data.pop("parallel_tool_calls", None)
+
  return data
@@ -34,6 +34,33 @@ from letta.settings import model_settings
  logger = get_logger(__name__)


+ def is_openai_reasoning_model(model: str) -> bool:
+ """Utility function to check if the model is a 'reasoner'"""
+
+ # NOTE: needs to be updated with new model releases
+ return model.startswith("o1") or model.startswith("o3")
+
+
+ def supports_temperature_param(model: str) -> bool:
+ """Certain OpenAI models don't support configuring the temperature.
+
+ Example error: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', 'code': 'unsupported_parameter'}}
+ """
+ if is_openai_reasoning_model(model):
+ return False
+ else:
+ return True
+
+
+ def supports_parallel_tool_calling(model: str) -> bool:
+ """Certain OpenAI models don't support parallel tool calls."""
+
+ if is_openai_reasoning_model(model):
+ return False
+ else:
+ return True
+
+
  class OpenAIClient(LLMClientBase):
  def _prepare_client_kwargs(self) -> dict:
  api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
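
A hedged standalone sketch of how these three predicates compose; the model-name prefixes are exactly the ones hard-coded above and, as the NOTE says, will need updating as new models ship:

def is_openai_reasoning_model(model: str) -> bool:
    # o-series ("reasoner") models, per the check added above
    return model.startswith("o1") or model.startswith("o3")

def supports_temperature_param(model: str) -> bool:
    # reasoning models reject the `temperature` parameter with a 400 error
    return not is_openai_reasoning_model(model)

def supports_parallel_tool_calling(model: str) -> bool:
    # reasoning models also reject `parallel_tool_calls`
    return not is_openai_reasoning_model(model)

assert supports_temperature_param("gpt-4o-mini")
assert not supports_parallel_tool_calling("o3-mini")
# Callers then drop the unsupported fields, e.g.:
# temperature = llm_config.temperature if supports_temperature_param(model) else None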
@@ -66,7 +93,8 @@ class OpenAIClient(LLMClientBase):
  put_inner_thoughts_first=True,
  )

- use_developer_message = llm_config.model.startswith("o1") or llm_config.model.startswith("o3") # o-series models
+ use_developer_message = is_openai_reasoning_model(llm_config.model)
+
  openai_message_list = [
  cast_message_to_subtype(
  m.to_openai_dict(
@@ -103,7 +131,7 @@ class OpenAIClient(LLMClientBase):
  tool_choice=tool_choice,
  user=str(),
  max_completion_tokens=llm_config.max_tokens,
- temperature=llm_config.temperature,
+ temperature=llm_config.temperature if supports_temperature_param(model) else None,
  )

  if "inference.memgpt.ai" in llm_config.model_endpoint:
@@ -160,6 +188,10 @@ class OpenAIClient(LLMClientBase):
  response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
  )

+ # If we used a reasoning model, create a content part for the ommitted reasoning
+ if is_openai_reasoning_model(self.llm_config.model):
+ chat_completion_response.choices[0].message.ommitted_reasoning_content = True
+
  return chat_completion_response

  def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
@@ -6,7 +6,7 @@ import requests

  from letta.constants import CLI_WARNING_PREFIX
  from letta.errors import LocalLLMConnectionError, LocalLLMError
- from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
  from letta.local_llm.constants import DEFAULT_WRAPPER
  from letta.local_llm.function_parser import patch_function
@@ -241,7 +241,7 @@ def get_chat_completion(
  ),
  )
  ],
- created=get_utc_time(),
+ created=get_utc_time_int(),
  model=model,
  # "This fingerprint represents the backend configuration that the model runs with."
  # system_fingerprint=user if user is not None else "null",
letta/orm/agent.py CHANGED
@@ -5,7 +5,7 @@ from sqlalchemy import JSON, Boolean, Index, String
  from sqlalchemy.orm import Mapped, mapped_column, relationship

  from letta.orm.block import Block
- from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn
+ from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ResponseFormatColumn, ToolRulesColumn
  from letta.orm.identity import Identity
  from letta.orm.mixins import OrganizationMixin
  from letta.orm.organization import Organization
@@ -15,6 +15,7 @@ from letta.schemas.agent import AgentType, get_prompt_template_for_agent_type
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
+ from letta.schemas.response_format import ResponseFormatUnion
  from letta.schemas.tool_rule import ToolRule

  if TYPE_CHECKING:
@@ -48,6 +49,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
  # This is dangerously flexible with the JSON type
  message_ids: Mapped[Optional[List[str]]] = mapped_column(JSON, nullable=True, doc="List of message IDs in in-context memory.")

+ # Response Format
+ response_format: Mapped[Optional[ResponseFormatUnion]] = mapped_column(
+ ResponseFormatColumn, nullable=True, doc="The response format for the agent."
+ )
+
  # Metadata and configs
  metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True, doc="metadata for the agent.")
  llm_config: Mapped[Optional[LLMConfig]] = mapped_column(
@@ -168,6 +174,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
  "multi_agent_group": None,
  "tool_exec_environment_variables": [],
  "enable_sleeptime": None,
+ "response_format": self.response_format,
  }

  # Optional fields: only included if requested
@@ -9,6 +9,7 @@ from letta.helpers.converters import (
  deserialize_llm_config,
  deserialize_message_content,
  deserialize_poll_batch_response,
+ deserialize_response_format,
  deserialize_tool_calls,
  deserialize_tool_returns,
  deserialize_tool_rules,
@@ -20,6 +21,7 @@ from letta.helpers.converters import (
  serialize_llm_config,
  serialize_message_content,
  serialize_poll_batch_response,
+ serialize_response_format,
  serialize_tool_calls,
  serialize_tool_returns,
  serialize_tool_rules,
@@ -168,3 +170,16 @@ class AgentStepStateColumn(TypeDecorator):

  def process_result_value(self, value, dialect):
  return deserialize_agent_step_state(value)
+
+
+ class ResponseFormatColumn(TypeDecorator):
+ """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+ impl = JSON
+ cache_ok = True
+
+ def process_bind_param(self, value, dialect):
+ return serialize_response_format(value)
+
+ def process_result_value(self, value, dialect):
+ return deserialize_response_format(value)
letta/schemas/agent.py CHANGED
@@ -14,6 +14,7 @@ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import UsageStatistics
+ from letta.schemas.response_format import ResponseFormatUnion
  from letta.schemas.source import Source
  from letta.schemas.tool import Tool
  from letta.schemas.tool_rule import ToolRule
@@ -66,6 +67,9 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
  # llm information
  llm_config: LLMConfig = Field(..., description="The LLM configuration used by the agent.")
  embedding_config: EmbeddingConfig = Field(..., description="The embedding configuration used by the agent.")
+ response_format: Optional[ResponseFormatUnion] = Field(
+ None, description="The response format used by the agent when returning from `send_message`."
+ )

  # This is an object representing the in-process state of a running `Agent`
  # Field in this object can be theoretically edited by tools, and will be persisted by the ORM
@@ -180,6 +184,7 @@ class CreateAgent(BaseModel, validate_assignment=True): #
  description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
  )
  enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+ response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")

  @field_validator("name")
  @classmethod
@@ -259,6 +264,7 @@ class UpdateAgent(BaseModel):
  None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
  )
  enable_sleeptime: Optional[bool] = Field(None, description="If set to True, memory management will move to a background agent thread.")
+ response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the agent.")

  class Config:
  extra = "ignore" # Ignores extra fields
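
For context, a hedged sketch of how the new field might be set on an agent update via these schemas; the default-constructed JsonObjectResponseFormat is an assumption (mirroring how deserialize_response_format rebuilds it from {"type": ...}), and only the `response_format` field itself comes from this diff:

from letta.schemas.agent import UpdateAgent
from letta.schemas.response_format import JsonObjectResponseFormat

# Assumption: JsonObjectResponseFormat() can be built with defaults.
patch = UpdateAgent(response_format=JsonObjectResponseFormat())

# The ORM round-trips this value through ResponseFormatColumn, and AgentState
# exposes it back as `agent_state.response_format`.
print(patch.model_dump(exclude_none=True))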
@@ -145,7 +145,8 @@ class OmittedReasoningContent(MessageContent):
  type: Literal[MessageContentType.omitted_reasoning] = Field(
  MessageContentType.omitted_reasoning, description="Indicates this is an omitted reasoning step."
  )
- tokens: int = Field(..., description="The reasoning token count for intermediate reasoning content.")
+ # NOTE: dropping because we don't track this kind of information for the other reasoning types
+ # tokens: int = Field(..., description="The reasoning token count for intermediate reasoning content.")


  LettaMessageContentUnion = Annotated[
@@ -81,8 +81,11 @@ class LLMConfig(BaseModel):
  @model_validator(mode="before")
  @classmethod
  def set_default_enable_reasoner(cls, values):
- if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
- values["enable_reasoner"] = True
+ # NOTE: this is really only applicable for models that can toggle reasoning on-and-off, like 3.7
+ # We can also use this field to identify if a model is a "reasoning" model (o1/o3, etc.) if we want
+ # if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
+ # values["enable_reasoner"] = True
+ # values["put_inner_thoughts_in_kwargs"] = False
  return values

  @model_validator(mode="before")
@@ -100,6 +103,13 @@ class LLMConfig(BaseModel):
  if values.get("put_inner_thoughts_in_kwargs") is None:
  values["put_inner_thoughts_in_kwargs"] = False if model in avoid_put_inner_thoughts_in_kwargs else True

+ # For the o1/o3 series from OpenAI, set to False by default
+ # We can set this flag to `true` if desired, which will enable "double-think"
+ from letta.llm_api.openai_client import is_openai_reasoning_model
+
+ if is_openai_reasoning_model(model):
+ values["put_inner_thoughts_in_kwargs"] = False
+
  return values

  @model_validator(mode="after")
letta/schemas/message.py CHANGED
@@ -31,6 +31,7 @@ from letta.schemas.letta_message import (
  )
  from letta.schemas.letta_message_content import (
  LettaMessageContentUnion,
+ OmittedReasoningContent,
  ReasoningContent,
  RedactedReasoningContent,
  TextContent,
@@ -82,6 +83,7 @@ class MessageCreate(BaseModel):
  name: Optional[str] = Field(None, description="The name of the participant.")
  otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
  sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+ group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")

  def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
  data = super().model_dump(**kwargs)
@@ -294,6 +296,18 @@ class Message(BaseMessage):
  sender_id=self.sender_id,
  )
  )
+ elif isinstance(content_part, OmittedReasoningContent):
+ # Special case for "hidden reasoning" models like o1/o3
+ # NOTE: we also have to think about how to return this during streaming
+ messages.append(
+ HiddenReasoningMessage(
+ id=self.id,
+ date=self.created_at,
+ state="omitted",
+ name=self.name,
+ otid=otid,
+ )
+ )
  else:
  warnings.warn(f"Unrecognized content part in assistant message: {content_part}")

@@ -463,6 +477,10 @@ class Message(BaseMessage):
  data=openai_message_dict["redacted_reasoning_content"] if "redacted_reasoning_content" in openai_message_dict else None,
  ),
  )
+ if "omitted_reasoning_content" in openai_message_dict and openai_message_dict["omitted_reasoning_content"]:
+ content.append(
+ OmittedReasoningContent(),
+ )

  # If we're going from deprecated function form
  if openai_message_dict["role"] == "function":