letta-nightly 0.6.43.dev20250324104208__py3-none-any.whl → 0.6.44.dev20250325050316__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (42)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +106 -104
  3. letta/agents/voice_agent.py +1 -1
  4. letta/client/streaming.py +3 -1
  5. letta/functions/function_sets/base.py +2 -1
  6. letta/functions/function_sets/multi_agent.py +51 -40
  7. letta/functions/helpers.py +26 -22
  8. letta/helpers/message_helper.py +41 -0
  9. letta/llm_api/anthropic.py +150 -44
  10. letta/llm_api/aws_bedrock.py +5 -3
  11. letta/llm_api/azure_openai.py +0 -1
  12. letta/llm_api/llm_api_tools.py +4 -0
  13. letta/orm/organization.py +1 -0
  14. letta/orm/sqlalchemy_base.py +2 -4
  15. letta/schemas/agent.py +8 -0
  16. letta/schemas/letta_message.py +8 -4
  17. letta/schemas/llm_config.py +6 -0
  18. letta/schemas/message.py +143 -24
  19. letta/schemas/openai/chat_completion_response.py +5 -0
  20. letta/schemas/organization.py +7 -0
  21. letta/schemas/providers.py +17 -0
  22. letta/schemas/tool.py +5 -1
  23. letta/schemas/usage.py +5 -1
  24. letta/serialize_schemas/pydantic_agent_schema.py +1 -1
  25. letta/server/rest_api/interface.py +44 -7
  26. letta/server/rest_api/routers/v1/agents.py +13 -2
  27. letta/server/rest_api/routers/v1/organizations.py +19 -1
  28. letta/server/rest_api/utils.py +1 -1
  29. letta/server/server.py +49 -70
  30. letta/services/agent_manager.py +6 -2
  31. letta/services/helpers/agent_manager_helper.py +24 -38
  32. letta/services/message_manager.py +7 -6
  33. letta/services/organization_manager.py +13 -0
  34. letta/services/tool_execution_sandbox.py +5 -1
  35. letta/services/tool_executor/__init__.py +0 -0
  36. letta/services/tool_executor/tool_execution_manager.py +74 -0
  37. letta/services/tool_executor/tool_executor.py +380 -0
  38. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/METADATA +2 -3
  39. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/RECORD +42 -38
  40. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/LICENSE +0 -0
  41. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/WHEEL +0 -0
  42. {letta_nightly-0.6.43.dev20250324104208.dist-info → letta_nightly-0.6.44.dev20250325050316.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic.py CHANGED
@@ -13,7 +13,9 @@ from anthropic.types.beta import (
     BetaRawMessageDeltaEvent,
     BetaRawMessageStartEvent,
     BetaRawMessageStopEvent,
+    BetaRedactedThinkingBlock,
     BetaTextBlock,
+    BetaThinkingBlock,
     BetaToolUseBlock,
 )
 
@@ -345,43 +347,32 @@ def convert_anthropic_response_to_chatcompletion(
     finish_reason = remap_finish_reason(response.stop_reason)
 
     content = None
+    reasoning_content = None
+    reasoning_content_signature = None
+    redacted_reasoning_content = None
     tool_calls = None
 
     if len(response.content) > 1:
-        # inner mono + function call
-        assert len(response.content) == 2
-        text_block = response.content[0]
-        tool_block = response.content[1]
-        assert text_block.type == "text"
-        assert tool_block.type == "tool_use"
-        content = strip_xml_tags(string=text_block.text, tag=inner_thoughts_xml_tag)
-        tool_calls = [
-            ToolCall(
-                id=tool_block.id,
-                type="function",
-                function=FunctionCall(
-                    name=tool_block.name,
-                    arguments=json.dumps(tool_block.input, indent=2),
-                ),
-            )
-        ]
-    elif len(response.content) == 1:
-        block = response.content[0]
-        if block.type == "tool_use":
-            # function call only
-            tool_calls = [
-                ToolCall(
-                    id=block.id,
-                    type="function",
-                    function=FunctionCall(
-                        name=block.name,
-                        arguments=json.dumps(block.input, indent=2),
-                    ),
-                )
-            ]
-        else:
-            # inner mono only
-            content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag)
+        for content_part in response.content:
+            if content_part.type == "text":
+                content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
+            if content_part.type == "tool_use":
+                tool_calls = [
+                    ToolCall(
+                        id=content_part.id,
+                        type="function",
+                        function=FunctionCall(
+                            name=content_part.name,
+                            arguments=json.dumps(content_part.input, indent=2),
+                        ),
+                    )
+                ]
+            if content_part.type == "thinking":
+                reasoning_content = content_part.thinking
+                reasoning_content_signature = content_part.signature
+            if content_part.type == "redacted_thinking":
+                redacted_reasoning_content = content_part.data
+
     else:
         raise RuntimeError("Unexpected empty content in response")
 
@@ -392,6 +383,9 @@ def convert_anthropic_response_to_chatcompletion(
         message=ChoiceMessage(
             role=response.role,
             content=content,
+            reasoning_content=reasoning_content,
+            reasoning_content_signature=reasoning_content_signature,
+            redacted_reasoning_content=redacted_reasoning_content,
             tool_calls=tool_calls,
         ),
     )
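For illustration, a minimal sketch of how the new loop maps a mixed Anthropic response (thinking, redacted thinking, then a tool call) onto the new fields. The SimpleNamespace blocks below are stand-ins for the SDK's content-block objects, and all values are invented:

    from types import SimpleNamespace as Block

    # Hypothetical response.content for a turn that thinks, has part of its
    # reasoning redacted by the provider, and then calls a tool.
    blocks = [
        Block(type="thinking", thinking="The user wants a greeting...", signature="sig-abc"),
        Block(type="redacted_thinking", data="opaque-encrypted-blob"),
        Block(type="tool_use", id="toolu_01", name="send_message", input={"message": "hi"}),
    ]

    reasoning_content = reasoning_content_signature = redacted_reasoning_content = None
    for content_part in blocks:  # mirrors the loop in the hunk above
        if content_part.type == "thinking":
            reasoning_content = content_part.thinking
            reasoning_content_signature = content_part.signature
        if content_part.type == "redacted_thinking":
            redacted_reasoning_content = content_part.data

    assert reasoning_content == "The user wants a greeting..."
    assert redacted_reasoning_content == "opaque-encrypted-blob"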
@@ -462,7 +456,31 @@ def convert_anthropic_stream_event_to_chatcompletion(
     """
     # Get finish reason
     finish_reason = None
-    if isinstance(event, BetaRawMessageDeltaEvent):
+    completion_chunk_tokens = 0
+
+    # Get content and tool calls
+    content = None
+    reasoning_content = None
+    reasoning_content_signature = None
+    redacted_reasoning_content = None  # NOTE called "data" in the stream
+    tool_calls = None
+    if isinstance(event, BetaRawMessageStartEvent):
+        """
+        BetaRawMessageStartEvent(
+            message=BetaMessage(
+                content=[],
+                usage=BetaUsage(
+                    input_tokens=3086,
+                    output_tokens=1,
+                ),
+                ...,
+            ),
+            type='message_start'
+        )
+        """
+        completion_chunk_tokens += event.message.usage.output_tokens
+
+    elif isinstance(event, BetaRawMessageDeltaEvent):
         """
         BetaRawMessageDeltaEvent(
             delta=Delta(
@@ -474,11 +492,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
         )
         """
         finish_reason = remap_finish_reason(event.delta.stop_reason)
+        completion_chunk_tokens += event.usage.output_tokens
 
-    # Get content and tool calls
-    content = None
-    tool_calls = None
-    if isinstance(event, BetaRawContentBlockDeltaEvent):
+    elif isinstance(event, BetaRawContentBlockDeltaEvent):
         """
         BetaRawContentBlockDeltaEvent(
             delta=BetaInputJSONDelta(
@@ -501,9 +517,24 @@ def convert_anthropic_stream_event_to_chatcompletion(
         )
 
         """
+        # ReACT COT
         if event.delta.type == "text_delta":
             content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
 
+        # Extended thought COT
+        elif event.delta.type == "thinking_delta":
+            # Redacted doesn't come in the delta chunks, comes all at once
+            # "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
+            # Thinking might start with ""
+            if len(event.delta.thinking) > 0:
+                reasoning_content = event.delta.thinking
+
+        # Extended thought COT signature
+        elif event.delta.type == "signature_delta":
+            if len(event.delta.signature) > 0:
+                reasoning_content_signature = event.delta.signature
+
+        # Tool calling
         elif event.delta.type == "input_json_delta":
             tool_calls = [
                 ToolCallDelta(
@@ -514,6 +545,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
                     ),
                 )
             ]
+        else:
+            warnings.warn("Unexpected delta type: " + event.delta.type)
+
     elif isinstance(event, BetaRawContentBlockStartEvent):
         """
         BetaRawContentBlockStartEvent(
@@ -551,6 +585,15 @@ def convert_anthropic_stream_event_to_chatcompletion(
             ]
         elif isinstance(event.content_block, BetaTextBlock):
             content = event.content_block.text
+        elif isinstance(event.content_block, BetaThinkingBlock):
+            reasoning_content = event.content_block.thinking
+        elif isinstance(event.content_block, BetaRedactedThinkingBlock):
+            redacted_reasoning_content = event.content_block.data
+        else:
+            warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
+
+    else:
+        warnings.warn("Unexpected event type: " + event.type)
 
     # Initialize base response
     choice = ChunkChoice(
@@ -558,6 +601,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
         finish_reason=finish_reason,
         delta=MessageDelta(
             content=content,
+            reasoning_content=reasoning_content,
+            reasoning_content_signature=reasoning_content_signature,
+            redacted_reasoning_content=redacted_reasoning_content,
             tool_calls=tool_calls,
         ),
     )
@@ -566,6 +612,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
         choices=[choice],
         created=get_utc_time(),
         model=model,
+        output_tokens=completion_chunk_tokens,
     )
 
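Since each chunk response now carries output_tokens, a consumer can total completion tokens instead of counting chunks. A rough sketch of that accumulation (SimpleNamespace stands in for ChatCompletionChunkResponse; the token counts are invented):

    from types import SimpleNamespace

    chunks = [
        SimpleNamespace(output_tokens=1),    # from the BetaRawMessageStartEvent usage
        SimpleNamespace(output_tokens=137),  # from the BetaRawMessageDeltaEvent usage
        SimpleNamespace(output_tokens=0),    # content-block events carry no usage
    ]
    completion_tokens = sum(c.output_tokens for c in chunks if c.output_tokens is not None)
    assert completion_tokens == 138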
 
@@ -577,8 +624,20 @@ def _prepare_anthropic_request(
     # if true, put COT inside the tool calls instead of inside the content
     put_inner_thoughts_in_kwargs: bool = False,
     bedrock: bool = False,
+    # extended thinking related fields
+    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
 ) -> dict:
     """Prepare the request data for Anthropic API format."""
+    if extended_thinking:
+        assert (
+            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
+        ), "max tokens must be greater than thinking budget"
+        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
+        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
+        # Silently disable prefix_fill for now
+        prefix_fill = False
 
     # if needed, put inner thoughts as a kwarg for all tools
     if data.tools and put_inner_thoughts_in_kwargs:
@@ -595,6 +654,14 @@ def _prepare_anthropic_request(
     # pydantic -> dict
     data = data.model_dump(exclude_none=True)
 
+    if extended_thinking:
+        data["thinking"] = {
+            "type": "enabled",
+            "budget_tokens": max_reasoning_tokens,
+        }
+        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
+        data["temperature"] = 1.0
+
     if "functions" in data:
         raise ValueError(f"'functions' unexpected in Anthropic API payload")
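With extended thinking enabled, the prepared request body ends up looking roughly like the sketch below. The model name and all values are illustrative, not taken from the diff; the diff only guarantees the "thinking" block, the forced temperature, and that the budget stays below max_tokens:

    payload = {
        "model": "claude-3-7-sonnet-20250219",  # illustrative model choice
        "max_tokens": 4096,
        "temperature": 1.0,  # forced to 1.0 whenever thinking is enabled
        "thinking": {"type": "enabled", "budget_tokens": 1024},  # budget < max_tokens
        "messages": [{"role": "user", "content": "Hello"}],
    }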
 
@@ -665,6 +732,8 @@ def anthropic_chat_completions_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
@@ -678,6 +747,8 @@ def anthropic_chat_completions_request(
         data=data,
         inner_thoughts_xml_tag=inner_thoughts_xml_tag,
         put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+        extended_thinking=extended_thinking,
+        max_reasoning_tokens=max_reasoning_tokens,
     )
     log_event(name="llm_request_sent", attributes=data)
     response = anthropic_client.beta.messages.create(
@@ -717,6 +788,8 @@ def anthropic_chat_completions_request_stream(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
     """Stream chat completions from Anthropic API.
@@ -728,6 +801,8 @@ def anthropic_chat_completions_request_stream(
         data=data,
         inner_thoughts_xml_tag=inner_thoughts_xml_tag,
         put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+        extended_thinking=extended_thinking,
+        max_reasoning_tokens=max_reasoning_tokens,
     )
 
     anthropic_override_key = ProviderManager().get_anthropic_override_key()
@@ -777,6 +852,8 @@ def anthropic_chat_completions_process_stream(
     stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     put_inner_thoughts_in_kwargs: bool = False,
+    extended_thinking: bool = False,
+    max_reasoning_tokens: Optional[int] = None,
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     betas: List[str] = ["tools-2024-04-04"],
@@ -839,7 +916,6 @@ def anthropic_chat_completions_process_stream(
         created=dummy_message.created_at,
         model=chat_completion_request.model,
         usage=UsageStatistics(
-            completion_tokens=0,
             prompt_tokens=prompt_tokens,
             total_tokens=prompt_tokens,
         ),
@@ -850,13 +926,15 @@ def anthropic_chat_completions_process_stream(
     if stream_interface:
         stream_interface.stream_start()
 
-    n_chunks = 0
+    completion_tokens = 0
     try:
         for chunk_idx, chat_completion_chunk in enumerate(
             anthropic_chat_completions_request_stream(
                 data=chat_completion_request,
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
                 put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                extended_thinking=extended_thinking,
+                max_reasoning_tokens=max_reasoning_tokens,
                 betas=betas,
             )
         ):
@@ -868,6 +946,9 @@ def anthropic_chat_completions_process_stream(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                     message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                    # if extended_thinking is on, then reasoning_content will be flowing as chunks
+                    # TODO handle emitting redacted reasoning content (e.g. as concat?)
+                    expect_reasoning_content=extended_thinking,
                 )
             elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                 stream_interface.process_refresh(chat_completion_response)
@@ -908,6 +989,30 @@ def anthropic_chat_completions_process_stream(
             else:
                 accum_message.content += content_delta
 
+            # NOTE: for extended_thinking mode
+            if extended_thinking and message_delta.reasoning_content is not None:
+                reasoning_content_delta = message_delta.reasoning_content
+                if accum_message.reasoning_content is None:
+                    accum_message.reasoning_content = reasoning_content_delta
+                else:
+                    accum_message.reasoning_content += reasoning_content_delta
+
+            # NOTE: extended_thinking sends a signature
+            if extended_thinking and message_delta.reasoning_content_signature is not None:
+                reasoning_content_signature_delta = message_delta.reasoning_content_signature
+                if accum_message.reasoning_content_signature is None:
+                    accum_message.reasoning_content_signature = reasoning_content_signature_delta
+                else:
+                    accum_message.reasoning_content_signature += reasoning_content_signature_delta
+
+            # NOTE: extended_thinking also has the potential for redacted_reasoning_content
+            if extended_thinking and message_delta.redacted_reasoning_content is not None:
+                redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
+                if accum_message.redacted_reasoning_content is None:
+                    accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
+                else:
+                    accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
+
             # TODO(charles) make sure this works for parallel tool calling?
             if message_delta.tool_calls is not None:
                 tool_calls_delta = message_delta.tool_calls
@@ -966,7 +1071,8 @@ def anthropic_chat_completions_process_stream(
             chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
 
             # increment chunk counter
-            n_chunks += 1
+            if chat_completion_chunk.output_tokens is not None:
+                completion_tokens += chat_completion_chunk.output_tokens
 
     except Exception as e:
         if stream_interface:
@@ -990,8 +1096,8 @@ def anthropic_chat_completions_process_stream(
 
     # compute token usage before returning
     # TODO try actually computing the #tokens instead of assuming the chunks is the same
-    chat_completion_response.usage.completion_tokens = n_chunks
-    chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
+    chat_completion_response.usage.completion_tokens = completion_tokens
+    chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
 
     assert len(chat_completion_response.choices) > 0, chat_completion_response
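A worked example of the revised usage arithmetic, reusing the prompt count from the BetaRawMessageStartEvent docstring earlier (the completion count is invented):

    prompt_tokens = 3086       # reported when the stream starts
    completion_tokens = 138    # accumulated from chunk.output_tokens, not chunk count
    total_tokens = prompt_tokens + completion_tokens
    assert total_tokens == 3224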
 
letta/llm_api/aws_bedrock.py CHANGED
@@ -3,17 +3,19 @@ from typing import Any, Dict, List
 
 from anthropic import AnthropicBedrock
 
-from letta.settings import model_settings
-
 from letta.log import get_logger
+from letta.settings import model_settings
 
 logger = get_logger(__name__)
 
+
 def has_valid_aws_credentials() -> bool:
     """
     Check if AWS credentials are properly configured.
     """
-    valid_aws_credentials = os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
+    valid_aws_credentials = (
+        os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
+    )
     return valid_aws_credentials
 
letta/llm_api/azure_openai.py CHANGED
@@ -3,7 +3,6 @@ from collections import defaultdict
 import requests
 from openai import AzureOpenAI
 
-
 from letta.llm_api.openai import prepare_openai_payload
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
letta/llm_api/llm_api_tools.py CHANGED
@@ -406,6 +406,8 @@ def create(
                 chat_completion_request=chat_completion_request,
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
                 stream_interface=stream_interface,
+                extended_thinking=llm_config.enable_reasoner,
+                max_reasoning_tokens=llm_config.max_reasoning_tokens,
             )
 
         else:
@@ -413,6 +415,8 @@ def create(
             response = anthropic_chat_completions_request(
                 data=chat_completion_request,
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+                extended_thinking=llm_config.enable_reasoner,
+                max_reasoning_tokens=llm_config.max_reasoning_tokens,
             )
 
         if llm_config.put_inner_thoughts_in_kwargs:
letta/orm/organization.py CHANGED
@@ -23,6 +23,7 @@ class Organization(SqlalchemyBase):
     __pydantic_model__ = PydanticOrganization
 
     name: Mapped[str] = mapped_column(doc="The display name of the organization.")
+    privileged_tools: Mapped[bool] = mapped_column(doc="Whether the organization has access to privileged tools.")
 
     # relationships
     users: Mapped[List["User"]] = relationship("User", back_populates="organization", cascade="all, delete-orphan")
letta/orm/sqlalchemy_base.py CHANGED
@@ -361,14 +361,12 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
         if identifier_set != results_set:
             # Construct a detailed error message based on query conditions
             conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
-            logger.warning(
-                f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}"
-            )
+            logger.debug(f"{cls.__name__} not found with {conditions_str}. Queried ids: {identifier_set}, Found ids: {results_set}")
             return results
 
         # Construct a detailed error message based on query conditions
         conditions_str = ", ".join(query_conditions) if query_conditions else "no specific conditions"
-        logger.warning(f"{cls.__name__} not found with {conditions_str}")
+        logger.debug(f"{cls.__name__} not found with {conditions_str}")
         return []
 
     @handle_db_timeout
letta/schemas/agent.py CHANGED
@@ -147,6 +147,14 @@ class CreateAgent(BaseModel, validate_assignment=True):
     )
     context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
     embedding_chunk_size: Optional[int] = Field(DEFAULT_EMBEDDING_CHUNK_SIZE, description="The embedding chunk size used by the agent.")
+    max_tokens: Optional[int] = Field(
+        None,
+        description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
+    )
+    max_reasoning_tokens: Optional[int] = Field(
+        None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
+    )
+    enable_reasoner: Optional[bool] = Field(False, description="Whether to enable internal extended thinking step for a reasoner model.")
     from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
     template: bool = Field(False, description="Whether the agent is a template")
     project: Optional[str] = Field(
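For illustration, an agent-creation payload exercising the three new fields might look like the sketch below. The values are invented and every other CreateAgent field behaves as before:

    create_agent_payload = {
        "enable_reasoner": True,
        "max_reasoning_tokens": 1024,  # thinking budget, must stay below max_tokens
        "max_tokens": 4096,            # total generation budget, including the reasoning step
    }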
letta/schemas/letta_message.py CHANGED
@@ -88,11 +88,13 @@ class ReasoningMessage(LettaMessage):
         source (Literal["reasoner_model", "non_reasoner_model"]): Whether the reasoning
             content was generated natively by a reasoner model or derived via prompting
         reasoning (str): The internal reasoning of the agent
+        signature (Optional[str]): The model-generated signature of the reasoning step
     """
 
     message_type: Literal["reasoning_message"] = "reasoning_message"
     source: Literal["reasoner_model", "non_reasoner_model"] = "non_reasoner_model"
     reasoning: str
+    signature: Optional[str] = None
 
 
 class HiddenReasoningMessage(LettaMessage):
@@ -106,12 +108,12 @@ class HiddenReasoningMessage(LettaMessage):
         name (Optional[str]): The name of the sender of the message
         state (Literal["redacted", "omitted"]): Whether the reasoning
             content was redacted by the provider or simply omitted by the API
-        reasoning (str): The internal reasoning of the agent
+        hidden_reasoning (Optional[str]): The internal reasoning of the agent
     """
 
-    message_type: Literal["reasoning_message"] = "reasoning_message"
+    message_type: Literal["hidden_reasoning_message"] = "hidden_reasoning_message"
     state: Literal["redacted", "omitted"]
-    reasoning: str
+    hidden_reasoning: Optional[str] = None
 
 
 class ToolCall(BaseModel):
@@ -229,7 +231,7 @@ class AssistantMessage(LettaMessage):
 
 # NOTE: use Pydantic's discriminated unions feature: https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions
 LettaMessageUnion = Annotated[
-    Union[SystemMessage, UserMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
+    Union[SystemMessage, UserMessage, ReasoningMessage, HiddenReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
     Field(discriminator="message_type"),
 ]
 
@@ -240,6 +242,7 @@ def create_letta_message_union_schema():
             {"$ref": "#/components/schemas/SystemMessage"},
             {"$ref": "#/components/schemas/UserMessage"},
             {"$ref": "#/components/schemas/ReasoningMessage"},
+            {"$ref": "#/components/schemas/HiddenReasoningMessage"},
             {"$ref": "#/components/schemas/ToolCallMessage"},
             {"$ref": "#/components/schemas/ToolReturnMessage"},
             {"$ref": "#/components/schemas/AssistantMessage"},
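A small sketch of the extended discriminated union in action, assuming pydantic v2's TypeAdapter; the id/date values are placeholders and any additional required base fields of LettaMessage are not shown:

    from pydantic import TypeAdapter

    from letta.schemas.letta_message import HiddenReasoningMessage, LettaMessageUnion

    adapter = TypeAdapter(LettaMessageUnion)
    msg = adapter.validate_python(
        {
            "id": "message-123",                  # placeholder id
            "date": "2025-03-25T00:00:00+00:00",  # placeholder timestamp
            "message_type": "hidden_reasoning_message",
            "state": "redacted",
        }
    )
    assert isinstance(msg, HiddenReasoningMessage)
    assert msg.hidden_reasoning is None  # optional now, unlike the old required `reasoning`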
@@ -250,6 +253,7 @@ def create_letta_message_union_schema():
         "system_message": "#/components/schemas/SystemMessage",
         "user_message": "#/components/schemas/UserMessage",
         "reasoning_message": "#/components/schemas/ReasoningMessage",
+        "hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
         "tool_call_message": "#/components/schemas/ToolCallMessage",
         "tool_return_message": "#/components/schemas/ToolReturnMessage",
         "assistant_message": "#/components/schemas/AssistantMessage",
letta/schemas/llm_config.py CHANGED
@@ -60,6 +60,12 @@ class LLMConfig(BaseModel):
         4096,
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )
+    enable_reasoner: bool = Field(
+        False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
+    )
+    max_reasoning_tokens: int = Field(
+        0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
+    )
 
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
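Taken together with the llm_api_tools.py changes above, enabling the feature comes down to setting the two new LLMConfig fields, which create() forwards as extended_thinking and max_reasoning_tokens. A sketch, where the model name and context window are illustrative rather than taken from the diff:

    from letta.schemas.llm_config import LLMConfig

    config = LLMConfig(
        model="claude-3-7-sonnet-20250219",  # illustrative Anthropic model
        model_endpoint_type="anthropic",
        context_window=200000,
        max_tokens=4096,
        enable_reasoner=True,                # forwarded as extended_thinking
        max_reasoning_tokens=1024,           # the documented minimum budget
        put_inner_thoughts_in_kwargs=False,  # required when extended thinking is on
    )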