letta-nightly 0.11.2.dev20250810104230__py3-none-any.whl → 0.11.3.dev20250812002120__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/letta_agent.py +22 -10
- letta/constants.py +7 -0
- letta/functions/function_sets/base.py +1 -1
- letta/helpers/converters.py +19 -0
- letta/helpers/json_helpers.py +1 -1
- letta/helpers/tool_rule_solver.py +48 -96
- letta/interfaces/openai_streaming_interface.py +9 -0
- letta/llm_api/anthropic_client.py +9 -2
- letta/llm_api/google_vertex_client.py +17 -4
- letta/llm_api/llm_client_base.py +4 -0
- letta/llm_api/openai_client.py +4 -1
- letta/log.py +3 -1
- letta/schemas/enums.py +4 -3
- letta/schemas/llm_config.py +35 -25
- letta/schemas/response_format.py +5 -6
- letta/schemas/tool_rule.py +8 -1
- letta/services/agent_manager.py +2 -3
- letta/services/mcp/base_client.py +6 -2
- letta/services/mcp_manager.py +11 -5
- letta/services/tool_executor/tool_execution_sandbox.py +8 -4
- letta/services/tool_manager.py +66 -42
- letta/services/tool_sandbox/e2b_sandbox.py +4 -2
- letta/services/tool_sandbox/modal_sandbox.py +4 -4
- letta/settings.py +2 -1
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/RECORD +30 -30
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.2.dev20250810104230.dist-info → letta_nightly-0.11.3.dev20250812002120.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agents/letta_agent.py
CHANGED
@@ -220,6 +220,7 @@ class LettaAgent(BaseAgent):
             actor=self.actor,
         )
         stop_reason = None
+        job_update_metadata = None
         usage = LettaUsageStatistics()

         # span for request
@@ -367,6 +368,7 @@ class LettaAgent(BaseAgent):
             except Exception as e:
                 # Handle any unexpected errors during step processing
                 self.logger.error(f"Error during step processing: {e}")
+                job_update_metadata = {"error": str(e)}

                 # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                 if not stop_reason:
@@ -429,7 +431,7 @@ class LettaAgent(BaseAgent):
                     self.logger.error("Invalid StepProgression value")

                 if settings.track_stop_reason:
-                    await self._log_request(request_start_timestamp_ns, request_span)
+                    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

             except Exception as e:
                 self.logger.error("Failed to update step: %s", e)
@@ -447,7 +449,7 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-        await self._log_request(request_start_timestamp_ns, request_span)
+        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        # Return back usage
        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -485,6 +487,7 @@ class LettaAgent(BaseAgent):
            request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

        stop_reason = None
+       job_update_metadata = None
        usage = LettaUsageStatistics()
        for i in range(max_steps):
            # If dry run, build request data and return it without making LLM call
@@ -622,6 +625,7 @@ class LettaAgent(BaseAgent):
            except Exception as e:
                # Handle any unexpected errors during step processing
                self.logger.error(f"Error during step processing: {e}")
+               job_update_metadata = {"error": str(e)}

                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                if not stop_reason:
@@ -680,7 +684,7 @@ class LettaAgent(BaseAgent):
                    self.logger.error("Invalid StepProgression value")

                if settings.track_stop_reason:
-                   await self._log_request(request_start_timestamp_ns, request_span)
+                   await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -698,7 +702,7 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-       await self._log_request(request_start_timestamp_ns, request_span)
+       await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        return current_in_context_messages, new_in_context_messages, stop_reason, usage

@@ -748,6 +752,7 @@ class LettaAgent(BaseAgent):
            actor=self.actor,
        )
        stop_reason = None
+       job_update_metadata = None
        usage = LettaUsageStatistics()
        first_chunk, request_span = True, None
        if request_start_timestamp_ns:
@@ -977,6 +982,7 @@ class LettaAgent(BaseAgent):
            except Exception as e:
                # Handle any unexpected errors during step processing
                self.logger.error(f"Error during step processing: {e}")
+               job_update_metadata = {"error": str(e)}

                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
                if not stop_reason:
@@ -1039,7 +1045,7 @@ class LettaAgent(BaseAgent):

                # Do tracking for failure cases. Can consolidate with success conditions later.
                if settings.track_stop_reason:
-                   await self._log_request(request_start_timestamp_ns, request_span)
+                   await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -1056,20 +1062,28 @@ class LettaAgent(BaseAgent):
            force=False,
        )

-       await self._log_request(request_start_timestamp_ns, request_span)
+       await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
            yield f"data: {finish_chunk}\n\n"

-    async def _log_request(
+    async def _log_request(
+        self, request_start_timestamp_ns: int, request_span: "Span | None", job_update_metadata: dict | None, is_error: bool
+    ):
        if request_start_timestamp_ns:
            now_ns, now = get_utc_timestamp_ns(), get_utc_time()
            duration_ns = now_ns - request_start_timestamp_ns
            if request_span:
                request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
            await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
-           if self.current_run_id:
+           if settings.track_agent_run and self.current_run_id:
                await self.job_manager.record_response_duration(self.current_run_id, duration_ns, self.actor)
+               await self.job_manager.safe_update_job_status_async(
+                   job_id=self.current_run_id,
+                   new_status=JobStatus.failed if is_error else JobStatus.completed,
+                   actor=self.actor,
+                   metadata=job_update_metadata,
+               )
        if request_span:
            request_span.end()

@@ -1507,8 +1521,6 @@ class LettaAgent(BaseAgent):
                status="error",
            )

-           print(target_tool)
-
            # TODO: This temp. Move this logic and code to executors

            if agent_step_span:
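The net effect of the letta_agent.py changes above: error details collected during the step loop are attached to the run's job record, and the terminal job status (failed vs. completed) is reported from a single logging helper. A minimal standalone sketch of that pattern; JobStore and run_with_job_tracking are hypothetical stand-ins, not letta APIs.

import time


class JobStore:
    """Hypothetical stand-in for letta's job manager."""

    def __init__(self):
        self.jobs = {}

    def update(self, job_id: str, status: str, metadata: dict | None) -> None:
        self.jobs[job_id] = {"status": status, "metadata": metadata}


def run_with_job_tracking(job_id: str, store: JobStore, step) -> None:
    start_ns = time.monotonic_ns()
    job_update_metadata = None
    is_error = False
    try:
        step()
    except Exception as e:
        # mirror the diff: capture the error so it can be attached to the job record
        job_update_metadata = {"error": str(e)}
        is_error = True
    finally:
        # single exit point, analogous in spirit to _log_request(..., is_error=...)
        duration_ms = (time.monotonic_ns() - start_ns) // 1_000_000
        store.update(
            job_id,
            "failed" if is_error else "completed",
            {"duration_ms": duration_ms, **(job_update_metadata or {})},
        )


store = JobStore()
run_with_job_tracking("run-1", store, lambda: None)
assert store.jobs["run-1"]["status"] == "completed"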
letta/constants.py
CHANGED
@@ -208,6 +208,13 @@ LLM_MAX_TOKENS = {
     "deepseek-chat": 64000,
     "deepseek-reasoner": 64000,
     ## OpenAI models: https://platform.openai.com/docs/models/overview
+    # gpt-5
+    "gpt-5": 400000,
+    "gpt-5-2025-08-07": 400000,
+    "gpt-5-mini": 400000,
+    "gpt-5-mini-2025-08-07": 400000,
+    "gpt-5-nano": 400000,
+    "gpt-5-nano-2025-08-07": 400000,
     # reasoners
     "o1": 200000,
     # "o1-pro": 200000,  # responses API only
letta/functions/function_sets/base.py
CHANGED
@@ -264,7 +264,7 @@ def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_li

     Args:
         label (str): Section of the memory to be edited, identified by its label.
-        new_str (str): The text to insert.
+        new_str (str): The text to insert. Do not include line number prefixes.
         insert_line (int): The line number after which to insert the text (0 for beginning of file). Defaults to -1 (end of the file).

     Returns:
letta/helpers/converters.py
CHANGED
@@ -395,6 +395,24 @@ def deserialize_agent_step_state(data: Optional[Dict]) -> Optional[AgentStepStat
     if not data:
         return None

+    if solver_data := data.get("tool_rules_solver"):
+        # Get existing tool_rules or reconstruct from categorized fields for backwards compatibility
+        tool_rules_data = solver_data.get("tool_rules", [])
+
+        if not tool_rules_data:
+            for field_name in (
+                "init_tool_rules",
+                "continue_tool_rules",
+                "child_based_tool_rules",
+                "parent_tool_rules",
+                "terminal_tool_rules",
+                "required_before_exit_tool_rules",
+            ):
+                if field_data := solver_data.get(field_name):
+                    tool_rules_data.extend(field_data)
+
+        solver_data["tool_rules"] = deserialize_tool_rules(tool_rules_data)
+
     return AgentStepState(**data)


@@ -418,6 +436,7 @@ def deserialize_response_format(data: Optional[Dict]) -> Optional[ResponseFormat
         return JsonSchemaResponseFormat(**data)
     if data["type"] == ResponseFormatType.json_object:
         return JsonObjectResponseFormat(**data)
+    raise ValueError(f"Unknown Response Format type: {data['type']}")


 # --------------------------
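The backwards-compatibility branch added to deserialize_agent_step_state can be restated independently of letta's schema classes. The sketch below uses plain dicts, with field names taken from the diff; merge_legacy_tool_rules is a hypothetical helper.

CATEGORIZED_FIELDS = (
    "init_tool_rules",
    "continue_tool_rules",
    "child_based_tool_rules",
    "parent_tool_rules",
    "terminal_tool_rules",
    "required_before_exit_tool_rules",
)


def merge_legacy_tool_rules(solver_data: dict) -> list[dict]:
    """Prefer the new flat 'tool_rules' list; otherwise rebuild it from the
    old categorized fields so previously-serialized step state still loads."""
    tool_rules = list(solver_data.get("tool_rules", []))
    if not tool_rules:
        for field_name in CATEGORIZED_FIELDS:
            tool_rules.extend(solver_data.get(field_name) or [])
    return tool_rules


# example: an old payload with only categorized fields still yields a flat list
old_payload = {"init_tool_rules": [{"tool_name": "send_message"}]}
assert merge_legacy_tool_rules(old_payload) == [{"tool_name": "send_message"}]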
letta/helpers/json_helpers.py
CHANGED
@@ -15,7 +15,7 @@ def json_dumps(data, indent=2) -> str:
         try:
             return obj.decode("utf-8")
         except Exception:
-
+            # TODO: this is to handle Gemini thought signatures, b64 decode this back to bytes when sending back to Gemini
             return base64.b64encode(obj).decode("utf-8")
     raise TypeError(f"Type {type(obj)} not serializable")

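For context, the surrounding json_dumps helper uses a common json.dumps default-handler pattern: bytes are returned as UTF-8 text when possible and base64-encoded otherwise. A generic, self-contained illustration of that pattern (not letta's exact helper):

import base64
import json


def default(obj):
    # decode bytes when they are valid UTF-8, otherwise fall back to base64
    if isinstance(obj, bytes):
        try:
            return obj.decode("utf-8")
        except Exception:
            return base64.b64encode(obj).decode("utf-8")
    raise TypeError(f"Type {type(obj)} not serializable")


print(json.dumps({"sig": b"\xff\xfe"}, default=default))  # {"sig": "//4="}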
letta/helpers/tool_rule_solver.py
CHANGED
@@ -1,11 +1,9 @@
-from typing import
+from typing import TypeAlias

 from pydantic import BaseModel, Field

 from letta.schemas.block import Block
-from letta.schemas.enums import ToolRuleType
 from letta.schemas.tool_rule import (
-    BaseToolRule,
     ChildToolRule,
     ConditionalToolRule,
     ContinueToolRule,
@@ -14,88 +12,61 @@ from letta.schemas.tool_rule import (
     ParentToolRule,
     RequiredBeforeExitToolRule,
     TerminalToolRule,
+    ToolRule,
 )

+ToolName: TypeAlias = str

-
-    """Custom exception for tool rule validation errors in ToolRulesSolver."""
-
-    def __init__(self, message: str):
-        super().__init__(f"ToolRuleValidationError: {message}")
+COMPILED_PROMPT_DESCRIPTION = "The following constraints define rules for tool usage and guide desired behavior. These rules must be followed to ensure proper tool execution and workflow. A single response may contain multiple tool calls."


 class ToolRulesSolver(BaseModel):
-
-
+    tool_rules: list[ToolRule] | None = Field(default=None, description="Input list of tool rules")
+
+    # Categorized fields
+    init_tool_rules: list[InitToolRule] = Field(
+        default_factory=list, description="Initial tool rules to be used at the start of tool execution.", exclude=True
     )
-    continue_tool_rules:
-        default_factory=list, description="Continue tool rules to be used to continue tool execution."
+    continue_tool_rules: list[ContinueToolRule] = Field(
+        default_factory=list, description="Continue tool rules to be used to continue tool execution.", exclude=True
     )
     # TODO: This should be renamed?
     # TODO: These are tools that control the set of allowed functions in the next turn
-    child_based_tool_rules:
-        default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions."
+    child_based_tool_rules: list[ChildToolRule | ConditionalToolRule | MaxCountPerStepToolRule] = Field(
+        default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions.", exclude=True
     )
-    parent_tool_rules:
-        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set."
+    parent_tool_rules: list[ParentToolRule] = Field(
+        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set.", exclude=True
     )
-    terminal_tool_rules:
-        default_factory=list, description="Terminal tool rules that end the agent loop if called."
+    terminal_tool_rules: list[TerminalToolRule] = Field(
+        default_factory=list, description="Terminal tool rules that end the agent loop if called.", exclude=True
     )
-    required_before_exit_tool_rules:
-        default_factory=list, description="Tool rules that must be called before the agent can exit."
+    required_before_exit_tool_rules: list[RequiredBeforeExitToolRule] = Field(
+        default_factory=list, description="Tool rules that must be called before the agent can exit.", exclude=True
     )
-    tool_call_history:
-
-    def __init__(
-        self,
-        tool_rules: Optional[List[BaseToolRule]] = None,
-        init_tool_rules: Optional[List[InitToolRule]] = None,
-        continue_tool_rules: Optional[List[ContinueToolRule]] = None,
-        child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
-        parent_tool_rules: Optional[List[ParentToolRule]] = None,
-        terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
-        required_before_exit_tool_rules: Optional[List[RequiredBeforeExitToolRule]] = None,
-        tool_call_history: Optional[List[str]] = None,
-        **kwargs,
-    ):
-        super().__init__(
-            init_tool_rules=init_tool_rules or [],
-            continue_tool_rules=continue_tool_rules or [],
-            child_based_tool_rules=child_based_tool_rules or [],
-            parent_tool_rules=parent_tool_rules or [],
-            terminal_tool_rules=terminal_tool_rules or [],
-            required_before_exit_tool_rules=required_before_exit_tool_rules or [],
-            tool_call_history=tool_call_history or [],
-            **kwargs,
-        )
+    tool_call_history: list[str] = Field(default_factory=list, description="History of tool calls, updated with each tool call.")

-
-
-
-
+    def __init__(self, tool_rules: list[ToolRule] | None = None, **kwargs):
+        super().__init__(tool_rules=tool_rules, **kwargs)
+
+    def model_post_init(self, __context):
+        if self.tool_rules:
+            for rule in self.tool_rules:
+                if isinstance(rule, InitToolRule):
                     self.init_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ChildToolRule)
+                elif isinstance(rule, ChildToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ConditionalToolRule)
-                    self.validate_conditional_tool(rule)
+                elif isinstance(rule, ConditionalToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, TerminalToolRule)
+                elif isinstance(rule, TerminalToolRule):
                     self.terminal_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ContinueToolRule)
+                elif isinstance(rule, ContinueToolRule):
                     self.continue_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, MaxCountPerStepToolRule)
+                elif isinstance(rule, MaxCountPerStepToolRule):
                     self.child_based_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, ParentToolRule)
+                elif isinstance(rule, ParentToolRule):
                     self.parent_tool_rules.append(rule)
-                elif rule
-                    assert isinstance(rule, RequiredBeforeExitToolRule)
+                elif isinstance(rule, RequiredBeforeExitToolRule):
                     self.required_before_exit_tool_rules.append(rule)

     def register_tool_call(self, tool_name: str):
@@ -107,12 +78,12 @@ class ToolRulesSolver(BaseModel):
         self.tool_call_history.clear()

     def get_allowed_tool_names(
-        self, available_tools: set[
-    ) ->
+        self, available_tools: set[ToolName], error_on_empty: bool = True, last_function_response: str | None = None
+    ) -> list[ToolName]:
         """Get a list of tool names allowed based on the last tool called.

         The logic is as follows:
-        1. if there are no previous tool calls and we have InitToolRules, those are the only options for the first tool call
+        1. if there are no previous tool calls, and we have InitToolRules, those are the only options for the first tool call
         2. else we take the intersection of the Parent/Child/Conditional/MaxSteps as the options
         3. Continue/Terminal/RequiredBeforeExit rules are applied in the agent loop flow, not to restrict tools
         """
@@ -134,23 +105,23 @@ class ToolRulesSolver(BaseModel):

         return list(final_allowed_tools)

-    def is_terminal_tool(self, tool_name:
+    def is_terminal_tool(self, tool_name: ToolName) -> bool:
         """Check if the tool is defined as a terminal tool in the terminal tool rules or required-before-exit tool rules."""
         return any(rule.tool_name == tool_name for rule in self.terminal_tool_rules)

-    def has_children_tools(self, tool_name):
+    def has_children_tools(self, tool_name: ToolName):
         """Check if the tool has children tools"""
         return any(rule.tool_name == tool_name for rule in self.child_based_tool_rules)

-    def is_continue_tool(self, tool_name):
+    def is_continue_tool(self, tool_name: ToolName):
         """Check if the tool is defined as a continue tool in the tool rules."""
         return any(rule.tool_name == tool_name for rule in self.continue_tool_rules)

-    def has_required_tools_been_called(self, available_tools: set[
+    def has_required_tools_been_called(self, available_tools: set[ToolName]) -> bool:
         """Check if all required-before-exit tools have been called."""
         return len(self.get_uncalled_required_tools(available_tools=available_tools)) == 0

-    def get_uncalled_required_tools(self, available_tools: set[
+    def get_uncalled_required_tools(self, available_tools: set[ToolName]) -> list[str]:
         """Get the list of required-before-exit tools that have not been called yet."""
         if not self.required_before_exit_tool_rules:
             return []  # No required tools means no uncalled tools
@@ -161,16 +132,12 @@ class ToolRulesSolver(BaseModel):
         # Get required tools that are uncalled AND available
         return list((required_tool_names & available_tools) - called_tool_names)

-    def
-        """Get the names of tools that are required before exit."""
-        return [rule.tool_name for rule in self.required_before_exit_tool_rules]
-
-    def compile_tool_rule_prompts(self) -> Optional[Block]:
+    def compile_tool_rule_prompts(self) -> Block | None:
         """
         Compile prompt templates from all tool rules into an ephemeral Block.

         Returns:
-
+            Block | None: Compiled prompt block with tool rule constraints, or None if no templates exist.
         """
         compiled_prompts = []

@@ -191,20 +158,20 @@ class ToolRulesSolver(BaseModel):
             return Block(
                 label="tool_usage_rules",
                 value="\n".join(compiled_prompts),
-                description=
+                description=COMPILED_PROMPT_DESCRIPTION,
             )
         return None

-    def guess_rule_violation(self, tool_name:
+    def guess_rule_violation(self, tool_name: ToolName) -> list[str]:
         """
         Check if the given tool name or the previous tool in history matches any tool rule,
-        and return rendered prompt templates for matching
+        and return rendered prompt templates for matching rule violations.

         Args:
             tool_name: The name of the tool to check for rule violations

         Returns:
-
+            list of rendered prompt templates from matching tool rules
         """
         violated_rules = []

@@ -228,18 +195,3 @@ class ToolRulesSolver(BaseModel):
                 violated_rules.append(rendered_prompt)

         return violated_rules
-
-    @staticmethod
-    def validate_conditional_tool(rule: ConditionalToolRule):
-        """
-        Validate a conditional tool rule
-
-        Args:
-            rule (ConditionalToolRule): The conditional tool rule to validate
-
-        Raises:
-            ToolRuleValidationError: If the rule is invalid
-        """
-        if len(rule.child_output_mapping) == 0:
-            raise ToolRuleValidationError("Conditional tool rule must have at least one child tool.")
-        return True
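A condensed sketch of the ToolRulesSolver refactor above: one flat tool_rules input, buckets derived by isinstance in model_post_init, and the derived buckets excluded from serialization. The rule classes below are simplified stand-ins for letta's schemas, not the real ones.

from typing import Literal

from pydantic import BaseModel, Field


class InitToolRule(BaseModel):
    tool_name: str
    type: Literal["run_first"] = "run_first"


class TerminalToolRule(BaseModel):
    tool_name: str
    type: Literal["exit_loop"] = "exit_loop"


ToolRule = InitToolRule | TerminalToolRule


class MiniSolver(BaseModel):
    tool_rules: list[ToolRule] | None = Field(default=None)
    # derived buckets: rebuilt on load, excluded from serialization
    init_tool_rules: list[InitToolRule] = Field(default_factory=list, exclude=True)
    terminal_tool_rules: list[TerminalToolRule] = Field(default_factory=list, exclude=True)

    def model_post_init(self, __context) -> None:
        for rule in self.tool_rules or []:
            if isinstance(rule, InitToolRule):
                self.init_tool_rules.append(rule)
            elif isinstance(rule, TerminalToolRule):
                self.terminal_tool_rules.append(rule)


solver = MiniSolver(tool_rules=[InitToolRule(tool_name="a"), TerminalToolRule(tool_name="b")])
assert [r.tool_name for r in solver.init_tool_rules] == ["a"]
assert "init_tool_rules" not in solver.model_dump()  # only the flat list round-trips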
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -345,6 +345,15 @@ class OpenAIStreamingInterface:
                     prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                     diff = new_content.replace(prev_content, "", 1)
+
+                    # quick patch to mitigate double message streaming error
+                    # TODO: root cause this issue and remove patch
+                    if diff != "" and "\\n" not in new_content:
+                        converted_new_content = new_content.replace("\n", "\\n")
+                        converted_content_diff = converted_new_content.replace(prev_content, "", 1)
+                        if converted_content_diff == "":
+                            diff = converted_content_diff
+
                     self.current_json_parse_result = parsed_args
                     if prev_message_type and prev_message_type != "assistant_message":
                         message_index += 1
letta/llm_api/anthropic_client.py
CHANGED
@@ -182,7 +182,7 @@ class AnthropicClient(LLMClientBase):
         }

         # Extended Thinking
-        if llm_config.enable_reasoner:
+        if self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             data["thinking"] = {
                 "type": "enabled",
                 "budget_tokens": llm_config.max_reasoning_tokens,
@@ -200,7 +200,7 @@ class AnthropicClient(LLMClientBase):
             # Special case for summarization path
             tools_for_request = None
             tool_choice = None
-        elif llm_config.enable_reasoner:
+        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
             # NOTE: reasoning models currently do not allow for `any`
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools]
@@ -296,6 +296,13 @@ class AnthropicClient(LLMClientBase):
             token_count -= 8
         return token_count

+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return (
+            llm_config.model.startswith("claude-3-7-sonnet")
+            or llm_config.model.startswith("claude-sonnet-4")
+            or llm_config.model.startswith("claude-opus-4")
+        )
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         if isinstance(e, anthropic.APITimeoutError):
letta/llm_api/google_vertex_client.py
CHANGED
@@ -254,8 +254,11 @@ class GoogleVertexClient(LLMClientBase):
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
         if "flash" in llm_config.model:
+            # Gemini flash models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
             thinking_config = ThinkingConfig(
-                thinking_budget=
+                thinking_budget=(
+                    llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
+                ),
             )
             request_data["config"]["thinking_config"] = thinking_config.model_dump()

@@ -292,7 +295,6 @@ class GoogleVertexClient(LLMClientBase):
             }
         }
         """
-        # print(response_data)

         response = GenerateContentResponse(**response_data)
         try:
@@ -301,11 +303,11 @@ class GoogleVertexClient(LLMClientBase):
             for candidate in response.candidates:
                 content = candidate.content

-                if content.role is None or content.parts is None:
+                if content is None or content.role is None or content.parts is None:
                     # This means the response is malformed like MALFORMED_FUNCTION_CALL
                     # NOTE: must be a ValueError to trigger a retry
                     if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                        raise ValueError(f"Error in response data from LLM: {candidate.
+                        raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}...")
                     else:
                         raise ValueError(f"Error in response data from LLM: {response_data}")

@@ -494,6 +496,17 @@ class GoogleVertexClient(LLMClientBase):
                 "required": ["name", "args"],
             }

+    def get_thinking_budget(self, model: str) -> bool:
+        if model_settings.gemini_force_minimum_thinking_budget:
+            if all(substring in model for substring in ["2.5", "flash", "lite"]):
+                return 512
+            elif all(substring in model for substring in ["2.5", "flash"]):
+                return 1
+        return 0
+
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         # Fallback to base implementation
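The new get_thinking_budget logic, restated as a standalone function (force_minimum stands in for model_settings.gemini_force_minimum_thinking_budget): when the flag is set, 2.5 flash-lite models get a 512-token floor, other 2.5 flash models get 1, and everything else gets 0.

def thinking_budget(model: str, force_minimum: bool) -> int:
    if force_minimum:
        if all(s in model for s in ("2.5", "flash", "lite")):
            return 512
        elif all(s in model for s in ("2.5", "flash")):
            return 1
    return 0


assert thinking_budget("gemini-2.5-flash-lite", True) == 512
assert thinking_budget("gemini-2.5-flash", True) == 1
assert thinking_budget("gemini-2.5-pro", True) == 0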
letta/llm_api/llm_client_base.py
CHANGED
@@ -174,6 +174,10 @@ class LLMClientBase:
         """
         raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")

+    @abstractmethod
+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        raise NotImplementedError
+
     @abstractmethod
     def handle_llm_error(self, e: Exception) -> Exception:
         """
letta/llm_api/openai_client.py
CHANGED
@@ -276,6 +276,9 @@ class OpenAIClient(LLMClientBase):
         response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

+    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
+        return is_openai_reasoning_model(llm_config.model)
+
     @trace_method
     def convert_response_to_chat_completion(
         self,
@@ -298,7 +301,7 @@ class OpenAIClient(LLMClientBase):
         )

         # If we used a reasoning model, create a content part for the ommitted reasoning
-        if
+        if self.is_reasoning_model(llm_config):
             chat_completion_response.choices[0].message.omitted_reasoning_content = True

         return chat_completion_response
letta/log.py
CHANGED
@@ -61,13 +61,15 @@ DEVELOPMENT_LOGGING = {
     },
 }

+# Configure logging once at module initialization to avoid performance overhead
+dictConfig(DEVELOPMENT_LOGGING)
+

 def get_logger(name: Optional[str] = None) -> "logging.Logger":
     """returns the project logger, scoped to a child name if provided
     Args:
         name: will define a child logger
     """
-    dictConfig(DEVELOPMENT_LOGGING)
     parent_logger = logging.getLogger("Letta")
     if name:
         return parent_logger.getChild(name)
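The log.py change moves dictConfig out of get_logger, so configuration runs once at import time rather than on every call. A generic illustration of the pattern with a simplified config dict, not letta's DEVELOPMENT_LOGGING:

import logging
from logging.config import dictConfig

LOGGING = {"version": 1, "loggers": {"Letta": {"level": "INFO"}}}
dictConfig(LOGGING)  # configure once, at module import


def get_logger(name: str | None = None) -> logging.Logger:
    # callers only pay for a cheap getLogger/getChild lookup
    parent = logging.getLogger("Letta")
    return parent.getChild(name) if name else parent


assert get_logger("agent").name == "Letta.agent"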
letta/schemas/enums.py
CHANGED
@@ -1,4 +1,4 @@
-from enum import Enum
+from enum import Enum, StrEnum


 class ProviderType(str, Enum):
@@ -42,7 +42,7 @@ class OptionState(str, Enum):
     DEFAULT = "default"


-class JobStatus(str, Enum):
+class JobStatus(StrEnum):
     """
     Status of the job.
     """
@@ -63,7 +63,8 @@ class JobStatus(str, Enum):

 class AgentStepStatus(str, Enum):
     """
-    Status of
+    Status of agent step.
+    TODO (cliandy): consolidate this with job status
     """

     paused = "paused"
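Why the JobStatus switch to StrEnum matters (Python 3.11+): the member formats as its plain value, so f-strings and log output show "completed" rather than "JobStatus.completed". A minimal illustration with stand-in enums, not letta's actual classes:

from enum import Enum, StrEnum


class OldStatus(str, Enum):
    completed = "completed"


class NewStatus(StrEnum):
    completed = "completed"


# on Python 3.11+ the str-mixin enum formats with its class name, StrEnum does not
assert f"{OldStatus.completed}" == "OldStatus.completed"
assert f"{NewStatus.completed}" == "completed"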
|