letta-nightly 0.9.1.dev20250731104458__py3-none-any.whl → 0.10.0.dev20250801010504__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +2 -1
- letta/agent.py +1 -1
- letta/agents/base_agent.py +2 -2
- letta/agents/letta_agent.py +22 -8
- letta/agents/letta_agent_batch.py +2 -2
- letta/agents/voice_agent.py +2 -2
- letta/client/client.py +0 -11
- letta/errors.py +11 -0
- letta/functions/function_sets/builtin.py +3 -7
- letta/functions/mcp_client/types.py +107 -1
- letta/helpers/reasoning_helper.py +48 -0
- letta/helpers/tool_execution_helper.py +2 -65
- letta/interfaces/openai_streaming_interface.py +38 -2
- letta/llm_api/anthropic_client.py +1 -5
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/openai_client.py +2 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +3 -2
- letta/orm/agent.py +5 -0
- letta/orm/enums.py +0 -1
- letta/orm/file.py +0 -1
- letta/orm/files_agents.py +9 -9
- letta/orm/sandbox_config.py +1 -1
- letta/orm/sqlite_functions.py +15 -13
- letta/prompts/system/memgpt_generate_tool.txt +139 -0
- letta/schemas/agent.py +15 -1
- letta/schemas/enums.py +6 -0
- letta/schemas/file.py +3 -3
- letta/schemas/letta_ping.py +28 -0
- letta/schemas/letta_request.py +9 -0
- letta/schemas/letta_stop_reason.py +25 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/mcp.py +16 -3
- letta/schemas/memory.py +5 -0
- letta/schemas/providers/lmstudio.py +7 -0
- letta/schemas/providers/ollama.py +11 -8
- letta/schemas/sandbox_config.py +17 -7
- letta/server/rest_api/app.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +93 -30
- letta/server/rest_api/routers/v1/blocks.py +52 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +2 -1
- letta/server/rest_api/routers/v1/tools.py +43 -101
- letta/server/rest_api/streaming_response.py +121 -9
- letta/server/server.py +6 -10
- letta/services/agent_manager.py +41 -4
- letta/services/block_manager.py +63 -1
- letta/services/file_processor/chunker/line_chunker.py +20 -19
- letta/services/file_processor/file_processor.py +0 -2
- letta/services/file_processor/file_types.py +1 -2
- letta/services/files_agents_manager.py +46 -6
- letta/services/helpers/agent_manager_helper.py +185 -13
- letta/services/job_manager.py +4 -4
- letta/services/mcp/oauth_utils.py +6 -150
- letta/services/mcp_manager.py +120 -2
- letta/services/sandbox_config_manager.py +3 -5
- letta/services/tool_executor/builtin_tool_executor.py +13 -18
- letta/services/tool_executor/files_tool_executor.py +31 -27
- letta/services/tool_executor/mcp_tool_executor.py +10 -1
- letta/services/tool_executor/{tool_executor.py → sandbox_tool_executor.py} +14 -2
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -1
- letta/services/tool_manager.py +59 -21
- letta/services/tool_sandbox/base.py +18 -2
- letta/services/tool_sandbox/e2b_sandbox.py +5 -35
- letta/services/tool_sandbox/local_sandbox.py +5 -22
- letta/services/tool_sandbox/modal_sandbox.py +205 -0
- letta/settings.py +27 -8
- letta/system.py +1 -4
- letta/templates/template_helper.py +5 -0
- letta/utils.py +14 -2
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/METADATA +7 -3
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/RECORD +75 -72
- letta/orm/__all__.py +0 -15
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/LICENSE +0 -0
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/WHEEL +0 -0
- {letta_nightly-0.9.1.dev20250731104458.dist-info → letta_nightly-0.10.0.dev20250801010504.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
@@ -5,7 +5,7 @@ try:
     __version__ = version("letta")
 except PackageNotFoundError:
     # Fallback for development installations
-    __version__ = "0.9.1"
+    __version__ = "0.10.0"
 
 if os.environ.get("LETTA_VERSION"):
     __version__ = os.environ["LETTA_VERSION"]
@@ -24,6 +24,7 @@ from letta.schemas.enums import JobStatus
 from letta.schemas.file import FileMetadata
 from letta.schemas.job import Job
 from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_ping import LettaPing
 from letta.schemas.letta_stop_reason import LettaStopReason
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ArchivalMemorySummary, BasicBlockMemory, ChatMemory, Memory, RecallMemorySummary
letta/agent.py
CHANGED
@@ -1298,7 +1298,7 @@ class Agent(BaseAgent):
         )
 
     async def get_context_window_async(self) -> ContextWindowOverview:
-        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION":
+        if os.getenv("LETTA_ENVIRONMENT") == "PRODUCTION" and os.getenv("ANTHROPIC_API_KEY"):
             return await self.get_context_window_from_anthropic_async()
         return await self.get_context_window_from_tiktoken_async()
 
letta/agents/base_agent.py
CHANGED
@@ -17,7 +17,7 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
-from letta.services.helpers.agent_manager_helper import …
+from letta.services.helpers.agent_manager_helper import compile_system_message_async
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.utils import united_diff
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
         if num_archival_memories is None:
             num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
 
-        new_system_message_str = …
+        new_system_message_str = await compile_system_message_async(
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
letta/agents/letta_agent.py
CHANGED
@@ -22,6 +22,7 @@ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
 from letta.errors import ContextWindowExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import AsyncTimer, get_utc_time, get_utc_timestamp_ns, ns_to_ms
+from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -756,6 +757,9 @@ class LettaAgent(BaseAgent):
             interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
+                messages=current_in_context_messages + new_in_context_messages,
+                tools=request_data.get("tools", []),
             )
         else:
             raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
@@ -781,13 +785,20 @@
 
         stream_end_time_ns = get_utc_timestamp_ns()
 
-        # …
+        # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
+        if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
+            logger.warning(
+                f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
+            )
+            interface.input_tokens = interface.fallback_input_tokens
+            interface.output_tokens = interface.fallback_output_tokens
+
         usage.step_count += 1
         usage.completion_tokens += interface.output_tokens
         usage.prompt_tokens += interface.input_tokens
         usage.total_tokens += interface.input_tokens + interface.output_tokens
         MetricRegistry().message_output_tokens.record(
-            …
+            usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
         )
 
         # log LLM request time
@@ -814,9 +825,9 @@
             agent_state,
             tool_rules_solver,
             UsageStatistics(
-                completion_tokens=…
-                prompt_tokens=…
-                total_tokens=…
+                completion_tokens=usage.completion_tokens,
+                prompt_tokens=usage.prompt_tokens,
+                total_tokens=usage.total_tokens,
             ),
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=interface.letta_message_id,
@@ -861,8 +872,8 @@
                 # "stop_sequence": None,
                 "type": "message",
                 "usage": {
-                    "input_tokens": …
-                    "output_tokens": …
+                    "input_tokens": usage.prompt_tokens,
+                    "output_tokens": usage.completion_tokens,
                 },
             },
             step_id=step_id,
@@ -1130,7 +1141,7 @@
         return new_in_context_messages
 
     @trace_method
-    async def summarize_conversation_history(self) -> …
+    async def summarize_conversation_history(self) -> None:
         """Called when the developer explicitly triggers compaction via the API"""
         agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
         message_ids = agent_state.message_ids
@@ -1169,6 +1180,9 @@
             tool_rules_solver=tool_rules_solver,
         )
 
+        # scrub inner thoughts from messages if reasoning is completely disabled
+        in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, agent_state.llm_config)
+
         tools = [
             t
             for t in agent_state.tools
letta/agents/letta_agent_batch.py
CHANGED
@@ -19,7 +19,7 @@ from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType
+from letta.schemas.enums import AgentStepStatus, JobStatus, MessageStreamStatus, ProviderType, SandboxType
 from letta.schemas.job import JobUpdate
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -28,7 +28,7 @@ from letta.schemas.letta_response import LettaBatchResponse, LettaResponse
 from letta.schemas.llm_batch_job import AgentStepState, LLMBatchItem
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall as OpenAIToolCall
-from letta.schemas.sandbox_config import SandboxConfig…
+from letta.schemas.sandbox_config import SandboxConfig
 from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_heartbeat_system_message, create_letta_messages_from_llm_response
letta/agents/voice_agent.py
CHANGED
@@ -36,7 +36,7 @@ from letta.server.rest_api.utils import (
 )
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import …
+from letta.services.helpers.agent_manager_helper import compile_system_message_async
 from letta.services.job_manager import JobManager
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
@@ -145,7 +145,7 @@ class VoiceAgent(BaseAgent):
 
         in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
         memory_edit_timestamp = get_utc_time()
-        in_context_messages[0].content[0].text = …
+        in_context_messages[0].content[0].text = await compile_system_message_async(
             system_prompt=agent_state.system,
             in_context_memory=agent_state.memory,
             in_context_memory_last_edit=memory_edit_timestamp,
letta/client/client.py
CHANGED
@@ -1,4 +1,3 @@
-import sys
 import time
 from typing import Callable, Dict, List, Optional, Union
 
@@ -33,16 +32,6 @@ from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
 from letta.schemas.tool_rule import BaseToolRule
 from letta.utils import get_human_text, get_persona_text
 
-# Print deprecation notice in yellow when module is imported
-print(
-    "\n\n\033[93m"
-    + "DEPRECATION WARNING: This legacy Python client has been deprecated and will be removed in a future release.\n"
-    + "Please migrate to the new official python SDK by running: pip install letta-client\n"
-    + "For further documentation, visit: https://docs.letta.com/api-reference/overview#python-sdk"
-    + "\033[0m\n\n",
-    file=sys.stderr,
-)
-
 
 class AbstractClient(object):
     def __init__(
letta/errors.py
CHANGED
@@ -49,6 +49,17 @@ class LettaToolCreateError(LettaError):
         super().__init__(message=message or self.default_error_message)
 
 
+class LettaToolNameConflictError(LettaError):
+    """Error raised when a tool name already exists."""
+
+    def __init__(self, tool_name: str):
+        super().__init__(
+            message=f"Tool with name '{tool_name}' already exists in your organization",
+            code=ErrorCode.INVALID_ARGUMENT,
+            details={"tool_name": tool_name},
+        )
+
+
 class LettaConfigurationError(LettaError):
     """Error raised when there are configuration-related issues."""
 
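For context, a minimal sketch of how the new error surfaces the conflicting name (this assumes LettaError exposes the `message` and `details` kwargs as attributes, which is not shown in this diff):

    from letta.errors import LettaToolNameConflictError

    try:
        raise LettaToolNameConflictError(tool_name="web_search")
    except LettaToolNameConflictError as e:
        print(e.message)               # "Tool with name 'web_search' already exists in your organization"
        print(e.details["tool_name"])  # "web_search"  (attribute assumed)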
letta/functions/function_sets/builtin.py
CHANGED
@@ -17,11 +17,7 @@ def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) ->
     raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
 
 
-async def web_search(
-    tasks: List[SearchTask],
-    limit: int = 3,
-    return_raw: bool = False,
-) -> str:
+async def web_search(tasks: List[SearchTask], limit: int = 1, return_raw: bool = True) -> str:
     """
     Search the web with a list of query/question pairs and extract passages that answer the corresponding questions.
 
@@ -39,9 +35,9 @@ async def web_search(
 
     Args:
         tasks (List[SearchTask]): A list of search tasks, each containing a `query` and a corresponding `question`.
-        limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 3.
+        limit (int, optional): Maximum number of URLs to fetch and analyse per task (must be > 0). Defaults to 1.
         return_raw (bool, optional): If set to True, returns the raw content of the web pages.
-            This should be …
+            This should be True unless otherwise specified by the user. Defaults to True.
 
     Returns:
         str: A JSON-encoded string containing a list of search results.
letta/functions/mcp_client/types.py
CHANGED
@@ -1,5 +1,7 @@
+import re
+from abc import abstractmethod
 from enum import Enum
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 from mcp import Tool
 from pydantic import BaseModel, Field
@@ -7,6 +9,9 @@ from pydantic import BaseModel, Field
 # MCP Authentication Constants
 MCP_AUTH_HEADER_AUTHORIZATION = "Authorization"
 MCP_AUTH_TOKEN_BEARER_PREFIX = "Bearer"
+TEMPLATED_VARIABLE_REGEX = (
+    r"\{\{\s*([A-Z_][A-Z0-9_]*)\s*(?:\|\s*([^}]+?)\s*)?\}\}"  # Allows for optional whitespace around the variable name and default value
+)
 
 
 class MCPTool(Tool):
@@ -23,6 +28,91 @@ class BaseServerConfig(BaseModel):
     server_name: str = Field(..., description="The name of the server")
     type: MCPServerType
 
+    def is_templated_tool_variable(self, value: str) -> bool:
+        """
+        Check if string contains templated variables.
+
+        Args:
+            value: The value string to check
+
+        Returns:
+            True if the value contains templated variables in the format {{ VARIABLE_NAME }} or {{ VARIABLE_NAME | default }}, False otherwise
+        """
+        return bool(re.search(TEMPLATED_VARIABLE_REGEX, value))
+
+    def get_tool_variable(self, value: str, environment_variables: Dict[str, str]) -> Optional[str]:
+        """
+        Replace templated variables in a value string with their values from environment variables.
+        Supports fallback/default values with pipe syntax.
+
+        Args:
+            value: The value string that may contain templated variables (e.g., "Bearer {{ API_KEY | default_token }}")
+            environment_variables: Dictionary of environment variables
+
+        Returns:
+            The string with templated variables replaced, or None if no templated variables found
+        """
+
+        # If no templated variables found or default value provided, return the original value
+        if not self.is_templated_tool_variable(value):
+            return value
+
+        def replace_template(match):
+            variable_name = match.group(1)
+            default_value = match.group(2) if match.group(2) else None
+
+            # Try to get the value from environment variables
+            env_value = environment_variables.get(variable_name) if environment_variables else None
+
+            # Return environment value if found, otherwise return default value, otherwise return empty string
+            if env_value is not None:
+                return env_value
+            elif default_value is not None:
+                return default_value
+            else:
+                # If no environment value and no default, return the original template
+                return match.group(0)
+
+        # Replace all templated variables in the token
+        result = re.sub(TEMPLATED_VARIABLE_REGEX, replace_template, value)
+
+        # If the result still contains unreplaced templates, just return original value
+        if re.search(TEMPLATED_VARIABLE_REGEX, result):
+            logger.warning(f"Unable to resolve templated variable in value: {value}")
+            return value
+
+        return result
+
+    def resolve_custom_headers(
+        self, custom_headers: Optional[Dict[str, str]], environment_variables: Optional[Dict[str, str]] = None
+    ) -> Optional[Dict[str, str]]:
+        """
+        Resolve templated variables in custom headers dictionary.
+
+        Args:
+            custom_headers: Dictionary of custom headers that may contain templated variables
+            environment_variables: Dictionary of environment variables for resolving templates
+
+        Returns:
+            Dictionary with resolved header values, or None if custom_headers is None
+        """
+        if custom_headers is None:
+            return None
+
+        resolved_headers = {}
+        for key, value in custom_headers.items():
+            # Resolve templated variables in each header value
+            if self.is_templated_tool_variable(value):
+                resolved_headers[key] = self.get_tool_variable(value, environment_variables)
+            else:
+                resolved_headers[key] = value
+
+        return resolved_headers
+
+    @abstractmethod
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        raise NotImplementedError
+
 
 class SSEServerConfig(BaseServerConfig):
     """
@@ -47,6 +137,12 @@ class SSEServerConfig(BaseServerConfig):
             return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
         return self.auth_token
 
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        if self.auth_token and super().is_templated_tool_variable(self.auth_token):
+            self.auth_token = super().get_tool_variable(self.auth_token, environment_variables)
+
+        self.custom_headers = super().resolve_custom_headers(self.custom_headers, environment_variables)
+
     def to_dict(self) -> dict:
         values = {
             "transport": "sse",
@@ -72,6 +168,10 @@ class StdioServerConfig(BaseServerConfig):
     args: List[str] = Field(..., description="The arguments to pass to the command")
     env: Optional[dict[str, str]] = Field(None, description="Environment variables to set")
 
+    # TODO: @jnjpng templated auth handling for stdio
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        pass
+
    def to_dict(self) -> dict:
         values = {
             "transport": "stdio",
@@ -106,6 +206,12 @@ class StreamableHTTPServerConfig(BaseServerConfig):
             return self.auth_token[len(f"{MCP_AUTH_TOKEN_BEARER_PREFIX} ") :]
         return self.auth_token
 
+    def resolve_environment_variables(self, environment_variables: Optional[Dict[str, str]] = None) -> None:
+        if self.auth_token and super().is_templated_tool_variable(self.auth_token):
+            self.auth_token = super().get_tool_variable(self.auth_token, environment_variables)
+
+        self.custom_headers = super().resolve_custom_headers(self.custom_headers, environment_variables)
+
     def model_post_init(self, __context) -> None:
         """Validate the server URL format."""
         # Basic validation for streamable HTTP URLs
letta/helpers/reasoning_helper.py
ADDED
@@ -0,0 +1,48 @@
+from typing import List
+
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message_content import TextContent
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message
+
+
+def is_reasoning_completely_disabled(llm_config: LLMConfig) -> bool:
+    """
+    Check if reasoning is completely disabled by verifying all three conditions:
+    - put_inner_thoughts_in_kwargs is False
+    - enable_reasoner is False
+    - max_reasoning_tokens is 0
+
+    Args:
+        llm_config: The LLM configuration to check
+
+    Returns:
+        True if reasoning is completely disabled, False otherwise
+    """
+    return llm_config.put_inner_thoughts_in_kwargs is False and llm_config.enable_reasoner is False and llm_config.max_reasoning_tokens == 0
+
+
+def scrub_inner_thoughts_from_messages(messages: List[Message], llm_config: LLMConfig) -> List[Message]:
+    """
+    Remove inner thoughts (reasoning text) from assistant messages when reasoning is completely disabled.
+    This makes the LLM think reasoning was never enabled by presenting clean message history.
+
+    Args:
+        messages: List of messages to potentially scrub
+        llm_config: The LLM configuration to check
+
+    Returns:
+        The message list with inner thoughts removed if reasoning is disabled, otherwise unchanged
+    """
+    # early return if reasoning is not completely disabled
+    if not is_reasoning_completely_disabled(llm_config):
+        return messages
+
+    # process messages to remove inner thoughts from assistant messages
+    for message in messages:
+        if message.role == MessageRole.assistant and message.content and message.tool_calls:
+            # remove text content from assistant messages that also have tool calls
+            # keep only non-text content (if any)
+            message.content = [content for content in message.content if not isinstance(content, TextContent)]
+
+    return messages
letta/helpers/tool_execution_helper.py
CHANGED
@@ -1,17 +1,7 @@
 from collections import OrderedDict
 from typing import Any, Dict, Optional
 
-from letta.constants import …
-from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source
-from letta.functions.composio_helpers import execute_composio_action, generate_composio_action_from_func_name
-from letta.helpers.composio_helpers import get_composio_api_key
-from letta.orm.enums import ToolType
-from letta.schemas.agent import AgentState
-from letta.schemas.sandbox_config import SandboxRunResult
-from letta.schemas.tool import Tool
-from letta.schemas.user import User
-from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSandbox
-from letta.utils import get_friendly_error_msg
+from letta.constants import PRE_EXECUTION_MESSAGE_ARG
 
 
 def enable_strict_mode(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
@@ -44,6 +34,7 @@ def add_pre_execution_message(tool_schema: Dict[str, Any], description: Optional
 
     Args:
         tool_schema (Dict[str, Any]): The original tool schema.
+        description (Optional[str]): Description of the tool schema. Defaults to None.
 
     Returns:
         Dict[str, Any]: A new tool schema with the `pre_execution_message` field added at the beginning.
@@ -117,57 +108,3 @@ def remove_request_heartbeat(tool_schema: Dict[str, Any]) -> Dict[str, Any]:
     schema["parameters"] = {**parameters, "properties": properties, "required": required}
 
     return schema
-
-
-# TODO: Deprecate the `execute_external_tool` function on the agent body
-def execute_external_tool(
-    agent_state: AgentState,
-    function_name: str,
-    function_args: dict,
-    target_letta_tool: Tool,
-    actor: User,
-    allow_agent_state_modifications: bool = False,
-) -> tuple[Any, Optional[SandboxRunResult]]:
-    # TODO: need to have an AgentState object that actually has full access to the block data
-    # this is because the sandbox tools need to be able to access block.value to edit this data
-    try:
-        if target_letta_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
-            action_name = generate_composio_action_from_func_name(target_letta_tool.name)
-            # Get entity ID from the agent_state
-            entity_id = None
-            for env_var in agent_state.tool_exec_environment_variables:
-                if env_var.key == COMPOSIO_ENTITY_ENV_VAR_KEY:
-                    entity_id = env_var.value
-            # Get composio_api_key
-            composio_api_key = get_composio_api_key(actor=actor)
-            function_response = execute_composio_action(
-                action_name=action_name, args=function_args, api_key=composio_api_key, entity_id=entity_id
-            )
-            return function_response, None
-        elif target_letta_tool.tool_type == ToolType.CUSTOM:
-            # Parse the source code to extract function annotations
-            annotations = get_function_annotations_from_source(target_letta_tool.source_code, function_name)
-            # Coerce the function arguments to the correct types based on the annotations
-            function_args = coerce_dict_args_by_annotations(function_args, annotations)
-
-            # execute tool in a sandbox
-            # TODO: allow agent_state to specify which sandbox to execute tools in
-            # TODO: This is only temporary, can remove after we publish a pip package with this object
-            if allow_agent_state_modifications:
-                agent_state_copy = agent_state.__deepcopy__()
-                agent_state_copy.tools = []
-                agent_state_copy.tool_rules = []
-            else:
-                agent_state_copy = None
-
-            tool_execution_result = ToolExecutionSandbox(function_name, function_args, actor).run(agent_state=agent_state_copy)
-            function_response, updated_agent_state = tool_execution_result.func_return, tool_execution_result.agent_state
-            # TODO: Bring this back
-            # if allow_agent_state_modifications and updated_agent_state is not None:
-            #     self.update_memory_if_changed(updated_agent_state.memory)
-            return function_response, tool_execution_result
-    except Exception as e:
-        # Need to catch error here, or else trunction wont happen
-        # TODO: modify to function execution error
-        function_response = get_friendly_error_msg(function_name=function_name, exception_name=type(e).__name__, exception_message=str(e))
-        return function_response, None
letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -9,6 +9,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns, ns_to_ms
 from letta.llm_api.openai_client import is_openai_reasoning_model
+from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
 from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
@@ -19,6 +20,7 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
 from letta.streaming_utils import JSONInnerThoughtsExtractor
+from letta.utils import count_tokens
 
 logger = get_logger(__name__)
 
@@ -30,7 +32,14 @@ class OpenAIStreamingInterface:
     and detection of tool call events.
     """
 
-    def __init__(…
+    def __init__(
+        self,
+        use_assistant_message: bool = False,
+        put_inner_thoughts_in_kwarg: bool = False,
+        is_openai_proxy: bool = False,
+        messages: Optional[list] = None,
+        tools: Optional[list] = None,
+    ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
         self.assistant_message_tool_kwarg = DEFAULT_MESSAGE_TOOL_KWARG
@@ -53,10 +62,19 @@ class OpenAIStreamingInterface:
         self.message_id = None
         self.model = None
 
-        # …
+        # Token counters (from OpenAI usage)
         self.input_tokens = 0
         self.output_tokens = 0
 
+        # Fallback token counters (using tiktoken cl200k-base)
+        self.fallback_input_tokens = 0
+        self.fallback_output_tokens = 0
+
+        # Store messages and tools for fallback counting
+        self.is_openai_proxy = is_openai_proxy
+        self.messages = messages or []
+        self.tools = tools or []
+
         self.content_buffer: list[str] = []
         self.tool_call_name: str | None = None
         self.tool_call_id: str | None = None
@@ -95,6 +113,18 @@ class OpenAIStreamingInterface:
         Iterates over the OpenAI stream, yielding SSE events.
         It also collects tokens and detects if a tool call is triggered.
         """
+        # Fallback input token counting - this should only be required for non-OpenAI providers using the OpenAI client (e.g. LMStudio)
+        if self.is_openai_proxy:
+            if self.messages:
+                # Convert messages to dict format for token counting
+                message_dicts = [msg.to_openai_dict() if hasattr(msg, "to_openai_dict") else msg for msg in self.messages]
+                self.fallback_input_tokens = num_tokens_from_messages(message_dicts)  # fallback to gpt-4 cl100k-base
+
+            if self.tools:
+                # Convert tools to dict format for token counting
+                tool_dicts = [tool["function"] if isinstance(tool, dict) and "function" in tool else tool for tool in self.tools]
+                self.fallback_input_tokens += num_tokens_from_functions(tool_dicts)
+
         first_chunk = True
         try:
             async with stream:
@@ -113,6 +143,9 @@
                         metric_attributes["model.name"] = chunk.model
                         MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
 
+                    if self.is_openai_proxy:
+                        self.fallback_output_tokens += count_tokens(chunk.model_dump_json())
+
                     first_chunk = False
 
                     if not self.model or not self.message_id:
@@ -153,6 +186,9 @@
                                 tool_call.function.arguments
                             )
 
+                            if self.is_openai_proxy:
+                                self.fallback_output_tokens += count_tokens(tool_call.function.arguments)
+
                             # If we have inner thoughts, we should output them as a chunk
                             if updates_inner_thoughts:
                                 if prev_message_type and prev_message_type != "reasoning_message":
letta/llm_api/anthropic_client.py
CHANGED
@@ -215,11 +215,7 @@ class AnthropicClient(LLMClientBase):
             )
             llm_config.put_inner_thoughts_in_kwargs = True
         else:
-
-            # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
-            tool_choice = {"type": "any", "disable_parallel_tool_use": True}
-        else:
-            tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
+            tool_choice = {"type": "any", "disable_parallel_tool_use": True}
         tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
 
         # Add tool choice
letta/llm_api/google_vertex_client.py
CHANGED
@@ -102,7 +102,7 @@ class GoogleVertexClient(LLMClientBase):
         unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties", "$schema"]
         keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
         for key_to_remove in keys_to_remove_at_this_level:
-            logger.…
+            logger.debug(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
             del schema_part[key_to_remove]
 
         if schema_part.get("type") == "string" and "format" in schema_part:
letta/llm_api/llm_client.py
CHANGED
@@ -58,7 +58,7 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
-            case ProviderType.openai | ProviderType.together:
+            case ProviderType.openai | ProviderType.together | ProviderType.ollama:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
letta/llm_api/openai_client.py
CHANGED