letta-nightly 0.11.0.dev20250807104511__py3-none-any.whl → 0.11.0.dev20250808104456__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. letta/agent.py +2 -1
  2. letta/agents/letta_agent.py +215 -143
  3. letta/constants.py +4 -1
  4. letta/embeddings.py +6 -5
  5. letta/functions/function_sets/base.py +2 -2
  6. letta/functions/function_sets/files.py +22 -9
  7. letta/interfaces/anthropic_streaming_interface.py +291 -265
  8. letta/interfaces/openai_streaming_interface.py +270 -250
  9. letta/llm_api/anthropic.py +3 -10
  10. letta/llm_api/openai_client.py +6 -1
  11. letta/orm/__init__.py +1 -0
  12. letta/orm/step.py +14 -0
  13. letta/orm/step_metrics.py +71 -0
  14. letta/schemas/enums.py +9 -0
  15. letta/schemas/llm_config.py +8 -6
  16. letta/schemas/providers/lmstudio.py +2 -2
  17. letta/schemas/providers/ollama.py +42 -54
  18. letta/schemas/providers/openai.py +1 -1
  19. letta/schemas/step.py +6 -0
  20. letta/schemas/step_metrics.py +23 -0
  21. letta/schemas/tool_rule.py +10 -29
  22. letta/services/step_manager.py +179 -1
  23. letta/services/tool_executor/builtin_tool_executor.py +4 -1
  24. letta/services/tool_executor/core_tool_executor.py +2 -10
  25. letta/services/tool_executor/files_tool_executor.py +89 -40
  26. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/METADATA +1 -1
  27. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/RECORD +30 -28
  28. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/LICENSE +0 -0
  29. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/WHEEL +0 -0
  30. {letta_nightly-0.11.0.dev20250807104511.dist-info → letta_nightly-0.11.0.dev20250808104456.dist-info}/entry_points.txt +0 -0
letta/orm/step_metrics.py ADDED
@@ -0,0 +1,71 @@
+ from typing import TYPE_CHECKING, Optional
+
+ from sqlalchemy import BigInteger, ForeignKey, String
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+ from letta.orm.mixins import AgentMixin, ProjectMixin
+ from letta.orm.sqlalchemy_base import SqlalchemyBase
+ from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
+
+ if TYPE_CHECKING:
+     from letta.orm.agent import Agent
+     from letta.orm.job import Job
+     from letta.orm.step import Step
+
+
+ class StepMetrics(SqlalchemyBase, ProjectMixin, AgentMixin):
+     """Tracks performance metrics for agent steps."""
+
+     __tablename__ = "step_metrics"
+     __pydantic_model__ = PydanticStepMetrics
+
+     id: Mapped[str] = mapped_column(
+         ForeignKey("steps.id", ondelete="CASCADE"),
+         primary_key=True,
+         doc="The unique identifier of the step this metric belongs to (also serves as PK)",
+     )
+     organization_id: Mapped[str] = mapped_column(
+         ForeignKey("organizations.id", ondelete="RESTRICT"),
+         nullable=True,
+         doc="The unique identifier of the organization",
+     )
+     provider_id: Mapped[Optional[str]] = mapped_column(
+         ForeignKey("providers.id", ondelete="RESTRICT"),
+         nullable=True,
+         doc="The unique identifier of the provider",
+     )
+     job_id: Mapped[Optional[str]] = mapped_column(
+         ForeignKey("jobs.id", ondelete="SET NULL"),
+         nullable=True,
+         doc="The unique identifier of the job",
+     )
+     llm_request_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Time spent on the LLM request in nanoseconds",
+     )
+     tool_execution_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Time spent on tool execution in nanoseconds",
+     )
+     step_ns: Mapped[Optional[int]] = mapped_column(
+         BigInteger,
+         nullable=True,
+         doc="Total time for the step in nanoseconds",
+     )
+     base_template_id: Mapped[Optional[str]] = mapped_column(
+         String,
+         nullable=True,
+         doc="The base template ID for the step",
+     )
+     template_id: Mapped[Optional[str]] = mapped_column(
+         String,
+         nullable=True,
+         doc="The template ID for the step",
+     )
+
+     # Relationships (foreign keys)
+     step: Mapped["Step"] = relationship("Step", back_populates="metrics", uselist=False)
+     job: Mapped[Optional["Job"]] = relationship("Job")
+     agent: Mapped[Optional["Agent"]] = relationship("Agent")
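For orientation, a minimal read-path sketch against the new table; the engine URL and step id are placeholders, and a configured Letta database is assumed:

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from letta.orm.step_metrics import StepMetrics

engine = create_engine("sqlite:///letta.db")  # placeholder DSN (assumption)
with Session(engine) as session:
    # id doubles as the primary key and the FK to steps.id
    metrics = session.scalar(select(StepMetrics).where(StepMetrics.id == "step-123"))
    if metrics is not None and metrics.step_ns is not None:
        print(f"step {metrics.id}: {metrics.step_ns / 1e6:.1f} ms total")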
letta/schemas/enums.py CHANGED
@@ -160,3 +160,12 @@ class SandboxType(str, Enum):
      E2B = "e2b"
      MODAL = "modal"
      LOCAL = "local"
+
+
+ class StepStatus(str, Enum):
+     """Status of a step execution"""
+
+     PENDING = "pending"
+     SUCCESS = "success"
+     FAILED = "failed"
+     CANCELLED = "cancelled"
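Because StepStatus subclasses str, members compare directly against their raw values; a quick sketch:

from letta.schemas.enums import StepStatus

status = StepStatus.PENDING
assert status == "pending"  # str-backed enum: equal to its value
terminal = {StepStatus.SUCCESS, StepStatus.FAILED, StepStatus.CANCELLED}
print(status in terminal)  # False while the step is still in flight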
letta/schemas/llm_config.py CHANGED
@@ -58,7 +58,7 @@ class LLMConfig(BaseModel):
      enable_reasoner: bool = Field(
          False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
      )
-     reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+     reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
          None,
          description="The reasoning effort to use when generating text reasoning models",
      )
@@ -188,6 +188,8 @@ class LLMConfig(BaseModel):
      @classmethod
      def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool):
          if reasoning:
+             config.enable_reasoner = True
+
              if (
                  config.model_endpoint_type == "anthropic"
                  and ("claude-opus-4" in config.model or "claude-sonnet-4" in config.model or "claude-3-7-sonnet" in config.model)
@@ -195,19 +197,19 @@ class LLMConfig(BaseModel):
                  config.model_endpoint_type == "google_vertex" and ("gemini-2.5-flash" in config.model or "gemini-2.0-pro" in config.model)
              ):
                  config.put_inner_thoughts_in_kwargs = False
-                 config.enable_reasoner = True
                  if config.max_reasoning_tokens == 0:
                      config.max_reasoning_tokens = 1024
              elif config.model_endpoint_type == "openai" and (
                  config.model.startswith("o1") or config.model.startswith("o3") or config.model.startswith("o4")
              ):
-                 config.put_inner_thoughts_in_kwargs = True
-                 config.enable_reasoner = True
+                 config.put_inner_thoughts_in_kwargs = False
                  if config.reasoning_effort is None:
                      config.reasoning_effort = "medium"
              else:
                  config.put_inner_thoughts_in_kwargs = True
-                 config.enable_reasoner = False
+
          else:
-             config.put_inner_thoughts_in_kwargs = False
              config.enable_reasoner = False
+             config.put_inner_thoughts_in_kwargs = False
+
+         return config
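A sketch of the adjusted behavior for an OpenAI o-series model; the constructor arguments below are placeholder values, not a recommended config:

from letta.schemas.llm_config import LLMConfig

config = LLMConfig(
    model="o3-mini",
    model_endpoint_type="openai",
    model_endpoint="https://api.openai.com/v1",
    context_window=200_000,  # placeholder value
)
config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=True)
assert config.enable_reasoner is True                # now set once, up front
assert config.put_inner_thoughts_in_kwargs is False  # flipped by this change for o-series
assert config.reasoning_effort == "medium"           # defaulted when unset
config.reasoning_effort = "minimal"                  # "minimal" is a newly valid literal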
letta/schemas/providers/lmstudio.py CHANGED
@@ -55,7 +55,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
              LLMConfig(
                  model=model_name,
                  model_endpoint_type="openai",
-                 model_endpoint=self.base_url,
+                 model_endpoint=self.model_endpoint_url,
                  context_window=context_window_size,
                  handle=self.get_handle(model_name),
                  compatibility_type=compatibility_type,
@@ -94,7 +94,7 @@ class LMStudioOpenAIProvider(OpenAIProvider):
              EmbeddingConfig(
                  embedding_model=model_name,
                  embedding_endpoint_type="openai",
-                 embedding_endpoint=self.base_url,
+                 embedding_endpoint=self.model_endpoint_url,
                  embedding_dim=768,  # Default embedding dimension, not context window
                  embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
                  handle=self.get_handle(model_name),
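The swap presumably points configs at the OpenAI-compatible API path rather than the raw base_url; a hypothetical illustration of that distinction (the real property is defined elsewhere in the provider and may differ):

class _LMStudioEndpointSketch:
    """Illustration only, not the real provider class."""

    def __init__(self, base_url: str):
        self.base_url = base_url  # e.g. "http://localhost:1234" (assumed default)

    @property
    def model_endpoint_url(self) -> str:
        # assumption: LM Studio serves its OpenAI-compatible API under /v1
        return f"{self.base_url.rstrip('/')}/v1"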
letta/schemas/providers/ollama.py CHANGED
@@ -3,7 +3,7 @@ from typing import Literal
  import aiohttp
  from pydantic import Field
 
- from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, DEFAULT_CONTEXT_WINDOW, DEFAULT_EMBEDDING_DIM, OLLAMA_API_PREFIX
  from letta.log import get_logger
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
@@ -12,8 +12,6 @@ from letta.schemas.providers.openai import OpenAIProvider
 
  logger = get_logger(__name__)
 
- ollama_prefix = "/v1"
-
 
  class OllamaProvider(OpenAIProvider):
      """Ollama provider that uses the native /api/generate endpoint
@@ -41,19 +39,30 @@ class OllamaProvider(OpenAIProvider):
              response_json = await response.json()
 
          configs = []
-         for model in response_json["models"]:
-             context_window = await self._get_model_context_window(model["name"])
+         for model in response_json.get("models", []):
+             model_name = model["name"]
+             model_details = await self._get_model_details_async(model_name)
+             if not model_details or "completion" not in model_details.get("capabilities", []):
+                 continue
+
+             context_window = None
+             model_info = model_details.get("model_info", {})
+             if architecture := model_info.get("general.architecture"):
+                 if context_length := model_info.get(f"{architecture}.context_length"):
+                     context_window = int(context_length)
+
              if context_window is None:
-                 print(f"Ollama model {model['name']} has no context window, using default 32000")
-                 context_window = 32000
+                 logger.warning(f"Ollama model {model_name} has no context window, using default {DEFAULT_CONTEXT_WINDOW}")
+                 context_window = DEFAULT_CONTEXT_WINDOW
+
              configs.append(
                  LLMConfig(
-                     model=model["name"],
+                     model=model_name,
                      model_endpoint_type=ProviderType.ollama,
-                     model_endpoint=f"{self.base_url}{ollama_prefix}",
+                     model_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
                      model_wrapper=self.default_prompt_formatter,
                      context_window=context_window,
-                     handle=self.get_handle(model["name"]),
+                     handle=self.get_handle(model_name),
                      provider_name=self.name,
                      provider_category=self.provider_category,
                  )
@@ -73,25 +82,36 @@ class OllamaProvider(OpenAIProvider):
              response_json = await response.json()
 
          configs = []
-         for model in response_json["models"]:
-             embedding_dim = await self._get_model_embedding_dim(model["name"])
+         for model in response_json.get("models", []):
+             model_name = model["name"]
+             model_details = await self._get_model_details_async(model_name)
+             if not model_details or "embedding" not in model_details.get("capabilities", []):
+                 continue
+
+             embedding_dim = None
+             model_info = model_details.get("model_info", {})
+             if architecture := model_info.get("general.architecture"):
+                 if embedding_length := model_info.get(f"{architecture}.embedding_length"):
+                     embedding_dim = int(embedding_length)
+
              if not embedding_dim:
-                 print(f"Ollama model {model['name']} has no embedding dimension, using default 1024")
-                 # continue
-                 embedding_dim = 1024
+                 logger.warning(f"Ollama model {model_name} has no embedding dimension, using default {DEFAULT_EMBEDDING_DIM}")
+                 embedding_dim = DEFAULT_EMBEDDING_DIM
+
              configs.append(
                  EmbeddingConfig(
-                     embedding_model=model["name"],
+                     embedding_model=model_name,
                      embedding_endpoint_type=ProviderType.ollama,
-                     embedding_endpoint=f"{self.base_url}{ollama_prefix}",
+                     embedding_endpoint=f"{self.base_url}{OLLAMA_API_PREFIX}",
                      embedding_dim=embedding_dim,
                      embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
-                     handle=self.get_handle(model["name"], is_embedding=True),
+                     handle=self.get_handle(model_name, is_embedding=True),
                  )
              )
          return configs
 
-     async def _get_model_context_window(self, model_name: str) -> int | None:
+     async def _get_model_details_async(self, model_name: str) -> dict | None:
+         """Get detailed information for a specific model from /api/show."""
          endpoint = f"{self.base_url}/api/show"
          payload = {"name": model_name}
 
@@ -102,39 +122,7 @@ class OllamaProvider(OpenAIProvider):
                      error_text = await response.text()
                      logger.warning(f"Failed to get model info for {model_name}: {response.status} - {error_text}")
                      return None
-
-                 response_json = await response.json()
-                 model_info = response_json.get("model_info", {})
-
-                 if architecture := model_info.get("general.architecture"):
-                     if context_length := model_info.get(f"{architecture}.context_length"):
-                         return int(context_length)
-
+                 return await response.json()
          except Exception as e:
-             logger.warning(f"Failed to get model context window for {model_name} with error: {e}")
-
-         return None
-
-     async def _get_model_embedding_dim(self, model_name: str) -> int | None:
-         endpoint = f"{self.base_url}/api/show"
-         payload = {"name": model_name}
-
-         try:
-             async with aiohttp.ClientSession() as session:
-                 async with session.post(endpoint, json=payload) as response:
-                     if response.status != 200:
-                         error_text = await response.text()
-                         logger.warning(f"Failed to get model info for {model_name}: {response.status} - {error_text}")
-                         return None
-
-                     response_json = await response.json()
-                     model_info = response_json.get("model_info", {})
-
-                     if architecture := model_info.get("general.architecture"):
-                         if embedding_length := model_info.get(f"{architecture}.embedding_length"):
-                             return int(embedding_length)
-
-         except Exception as e:
-             logger.warning(f"Failed to get model embedding dimension for {model_name} with error: {e}")
-
-         return None
+             logger.warning(f"Failed to get model details for {model_name} with error: {e}")
+             return None
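Both listing paths now funnel through a single /api/show lookup and filter on the reported capabilities; a rough usage sketch (the constructor arguments and model tag are assumptions):

import asyncio

from letta.schemas.providers.ollama import OllamaProvider

async def main():
    provider = OllamaProvider(name="ollama", base_url="http://localhost:11434")  # assumed args
    details = await provider._get_model_details_async("llama3.1:8b")  # assumed model tag
    if details and "completion" in details.get("capabilities", []):
        info = details.get("model_info", {})
        arch = info.get("general.architecture")
        print(arch, info.get(f"{arch}.context_length"))

asyncio.run(main())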
letta/schemas/providers/openai.py CHANGED
@@ -11,7 +11,7 @@ from letta.schemas.providers.base import Provider
 
  logger = get_logger(__name__)
 
- ALLOWED_PREFIXES = {"gpt-4", "o1", "o3", "o4"}
+ ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
  DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
  DEFAULT_EMBEDDING_BATCH_SIZE = 1024
 
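A sketch of how these sets are presumably applied when filtering the model list (the helper name is hypothetical):

ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}

def is_supported_chat_model(model_name: str) -> bool:  # hypothetical helper
    # keep allowed prefixes, drop known non-chat variants
    return any(model_name.startswith(p) for p in ALLOWED_PREFIXES) and not any(
        kw in model_name for kw in DISALLOWED_KEYWORDS
    )

assert is_supported_chat_model("gpt-5-mini")                # newly admitted by this change
assert not is_supported_chat_model("gpt-4o-audio-preview")  # blocked by "audio"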
letta/schemas/step.py CHANGED
@@ -3,6 +3,7 @@ from typing import Dict, List, Literal, Optional
 
  from pydantic import Field
 
+ from letta.schemas.enums import StepStatus
  from letta.schemas.letta_base import LettaBase
  from letta.schemas.letta_stop_reason import StopReasonType
  from letta.schemas.message import Message
@@ -40,6 +41,11 @@ class Step(StepBase):
      )
      project_id: Optional[str] = Field(None, description="The project that the agent that executed this step belongs to (cloud only).")
 
+     # error tracking fields
+     error_type: Optional[str] = Field(None, description="The type/class of the error that occurred")
+     error_data: Optional[Dict] = Field(None, description="Error details including message, traceback, and additional context")
+     status: Optional[StepStatus] = Field(StepStatus.PENDING, description="Step status: pending, success, or failed")
+
 
  class StepProgression(int, Enum):
      START = auto()
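A sketch of recording a failed step with the new fields; the values are illustrative, and the other Step fields are assumed optional here:

from letta.schemas.enums import StepStatus
from letta.schemas.step import Step

failed = Step(
    status=StepStatus.FAILED,
    error_type="TimeoutError",  # illustrative error class name
    error_data={"message": "LLM request timed out", "request_id": "req-abc"},
)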
letta/schemas/step_metrics.py ADDED
@@ -0,0 +1,23 @@
+ from typing import Optional
+
+ from pydantic import Field
+
+ from letta.schemas.letta_base import LettaBase
+
+
+ class StepMetricsBase(LettaBase):
+     __id_prefix__ = "step"
+
+
+ class StepMetrics(StepMetricsBase):
+     id: str = Field(..., description="The id of the step this metric belongs to (matches steps.id).")
+     organization_id: Optional[str] = Field(None, description="The unique identifier of the organization.")
+     provider_id: Optional[str] = Field(None, description="The unique identifier of the provider.")
+     job_id: Optional[str] = Field(None, description="The unique identifier of the job.")
+     agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
+     llm_request_ns: Optional[int] = Field(None, description="Time spent on LLM requests in nanoseconds.")
+     tool_execution_ns: Optional[int] = Field(None, description="Time spent on tool execution in nanoseconds.")
+     step_ns: Optional[int] = Field(None, description="Total time for the step in nanoseconds.")
+     base_template_id: Optional[str] = Field(None, description="The base template ID that the step belongs to (cloud only).")
+     template_id: Optional[str] = Field(None, description="The template ID that the step belongs to (cloud only).")
+     project_id: Optional[str] = Field(None, description="The project that the step belongs to (cloud only).")
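Every field except id is optional, so a partially populated record serializes cleanly; for example (the id value is illustrative):

from letta.schemas.step_metrics import StepMetrics

metrics = StepMetrics(
    id="step-123",  # illustrative step id
    llm_request_ns=1_200_000_000,
    tool_execution_ns=350_000_000,
    step_ns=1_650_000_000,
)
print(metrics.model_dump(exclude_none=True))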
letta/schemas/tool_rule.py CHANGED
@@ -23,26 +23,24 @@ class BaseToolRule(LettaBase):
      def get_valid_tools(self, tool_call_history: List[str], available_tools: Set[str], last_function_response: Optional[str]) -> set[str]:
          raise NotImplementedError
 
-     def render_prompt(self) -> Optional[str]:
+     def render_prompt(self) -> str | None:
          """Render the prompt template with this rule's attributes."""
-         template_to_use = self.prompt_template or self._get_default_template()
-         if not template_to_use:
+         if not self.prompt_template:
              return None
 
          try:
-             template = Template(template_to_use)
+             template = Template(self.prompt_template)
              return template.render(**self.model_dump())
          except Exception as e:
              logger.warning(
-                 f"Failed to render prompt template for tool rule '{self.tool_name}' (type: {self.type}). "
-                 f"Template: '{template_to_use}'. Error: {e}"
+                 "Failed to render prompt template for tool rule '%s' (type: %s). Template: '%s'. Error: %s",
+                 self.tool_name,
+                 self.type,
+                 self.prompt_template,
+                 e,
              )
              return None
 
-     def _get_default_template(self) -> Optional[str]:
-         """Get the default template for this rule type. Override in subclasses."""
-         return None
-
 
  class ChildToolRule(BaseToolRule):
      """
@@ -60,9 +58,6 @@ class ChildToolRule(BaseToolRule):
          last_tool = tool_call_history[-1] if tool_call_history else None
          return set(self.children) if last_tool == self.tool_name else available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\nAfter using {{ tool_name }}, you must use one of these tools: {{ children | join(', ') }}\n</tool_rule>"
-
 
  class ParentToolRule(BaseToolRule):
      """
@@ -80,9 +75,6 @@ class ParentToolRule(BaseToolRule):
          last_tool = tool_call_history[-1] if tool_call_history else None
          return set(self.children) if last_tool == self.tool_name else available_tools - set(self.children)
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ children | join(', ') }} can only be used after {{ tool_name }}\n</tool_rule>"
-
 
  class ConditionalToolRule(BaseToolRule):
      """
@@ -125,7 +117,8 @@ class ConditionalToolRule(BaseToolRule):
 
          return {self.default_child} if self.default_child else available_tools
 
-     def _matches_key(self, function_output: str, key: Any) -> bool:
+     @staticmethod
+     def _matches_key(function_output: str, key: Any) -> bool:
          """Helper function to determine if function output matches a mapping key."""
          if isinstance(key, bool):
              return function_output.lower() == "true" if key else function_output.lower() == "false"
@@ -142,9 +135,6 @@ class ConditionalToolRule(BaseToolRule):
          else:  # Assume string
              return str(function_output) == str(key)
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }} will determine which tool to use next based on its output\n</tool_rule>"
-
 
  class InitToolRule(BaseToolRule):
      """
@@ -165,9 +155,6 @@ class TerminalToolRule(BaseToolRule):
          description="Optional Jinja2 template for generating agent prompt about this tool rule.",
      )
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }} ends your response (yields control) when called\n</tool_rule>"
-
 
  class ContinueToolRule(BaseToolRule):
      """
@@ -196,9 +183,6 @@ class RequiredBeforeExitToolRule(BaseToolRule):
          """Returns all available tools - the logic for preventing exit is handled elsewhere."""
          return available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>{{ tool_name }} must be called before ending the conversation</tool_rule>"
-
 
  class MaxCountPerStepToolRule(BaseToolRule):
      """
@@ -222,9 +206,6 @@ class MaxCountPerStepToolRule(BaseToolRule):
 
          return available_tools
 
-     def _get_default_template(self) -> Optional[str]:
-         return "<tool_rule>\n{{ tool_name }}: at most {{ max_count_limit }} use(s) per response\n</tool_rule>"
-
 
  ToolRule = Annotated[
      Union[
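With the per-subclass default templates removed, render_prompt returns None unless the caller supplies a template explicitly; a sketch (field values are illustrative):

from letta.schemas.tool_rule import ChildToolRule

rule = ChildToolRule(tool_name="plan", children=["execute", "reflect"])
assert rule.render_prompt() is None  # no built-in fallback template anymore

rule.prompt_template = "<tool_rule>After {{ tool_name }}, use: {{ children | join(', ') }}</tool_rule>"
print(rule.render_prompt())
# <tool_rule>After plan, use: execute, reflect</tool_rule>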