letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.2.dev20250606215616__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +1 -1
  3. letta/agents/letta_agent.py +49 -29
  4. letta/agents/letta_agent_batch.py +1 -2
  5. letta/agents/voice_agent.py +19 -13
  6. letta/agents/voice_sleeptime_agent.py +11 -3
  7. letta/constants.py +18 -0
  8. letta/data_sources/__init__.py +0 -0
  9. letta/data_sources/redis_client.py +282 -0
  10. letta/errors.py +0 -4
  11. letta/functions/function_sets/files.py +58 -0
  12. letta/functions/schema_generator.py +18 -1
  13. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  14. letta/helpers/datetime_helpers.py +47 -3
  15. letta/helpers/decorators.py +69 -0
  16. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  17. letta/interfaces/anthropic_streaming_interface.py +43 -24
  18. letta/interfaces/openai_streaming_interface.py +21 -19
  19. letta/llm_api/anthropic.py +1 -1
  20. letta/llm_api/anthropic_client.py +22 -14
  21. letta/llm_api/google_vertex_client.py +1 -1
  22. letta/llm_api/helpers.py +36 -30
  23. letta/llm_api/llm_api_tools.py +1 -1
  24. letta/llm_api/llm_client_base.py +29 -1
  25. letta/llm_api/openai.py +1 -1
  26. letta/llm_api/openai_client.py +6 -8
  27. letta/local_llm/chat_completion_proxy.py +1 -1
  28. letta/memory.py +1 -1
  29. letta/orm/enums.py +1 -0
  30. letta/orm/file.py +80 -3
  31. letta/orm/files_agents.py +13 -0
  32. letta/orm/sqlalchemy_base.py +34 -11
  33. letta/otel/__init__.py +0 -0
  34. letta/otel/context.py +25 -0
  35. letta/otel/events.py +0 -0
  36. letta/otel/metric_registry.py +122 -0
  37. letta/otel/metrics.py +66 -0
  38. letta/otel/resource.py +26 -0
  39. letta/{tracing.py → otel/tracing.py} +55 -78
  40. letta/plugins/README.md +22 -0
  41. letta/plugins/__init__.py +0 -0
  42. letta/plugins/defaults.py +11 -0
  43. letta/plugins/plugins.py +72 -0
  44. letta/schemas/enums.py +8 -0
  45. letta/schemas/file.py +12 -0
  46. letta/schemas/tool.py +4 -0
  47. letta/server/db.py +7 -7
  48. letta/server/rest_api/app.py +8 -6
  49. letta/server/rest_api/routers/v1/agents.py +37 -36
  50. letta/server/rest_api/routers/v1/groups.py +3 -3
  51. letta/server/rest_api/routers/v1/sources.py +26 -3
  52. letta/server/rest_api/utils.py +9 -6
  53. letta/server/server.py +18 -12
  54. letta/services/agent_manager.py +185 -193
  55. letta/services/block_manager.py +1 -1
  56. letta/services/context_window_calculator/token_counter.py +3 -2
  57. letta/services/file_processor/chunker/line_chunker.py +34 -0
  58. letta/services/file_processor/file_processor.py +40 -11
  59. letta/services/file_processor/parser/mistral_parser.py +11 -1
  60. letta/services/files_agents_manager.py +96 -7
  61. letta/services/group_manager.py +6 -6
  62. letta/services/helpers/agent_manager_helper.py +373 -3
  63. letta/services/identity_manager.py +1 -1
  64. letta/services/job_manager.py +1 -1
  65. letta/services/llm_batch_manager.py +1 -1
  66. letta/services/message_manager.py +1 -1
  67. letta/services/organization_manager.py +1 -1
  68. letta/services/passage_manager.py +1 -1
  69. letta/services/per_agent_lock_manager.py +1 -1
  70. letta/services/provider_manager.py +1 -1
  71. letta/services/sandbox_config_manager.py +1 -1
  72. letta/services/source_manager.py +178 -19
  73. letta/services/step_manager.py +2 -2
  74. letta/services/summarizer/summarizer.py +1 -1
  75. letta/services/telemetry_manager.py +1 -1
  76. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  77. letta/services/tool_executor/composio_tool_executor.py +53 -0
  78. letta/services/tool_executor/core_tool_executor.py +474 -0
  79. letta/services/tool_executor/files_tool_executor.py +131 -0
  80. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  81. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  82. letta/services/tool_executor/tool_execution_manager.py +34 -14
  83. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  84. letta/services/tool_executor/tool_executor.py +3 -802
  85. letta/services/tool_executor/tool_executor_base.py +43 -0
  86. letta/services/tool_manager.py +55 -59
  87. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  88. letta/services/tool_sandbox/local_sandbox.py +6 -3
  89. letta/services/user_manager.py +6 -3
  90. letta/settings.py +21 -1
  91. letta/utils.py +7 -2
  92. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/METADATA +4 -2
  93. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/RECORD +96 -74
  94. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/entry_points.txt +0 -0
letta/llm_api/openai.py CHANGED
@@ -19,6 +19,7 @@ from letta.llm_api.openai_client import (
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
+ from letta.otel.tracing import log_event
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as _Message
  from letta.schemas.message import MessageRole as _MessageRole
@@ -36,7 +37,6 @@ from letta.schemas.openai.chat_completion_response import (
  )
  from letta.schemas.openai.embedding_response import EmbeddingResponse
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
- from letta.tracing import log_event
  from letta.utils import get_tool_call_id, smart_urljoin

  logger = get_logger(__name__)
letta/llm_api/openai_client.py CHANGED
@@ -8,11 +8,11 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

  from letta.constants import LETTA_MODEL_ENDPOINT
  from letta.errors import (
+     ContextWindowExceededError,
      ErrorCode,
      LLMAuthenticationError,
      LLMBadRequestError,
      LLMConnectionError,
-     LLMContextWindowExceededError,
      LLMNotFoundError,
      LLMPermissionDeniedError,
      LLMRateLimitError,
@@ -23,6 +23,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.log import get_logger
+ from letta.otel.tracing import trace_method
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
  from letta.schemas.llm_config import LLMConfig
@@ -34,7 +35,6 @@ from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
  from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
  from letta.settings import model_settings
- from letta.tracing import trace_method

  logger = get_logger(__name__)

@@ -280,7 +280,7 @@ class OpenAIClient(LLMClientBase):
          # OpenAI's response structure directly maps to ChatCompletionResponse
          # We just need to instantiate the Pydantic model for validation and type safety.
          chat_completion_response = ChatCompletionResponse(**response_data)
-
+         chat_completion_response = self._fix_truncated_json_response(chat_completion_response)
          # Unpack inner thoughts if they were embedded in function arguments
          if llm_config.put_inner_thoughts_in_kwargs:
              chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
@@ -342,11 +342,9 @@ class OpenAIClient(LLMClientBase):
          # Check message content if finer-grained errors are needed
          # Example: if "context_length_exceeded" in str(e): return LLMContextLengthExceededError(...)
          # TODO: This is a super soft check. Not sure if we can do better, needs more investigation.
-         if "context" in str(e):
-             return LLMContextWindowExceededError(
-                 message=f"Bad request to OpenAI (context length exceeded): {str(e)}",
-                 code=ErrorCode.INVALID_ARGUMENT,  # Or more specific if detectable
-                 details=e.body,
+         if "This model's maximum context length is" in str(e):
+             return ContextWindowExceededError(
+                 message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
              )
          else:
              return LLMBadRequestError(
letta/local_llm/chat_completion_proxy.py CHANGED
@@ -20,9 +20,9 @@ from letta.local_llm.utils import count_tokens, get_available_wrappers
  from letta.local_llm.vllm.api import get_vllm_completion
  from letta.local_llm.webui.api import get_webui_completion
  from letta.local_llm.webui.legacy_api import get_webui_completion as get_webui_completion_legacy
+ from letta.otel.tracing import log_event
  from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, Message, ToolCall, UsageStatistics
- from letta.tracing import log_event
  from letta.utils import get_tool_call_id

  has_shown_warning = False
letta/memory.py CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Callable, Dict, List
  from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
  from letta.llm_api.llm_api_tools import create
  from letta.llm_api.llm_client import LLMClient
+ from letta.otel.tracing import trace_method
  from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
  from letta.schemas.agent import AgentState
  from letta.schemas.enums import MessageRole
@@ -10,7 +11,6 @@ from letta.schemas.letta_message_content import TextContent
  from letta.schemas.memory import Memory
  from letta.schemas.message import Message
  from letta.settings import summarizer_settings
- from letta.tracing import trace_method
  from letta.utils import count_tokens, printd

  if TYPE_CHECKING:
letta/orm/enums.py CHANGED
@@ -9,6 +9,7 @@ class ToolType(str, Enum):
      LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
      LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
      LETTA_BUILTIN = "letta_builtin"
+     LETTA_FILES_CORE = "letta_files_core"
      EXTERNAL_COMPOSIO = "external_composio"
      EXTERNAL_LANGCHAIN = "external_langchain"
      # TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?
letta/orm/file.py CHANGED
@@ -1,10 +1,13 @@
+ import uuid
  from typing import TYPE_CHECKING, List, Optional

- from sqlalchemy import Integer, String
+ from sqlalchemy import ForeignKey, Index, Integer, String, Text, UniqueConstraint, desc
+ from sqlalchemy.ext.asyncio import AsyncAttrs
  from sqlalchemy.orm import Mapped, mapped_column, relationship

  from letta.orm.mixins import OrganizationMixin, SourceMixin
  from letta.orm.sqlalchemy_base import SqlalchemyBase
+ from letta.schemas.enums import FileProcessingStatus
  from letta.schemas.file import FileMetadata as PydanticFileMetadata

  if TYPE_CHECKING:
@@ -14,11 +17,36 @@ if TYPE_CHECKING:
      from letta.orm.source import Source


- class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
+ # TODO: Note that this is NOT organization scoped, this is potentially dangerous if we misuse this
+ # TODO: This should ONLY be manipulated internally in relation to FileMetadata.content
+ # TODO: Leaving organization_id out of this for now for simplicity
+ class FileContent(SqlalchemyBase):
+     """Holds the full text content of a file (potentially large)."""
+
+     __tablename__ = "file_contents"
+     __table_args__ = (UniqueConstraint("file_id", name="uq_file_contents_file_id"),)
+
+     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
+     # TODO: Some still rely on the Pydantic object to do this
+     id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"file_content-{uuid.uuid4()}")
+     file_id: Mapped[str] = mapped_column(ForeignKey("files.id", ondelete="CASCADE"), nullable=False, doc="Foreign key to files table.")
+
+     text: Mapped[str] = mapped_column(Text, nullable=False, doc="Full plain-text content of the file (e.g., extracted from a PDF).")
+
+     # back-reference to FileMetadata
+     file: Mapped["FileMetadata"] = relationship(back_populates="content", lazy="selectin")
+
+
+ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs):
      """Represents an uploaded file."""

      __tablename__ = "files"
      __pydantic_model__ = PydanticFileMetadata
+     __table_args__ = (
+         Index("ix_files_org_created", "organization_id", desc("created_at")),
+         Index("ix_files_source_created", "source_id", desc("created_at")),
+         Index("ix_files_processing_status", "processing_status"),
+     )

      file_name: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The name of the file.")
      file_path: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The file path on the system.")
@@ -26,6 +54,11 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
      file_size: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="The size of the file in bytes.")
      file_creation_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The creation date of the file.")
      file_last_modified_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The last modified date of the file.")
+     processing_status: Mapped[FileProcessingStatus] = mapped_column(
+         String, default=FileProcessingStatus.PENDING, nullable=False, doc="The current processing status of the file."
+     )
+
+     error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True, doc="Any error message encountered during processing.")

      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="files", lazy="selectin")
@@ -33,4 +66,48 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
      source_passages: Mapped[List["SourcePassage"]] = relationship(
          "SourcePassage", back_populates="file", lazy="selectin", cascade="all, delete-orphan"
      )
-     file_agents: Mapped[List["FileAgent"]] = relationship("FileAgent", back_populates="file", lazy="selectin")
+     file_agents: Mapped[List["FileAgent"]] = relationship(
+         "FileAgent",
+         back_populates="file",
+         lazy="selectin",
+         cascade="all, delete-orphan",
+         passive_deletes=True,  # ← add this
+     )
+     content: Mapped[Optional["FileContent"]] = relationship(
+         "FileContent",
+         uselist=False,
+         back_populates="file",
+         lazy="raise",  # raises if you access without eager load
+         cascade="all, delete-orphan",
+     )
+
+     async def to_pydantic_async(self, include_content: bool = False) -> PydanticFileMetadata:
+         """
+         Async version of `to_pydantic` that supports optional relationship loading
+         without requiring `expire_on_commit=False`.
+         """
+
+         # Load content relationship if requested
+         if include_content:
+             content_obj = await self.awaitable_attrs.content
+             content_text = content_obj.text if content_obj else None
+         else:
+             content_text = None
+
+         return PydanticFileMetadata(
+             id=self.id,
+             organization_id=self.organization_id,
+             source_id=self.source_id,
+             file_name=self.file_name,
+             file_path=self.file_path,
+             file_type=self.file_type,
+             file_size=self.file_size,
+             file_creation_date=self.file_creation_date,
+             file_last_modified_date=self.file_last_modified_date,
+             processing_status=self.processing_status,
+             error_message=self.error_message,
+             created_at=self.created_at,
+             updated_at=self.updated_at,
+             is_deleted=self.is_deleted,
+             content=content_text,
+         )
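Usage note: because the new content relationship is declared with lazy="raise", it has to be eager-loaded before to_pydantic_async(include_content=True) touches it. A minimal sketch of that pattern with a plain async SQLAlchemy session follows; the session handling and file_id value are assumptions for illustration, not code from this diff (the real call sites are in the manager classes listed above).

    from sqlalchemy import select
    from sqlalchemy.orm import selectinload

    from letta.orm.file import FileMetadata

    async def load_file_with_content(db_session, file_id: str):
        # Eager-load FileMetadata.content so the lazy="raise" relationship is
        # already populated when to_pydantic_async reads it.
        query = (
            select(FileMetadata)
            .where(FileMetadata.id == file_id)
            .options(selectinload(FileMetadata.content))
        )
        result = await db_session.execute(query)
        file_orm = result.scalar_one()
        return await file_orm.to_pydantic_async(include_content=True)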
letta/orm/files_agents.py CHANGED
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Optional
  from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func
  from sqlalchemy.orm import Mapped, mapped_column, relationship

+ from letta.constants import CORE_MEMORY_SOURCE_CHAR_LIMIT, FILE_IS_TRUNCATED_WARNING
  from letta.orm.mixins import OrganizationMixin
  from letta.orm.sqlalchemy_base import SqlalchemyBase
  from letta.schemas.block import Block as PydanticBlock
@@ -26,6 +27,8 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
      __table_args__ = (
          Index("ix_files_agents_file_id_agent_id", "file_id", "agent_id"),
          UniqueConstraint("file_id", "agent_id", name="uq_files_agents_file_agent"),
+         UniqueConstraint("agent_id", "file_name", name="uq_files_agents_agent_file_name"),
+         Index("ix_files_agents_agent_file_name", "agent_id", "file_name"),
      )
      __pydantic_model__ = PydanticFileAgent

@@ -33,6 +36,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
      # TODO: Some still rely on the Pydantic object to do this
      id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"file_agent-{uuid.uuid4()}")
      file_id: Mapped[str] = mapped_column(String, ForeignKey("files.id", ondelete="CASCADE"), primary_key=True, doc="ID of the file.")
+     file_name: Mapped[str] = mapped_column(String, nullable=False, doc="Denormalized copy of files.file_name; unique per agent.")
      agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True, doc="ID of the agent.")

      is_open: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True, doc="True if the agent currently has the file open.")
@@ -55,11 +59,20 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
          "FileMetadata",
          foreign_keys=[file_id],
          lazy="selectin",
+         back_populates="file_agents",
+         passive_deletes=True,  # ← add this
      )

      # TODO: This is temporary as we figure out if we want FileBlock as a first class citizen
      def to_pydantic_block(self) -> PydanticBlock:
          visible_content = self.visible_content if self.visible_content and self.is_open else ""
+
+         # Truncate content and add warnings here when converting from FileAgent to Block
+         if len(visible_content) > CORE_MEMORY_SOURCE_CHAR_LIMIT:
+             truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
+             visible_content = visible_content[: CORE_MEMORY_SOURCE_CHAR_LIMIT - len(truncated_warning)]
+             visible_content += truncated_warning
+
          return PydanticBlock(
              organization_id=self.organization_id,
              value=visible_content,
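Usage note: the truncation in to_pydantic_block reserves room for the warning suffix before slicing, so the rendered block never exceeds CORE_MEMORY_SOURCE_CHAR_LIMIT. A self-contained sketch of the same arithmetic follows; the constant values below are placeholders, not the real values from letta/constants.py.

    # Placeholder values; the real constants live in letta/constants.py.
    CORE_MEMORY_SOURCE_CHAR_LIMIT = 5000
    FILE_IS_TRUNCATED_WARNING = "Note: the file content has been truncated."

    def truncate_visible_content(visible_content: str) -> str:
        if len(visible_content) > CORE_MEMORY_SOURCE_CHAR_LIMIT:
            truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
            # Slice first so that content plus warning stays exactly at the limit.
            visible_content = visible_content[: CORE_MEMORY_SOURCE_CHAR_LIMIT - len(truncated_warning)]
            visible_content += truncated_warning
        return visible_content

    # e.g. len(truncate_visible_content("x" * 10_000)) == CORE_MEMORY_SOURCE_CHAR_LIMIT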
letta/orm/sqlalchemy_base.py CHANGED
@@ -1,13 +1,15 @@
+ import inspect
  from datetime import datetime
  from enum import Enum
  from functools import wraps
  from pprint import pformat
  from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union

- from sqlalchemy import String, and_, delete, func, or_, select, text
+ from sqlalchemy import Sequence, String, and_, delete, func, or_, select, text
  from sqlalchemy.exc import DBAPIError, IntegrityError, TimeoutError
  from sqlalchemy.ext.asyncio import AsyncSession
  from sqlalchemy.orm import Mapped, Session, mapped_column
+ from sqlalchemy.orm.interfaces import ORMOption

  from letta.log import get_logger
  from letta.orm.base import Base, CommonSqlalchemyMetaMixins
@@ -23,16 +25,28 @@ logger = get_logger(__name__)

  def handle_db_timeout(func):
      """Decorator to handle SQLAlchemy TimeoutError and wrap it in a custom exception."""
+     if not inspect.iscoroutinefunction(func):

-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         try:
-             return func(*args, **kwargs)
-         except TimeoutError as e:
-             logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
-             raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             try:
+                 return func(*args, **kwargs)
+             except TimeoutError as e:
+                 logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
+                 raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)
+
+         return wrapper
+     else:
+
+         @wraps(func)
+         async def async_wrapper(*args, **kwargs):
+             try:
+                 return await func(*args, **kwargs)
+             except TimeoutError as e:
+                 logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
+                 raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)

-     return wrapper
+         return async_wrapper


  class AccessType(str, Enum):
@@ -163,6 +177,7 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
          join_conditions: Optional[Union[Tuple, List]] = None,
          identifier_keys: Optional[List[str]] = None,
          identity_id: Optional[str] = None,
+         query_options: Sequence[ORMOption] | None = None,  # ← new
          **kwargs,
      ) -> List["SqlalchemyBase"]:
          """
@@ -224,6 +239,9 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
              identity_id=identity_id,
              **kwargs,
          )
+         if query_options:
+             for opt in query_options:
+                 query = query.options(opt)

          # Execute the query
          results = await db_session.execute(query)
@@ -472,14 +490,19 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
          Raises:
              NoResultFound: if the object is not found
          """
+         from letta.settings import settings
+
          identifiers = [] if identifier is None else [identifier]
          query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
-         await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
+
+         if settings.letta_pg_uri_no_default:
+             await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
          try:
              result = await db_session.execute(query)
              item = result.scalar_one_or_none()
          finally:
-             await db_session.execute(text("SET LOCAL enable_seqscan = ON"))
+             if settings.letta_pg_uri_no_default:
+                 await db_session.execute(text("SET LOCAL enable_seqscan = ON"))

          if item is None:
              raise NoResultFound(f"{cls.__name__} not found with {', '.join(query_conditions if query_conditions else ['no conditions'])}")
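Usage note, sketched under assumptions rather than taken from the diff: handle_db_timeout now dispatches on inspect.iscoroutinefunction, so one decorator covers both sync and async session helpers, and the new query_options parameter is forwarded to query.options(...), which is the hook for loader options such as selectinload. The manager class, the method name list_async, and the keyword arguments below are hypothetical and shown only to illustrate the shape of a call.

    from sqlalchemy.orm import selectinload

    from letta.orm.file import FileMetadata
    from letta.orm.sqlalchemy_base import handle_db_timeout

    class ExampleFileQueries:  # hypothetical helper, not part of this diff
        @handle_db_timeout  # sync function: gets the sync wrapper
        def count_files_sync(self, session) -> int:
            return session.query(FileMetadata).count()

        @handle_db_timeout  # coroutine: gets the async wrapper
        async def list_files_with_content(self, db_session, actor):
            # query_options ends up in query.options(...), so relationships declared
            # with lazy="raise" (like FileMetadata.content) can be eager-loaded here.
            # The list_async name and signature are assumptions for illustration.
            return await FileMetadata.list_async(
                db_session=db_session,
                actor=actor,
                query_options=[selectinload(FileMetadata.content)],
            )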
letta/otel/__init__.py ADDED
File without changes
letta/otel/context.py ADDED
@@ -0,0 +1,25 @@
+ from contextvars import ContextVar
+ from typing import Any, Dict
+
+ # Create context var at module level (outside middleware)
+ request_attributes: ContextVar[Dict[str, Any]] = ContextVar("request_attributes", default={})
+
+
+ # Helper functions
+ def set_ctx_attributes(attrs: Dict[str, Any]):
+     """Set attributes in current context"""
+     current = request_attributes.get()
+     new_attrs = {**current, **attrs}
+     request_attributes.set(new_attrs)
+
+
+ def add_ctx_attribute(key: str, value: Any):
+     """Add single attribute to current context"""
+     current = request_attributes.get()
+     new_attrs = {**current, key: value}
+     request_attributes.set(new_attrs)
+
+
+ def get_ctx_attributes() -> Dict[str, Any]:
+     """Get all attributes from current context"""
+     return request_attributes.get()
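Usage note: these helpers give each request its own attribute dict via a ContextVar, so values set in middleware are visible anywhere downstream in the same request without being threaded through call signatures. A minimal sketch; the attribute keys mirror the middleware in letta/otel/metrics.py below, while the values are illustrative.

    from letta.otel.context import add_ctx_attribute, get_ctx_attributes, set_ctx_attributes

    # Typically called from request middleware:
    set_ctx_attributes({"organization.id": "org-123", "project.id": "proj-456"})
    add_ctx_attribute("agent.id", "agent-789")

    # Anywhere later in the same request/task context:
    attrs = get_ctx_attributes()
    # {'organization.id': 'org-123', 'project.id': 'proj-456', 'agent.id': 'agent-789'}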
letta/otel/events.py ADDED
File without changes
letta/otel/metric_registry.py ADDED
@@ -0,0 +1,122 @@
+ from dataclasses import dataclass, field
+ from functools import partial
+
+ from opentelemetry import metrics
+ from opentelemetry.metrics import Counter, Histogram
+
+ from letta.helpers.singleton import singleton
+ from letta.otel.metrics import get_letta_meter
+
+
+ @singleton
+ @dataclass(frozen=True)
+ class MetricRegistry:
+     """Registry of all application metrics
+
+     Metrics are composed of the following:
+     - name
+     - description
+     - unit: UCUM unit of the metric (i.e. 'By' for bytes, 'ms' for milliseconds, '1' for count
+     - bucket_bounds (list[float] | None): the explicit bucket bounds for histogram metrics
+
+     and instruments are of types Counter, Histogram, and Gauge
+
+     The relationship between the various models is as follows:
+     project_id -N:1-> base_template_id -N:1-> template_id -N:1-> agent_id
+     agent_id -1:1+-> model_name
+     agent_id -1:N -> tool_name
+     """
+
+     Instrument = Counter | Histogram
+     _metrics: dict[str, Instrument] = field(default_factory=dict, init=False)
+     _meter: metrics.Meter = field(init=False)
+
+     def __post_init__(self):
+         object.__setattr__(self, "_meter", get_letta_meter())
+
+     def _get_or_create_metric(self, name: str, factory):
+         """Lazy initialization of metrics."""
+         if name not in self._metrics:
+             self._metrics[name] = factory()
+         return self._metrics[name]
+
+     # (includes base attributes: project, template_base, template, agent)
+     @property
+     def user_message_counter(self) -> Counter:
+         return self._get_or_create_metric(
+             "count_user_message",
+             partial(
+                 self._meter.create_counter,
+                 name="count_user_message",
+                 description="Counts the number of messages sent by the user",
+                 unit="1",
+             ),
+         )
+
+     # (includes tool_name, tool_execution_success, & step_id on failure)
+     @property
+     def tool_execution_counter(self) -> Counter:
+         return self._get_or_create_metric(
+             "count_tool_execution",
+             partial(self._meter.create_counter, name="count_tool_execution", description="Counts the number of tools executed.", unit="1"),
+         )
+
+     # project_id + model
+     @property
+     def ttft_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_ttft_ms",
+             partial(self._meter.create_histogram, name="hist_ttft_ms", description="Histogram for the Time to First Token (ms)", unit="ms"),
+         )
+
+     # (includes model name)
+     @property
+     def llm_execution_time_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_llm_execution_time_ms",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_llm_execution_time_ms",
+                 description="Histogram for LLM execution time (ms)",
+                 unit="ms",
+             ),
+         )
+
+     # (includes tool name)
+     @property
+     def tool_execution_time_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_tool_execution_time_ms",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_tool_execution_time_ms",
+                 description="Histogram for tool execution time (ms)",
+                 unit="ms",
+             ),
+         )
+
+     # TODO (cliandy): instrument this
+     @property
+     def message_cost(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_message_cost_usd",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_message_cost_usd",
+                 description="Histogram for cost of messages (usd) per step",
+                 unit="usd",
+             ),
+         )
+
+     # (includes model name)
+     @property
+     def message_output_tokens(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_message_output_tokens",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_message_output_tokens",
+                 description="Histogram for output tokens generated by LLM per step",
+                 unit="1",
+             ),
+         )
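Usage note: instruments are created lazily on first property access and cached in _metrics, so call sites simply read them off the singleton registry. A hedged sketch of recording one counter and one histogram with the request-scoped attributes from letta/otel/context.py; the attribute keys beyond those set by the middleware, the measured values, and the call site are illustrative assumptions.

    from letta.otel.context import get_ctx_attributes
    from letta.otel.metric_registry import MetricRegistry

    registry = MetricRegistry()  # @singleton returns the same instance everywhere

    # Count one incoming user message, tagged with whatever the middleware put in context.
    registry.user_message_counter.add(1, attributes=get_ctx_attributes())

    # Record a time-to-first-token measurement in milliseconds ("model.name" key is illustrative).
    registry.ttft_ms_histogram.record(417.0, attributes={**get_ctx_attributes(), "model.name": "gpt-4.1"})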
letta/otel/metrics.py ADDED
@@ -0,0 +1,66 @@
+ from fastapi import FastAPI, Request
+ from opentelemetry import metrics
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+ from opentelemetry.metrics import NoOpMeter
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+
+ from letta.log import get_logger
+ from letta.otel.context import add_ctx_attribute
+ from letta.otel.resource import get_resource, is_pytest_environment
+
+ logger = get_logger(__name__)
+
+ _meter: metrics.Meter = NoOpMeter("noop")
+ _is_metrics_initialized: bool = False
+
+
+ async def _otel_metric_middleware(request: Request, call_next):
+     if not _is_metrics_initialized:
+         return await call_next(request)
+
+     header_attributes = {
+         "x-organization-id": "organization.id",
+         "x-project-id": "project.id",
+         "x-base-template-id": "base_template.id",
+         "x-template-id": "template.id",
+         "x-agent-id": "agent.id",
+     }
+     try:
+         for header_key, otel_key in header_attributes.items():
+             header_value = request.headers.get(header_key)
+             if header_value:
+                 add_ctx_attribute(otel_key, header_value)
+         return await call_next(request)
+     except Exception:
+         raise
+
+
+ def setup_metrics(
+     endpoint: str,
+     app: FastAPI | None = None,
+     service_name: str = "memgpt-server",
+ ) -> None:
+     if is_pytest_environment():
+         return
+     assert endpoint
+
+     global _is_metrics_initialized, _meter
+
+     otlp_metric_exporter = OTLPMetricExporter(endpoint=endpoint)
+     metric_reader = PeriodicExportingMetricReader(exporter=otlp_metric_exporter)
+     meter_provider = MeterProvider(resource=get_resource(service_name), metric_readers=[metric_reader])
+     metrics.set_meter_provider(meter_provider)
+     _meter = metrics.get_meter(__name__)
+
+     if app:
+         app.middleware("http")(_otel_metric_middleware)
+
+     _is_metrics_initialized = True
+
+
+ def get_letta_meter() -> metrics.Meter | None:
+     """Returns the global letta meter if metrics are initialized."""
+     if not _is_metrics_initialized or isinstance(_meter, NoOpMeter):
+         logger.warning("Metrics are not initialized or meter is not available.")
+     return _meter
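Usage note: setup_metrics wires an OTLP gRPC exporter and a periodic reader into the global meter provider and, when given a FastAPI app, registers the header-reading middleware above. A minimal sketch of the wiring; the endpoint value is an assumption for illustration and would normally come from settings.

    from fastapi import FastAPI

    from letta.otel.metrics import setup_metrics

    app = FastAPI()

    # Endpoint is an assumed OTLP collector address reachable over gRPC.
    setup_metrics(endpoint="http://localhost:4317", app=app, service_name="memgpt-server")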
letta/otel/resource.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ import sys
+ import uuid
+
+ from opentelemetry.sdk.resources import Resource
+
+ from letta import __version__ as letta_version
+
+ _resources = {}
+
+
+ def get_resource(service_name: str) -> Resource:
+     _env = os.getenv("LETTA_ENVIRONMENT")
+     if service_name not in _resources:
+         resource_dict = {
+             "service.name": service_name,
+             "letta.version": letta_version,
+         }
+         if _env != "PRODUCTION":
+             resource_dict["device.id"] = uuid.getnode()  # MAC address as unique device identifier,
+         _resources[(service_name, _env)] = Resource.create(resource_dict)
+     return _resources[(service_name, _env)]
+
+
+ def is_pytest_environment():
+     return "pytest" in sys.modules