letta-nightly 0.8.0.dev20250606195656__py3-none-any.whl → 0.8.2.dev20250606215616__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +1 -1
  3. letta/agents/letta_agent.py +49 -29
  4. letta/agents/letta_agent_batch.py +1 -2
  5. letta/agents/voice_agent.py +19 -13
  6. letta/agents/voice_sleeptime_agent.py +11 -3
  7. letta/constants.py +18 -0
  8. letta/data_sources/__init__.py +0 -0
  9. letta/data_sources/redis_client.py +282 -0
  10. letta/errors.py +0 -4
  11. letta/functions/function_sets/files.py +58 -0
  12. letta/functions/schema_generator.py +18 -1
  13. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  14. letta/helpers/datetime_helpers.py +47 -3
  15. letta/helpers/decorators.py +69 -0
  16. letta/{services/helpers/noop_helper.py → helpers/singleton.py} +5 -0
  17. letta/interfaces/anthropic_streaming_interface.py +43 -24
  18. letta/interfaces/openai_streaming_interface.py +21 -19
  19. letta/llm_api/anthropic.py +1 -1
  20. letta/llm_api/anthropic_client.py +22 -14
  21. letta/llm_api/google_vertex_client.py +1 -1
  22. letta/llm_api/helpers.py +36 -30
  23. letta/llm_api/llm_api_tools.py +1 -1
  24. letta/llm_api/llm_client_base.py +29 -1
  25. letta/llm_api/openai.py +1 -1
  26. letta/llm_api/openai_client.py +6 -8
  27. letta/local_llm/chat_completion_proxy.py +1 -1
  28. letta/memory.py +1 -1
  29. letta/orm/enums.py +1 -0
  30. letta/orm/file.py +80 -3
  31. letta/orm/files_agents.py +13 -0
  32. letta/orm/sqlalchemy_base.py +34 -11
  33. letta/otel/__init__.py +0 -0
  34. letta/otel/context.py +25 -0
  35. letta/otel/events.py +0 -0
  36. letta/otel/metric_registry.py +122 -0
  37. letta/otel/metrics.py +66 -0
  38. letta/otel/resource.py +26 -0
  39. letta/{tracing.py → otel/tracing.py} +55 -78
  40. letta/plugins/README.md +22 -0
  41. letta/plugins/__init__.py +0 -0
  42. letta/plugins/defaults.py +11 -0
  43. letta/plugins/plugins.py +72 -0
  44. letta/schemas/enums.py +8 -0
  45. letta/schemas/file.py +12 -0
  46. letta/schemas/tool.py +4 -0
  47. letta/server/db.py +7 -7
  48. letta/server/rest_api/app.py +8 -6
  49. letta/server/rest_api/routers/v1/agents.py +37 -36
  50. letta/server/rest_api/routers/v1/groups.py +3 -3
  51. letta/server/rest_api/routers/v1/sources.py +26 -3
  52. letta/server/rest_api/utils.py +9 -6
  53. letta/server/server.py +18 -12
  54. letta/services/agent_manager.py +185 -193
  55. letta/services/block_manager.py +1 -1
  56. letta/services/context_window_calculator/token_counter.py +3 -2
  57. letta/services/file_processor/chunker/line_chunker.py +34 -0
  58. letta/services/file_processor/file_processor.py +40 -11
  59. letta/services/file_processor/parser/mistral_parser.py +11 -1
  60. letta/services/files_agents_manager.py +96 -7
  61. letta/services/group_manager.py +6 -6
  62. letta/services/helpers/agent_manager_helper.py +373 -3
  63. letta/services/identity_manager.py +1 -1
  64. letta/services/job_manager.py +1 -1
  65. letta/services/llm_batch_manager.py +1 -1
  66. letta/services/message_manager.py +1 -1
  67. letta/services/organization_manager.py +1 -1
  68. letta/services/passage_manager.py +1 -1
  69. letta/services/per_agent_lock_manager.py +1 -1
  70. letta/services/provider_manager.py +1 -1
  71. letta/services/sandbox_config_manager.py +1 -1
  72. letta/services/source_manager.py +178 -19
  73. letta/services/step_manager.py +2 -2
  74. letta/services/summarizer/summarizer.py +1 -1
  75. letta/services/telemetry_manager.py +1 -1
  76. letta/services/tool_executor/builtin_tool_executor.py +117 -0
  77. letta/services/tool_executor/composio_tool_executor.py +53 -0
  78. letta/services/tool_executor/core_tool_executor.py +474 -0
  79. letta/services/tool_executor/files_tool_executor.py +131 -0
  80. letta/services/tool_executor/mcp_tool_executor.py +45 -0
  81. letta/services/tool_executor/multi_agent_tool_executor.py +123 -0
  82. letta/services/tool_executor/tool_execution_manager.py +34 -14
  83. letta/services/tool_executor/tool_execution_sandbox.py +1 -1
  84. letta/services/tool_executor/tool_executor.py +3 -802
  85. letta/services/tool_executor/tool_executor_base.py +43 -0
  86. letta/services/tool_manager.py +55 -59
  87. letta/services/tool_sandbox/e2b_sandbox.py +1 -1
  88. letta/services/tool_sandbox/local_sandbox.py +6 -3
  89. letta/services/user_manager.py +6 -3
  90. letta/settings.py +21 -1
  91. letta/utils.py +7 -2
  92. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/METADATA +4 -2
  93. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/RECORD +96 -74
  94. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.0.dev20250606195656.dist-info → letta_nightly-0.8.2.dev20250606215616.dist-info}/entry_points.txt +0 -0
letta/llm_api/openai.py CHANGED
@@ -19,6 +19,7 @@ from letta.llm_api.openai_client import (
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
+ from letta.otel.tracing import log_event
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as _Message
  from letta.schemas.message import MessageRole as _MessageRole
@@ -36,7 +37,6 @@ from letta.schemas.openai.chat_completion_response import (
  )
  from letta.schemas.openai.embedding_response import EmbeddingResponse
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
- from letta.tracing import log_event
  from letta.utils import get_tool_call_id, smart_urljoin

  logger = get_logger(__name__)
letta/llm_api/openai_client.py CHANGED
@@ -8,11 +8,11 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

  from letta.constants import LETTA_MODEL_ENDPOINT
  from letta.errors import (
+     ContextWindowExceededError,
      ErrorCode,
      LLMAuthenticationError,
      LLMBadRequestError,
      LLMConnectionError,
-     LLMContextWindowExceededError,
      LLMNotFoundError,
      LLMPermissionDeniedError,
      LLMRateLimitError,
@@ -23,6 +23,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
  from letta.log import get_logger
+ from letta.otel.tracing import trace_method
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import ProviderCategory, ProviderType
  from letta.schemas.llm_config import LLMConfig
@@ -34,7 +35,6 @@ from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
  from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
  from letta.settings import model_settings
- from letta.tracing import trace_method

  logger = get_logger(__name__)

@@ -280,7 +280,7 @@ class OpenAIClient(LLMClientBase):
          # OpenAI's response structure directly maps to ChatCompletionResponse
          # We just need to instantiate the Pydantic model for validation and type safety.
          chat_completion_response = ChatCompletionResponse(**response_data)
-
+         chat_completion_response = self._fix_truncated_json_response(chat_completion_response)
          # Unpack inner thoughts if they were embedded in function arguments
          if llm_config.put_inner_thoughts_in_kwargs:
              chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
@@ -342,11 +342,9 @@ class OpenAIClient(LLMClientBase):
          # Check message content if finer-grained errors are needed
          # Example: if "context_length_exceeded" in str(e): return LLMContextLengthExceededError(...)
          # TODO: This is a super soft check. Not sure if we can do better, needs more investigation.
-         if "context" in str(e):
-             return LLMContextWindowExceededError(
-                 message=f"Bad request to OpenAI (context length exceeded): {str(e)}",
-                 code=ErrorCode.INVALID_ARGUMENT,  # Or more specific if detectable
-                 details=e.body,
+         if "This model's maximum context length is" in str(e):
+             return ContextWindowExceededError(
+                 message=f"Bad request to OpenAI (context window exceeded): {str(e)}",
              )
          else:
              return LLMBadRequestError(
letta/local_llm/chat_completion_proxy.py CHANGED
@@ -20,9 +20,9 @@ from letta.local_llm.utils import count_tokens, get_available_wrappers
  from letta.local_llm.vllm.api import get_vllm_completion
  from letta.local_llm.webui.api import get_webui_completion
  from letta.local_llm.webui.legacy_api import get_webui_completion as get_webui_completion_legacy
+ from letta.otel.tracing import log_event
  from letta.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, Message, ToolCall, UsageStatistics
- from letta.tracing import log_event
  from letta.utils import get_tool_call_id

  has_shown_warning = False
letta/memory.py CHANGED
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Callable, Dict, List
  from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
  from letta.llm_api.llm_api_tools import create
  from letta.llm_api.llm_client import LLMClient
+ from letta.otel.tracing import trace_method
  from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
  from letta.schemas.agent import AgentState
  from letta.schemas.enums import MessageRole
@@ -10,7 +11,6 @@ from letta.schemas.letta_message_content import TextContent
  from letta.schemas.memory import Memory
  from letta.schemas.message import Message
  from letta.settings import summarizer_settings
- from letta.tracing import trace_method
  from letta.utils import count_tokens, printd

  if TYPE_CHECKING:
letta/orm/enums.py CHANGED
@@ -9,6 +9,7 @@ class ToolType(str, Enum):
      LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
      LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
      LETTA_BUILTIN = "letta_builtin"
+     LETTA_FILES_CORE = "letta_files_core"
      EXTERNAL_COMPOSIO = "external_composio"
      EXTERNAL_LANGCHAIN = "external_langchain"
      # TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?
letta/orm/file.py CHANGED
@@ -1,10 +1,13 @@
+ import uuid
  from typing import TYPE_CHECKING, List, Optional

- from sqlalchemy import Integer, String
+ from sqlalchemy import ForeignKey, Index, Integer, String, Text, UniqueConstraint, desc
+ from sqlalchemy.ext.asyncio import AsyncAttrs
  from sqlalchemy.orm import Mapped, mapped_column, relationship

  from letta.orm.mixins import OrganizationMixin, SourceMixin
  from letta.orm.sqlalchemy_base import SqlalchemyBase
+ from letta.schemas.enums import FileProcessingStatus
  from letta.schemas.file import FileMetadata as PydanticFileMetadata

  if TYPE_CHECKING:
@@ -14,11 +17,36 @@ if TYPE_CHECKING:
      from letta.orm.source import Source


- class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
+ # TODO: Note that this is NOT organization scoped, this is potentially dangerous if we misuse this
+ # TODO: This should ONLY be manipulated internally in relation to FileMetadata.content
+ # TODO: Leaving organization_id out of this for now for simplicity
+ class FileContent(SqlalchemyBase):
+     """Holds the full text content of a file (potentially large)."""
+
+     __tablename__ = "file_contents"
+     __table_args__ = (UniqueConstraint("file_id", name="uq_file_contents_file_id"),)
+
+     # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
+     # TODO: Some still rely on the Pydantic object to do this
+     id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"file_content-{uuid.uuid4()}")
+     file_id: Mapped[str] = mapped_column(ForeignKey("files.id", ondelete="CASCADE"), nullable=False, doc="Foreign key to files table.")
+
+     text: Mapped[str] = mapped_column(Text, nullable=False, doc="Full plain-text content of the file (e.g., extracted from a PDF).")
+
+     # back-reference to FileMetadata
+     file: Mapped["FileMetadata"] = relationship(back_populates="content", lazy="selectin")
+
+
+ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs):
      """Represents an uploaded file."""

      __tablename__ = "files"
      __pydantic_model__ = PydanticFileMetadata
+     __table_args__ = (
+         Index("ix_files_org_created", "organization_id", desc("created_at")),
+         Index("ix_files_source_created", "source_id", desc("created_at")),
+         Index("ix_files_processing_status", "processing_status"),
+     )

      file_name: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The name of the file.")
      file_path: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The file path on the system.")
@@ -26,6 +54,11 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
      file_size: Mapped[Optional[int]] = mapped_column(Integer, nullable=True, doc="The size of the file in bytes.")
      file_creation_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The creation date of the file.")
      file_last_modified_date: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The last modified date of the file.")
+     processing_status: Mapped[FileProcessingStatus] = mapped_column(
+         String, default=FileProcessingStatus.PENDING, nullable=False, doc="The current processing status of the file."
+     )
+
+     error_message: Mapped[Optional[str]] = mapped_column(Text, nullable=True, doc="Any error message encountered during processing.")

      # relationships
      organization: Mapped["Organization"] = relationship("Organization", back_populates="files", lazy="selectin")
@@ -33,4 +66,48 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin):
      source_passages: Mapped[List["SourcePassage"]] = relationship(
          "SourcePassage", back_populates="file", lazy="selectin", cascade="all, delete-orphan"
      )
-     file_agents: Mapped[List["FileAgent"]] = relationship("FileAgent", back_populates="file", lazy="selectin")
+     file_agents: Mapped[List["FileAgent"]] = relationship(
+         "FileAgent",
+         back_populates="file",
+         lazy="selectin",
+         cascade="all, delete-orphan",
+         passive_deletes=True,  # ← add this
+     )
+     content: Mapped[Optional["FileContent"]] = relationship(
+         "FileContent",
+         uselist=False,
+         back_populates="file",
+         lazy="raise",  # raises if you access without eager load
+         cascade="all, delete-orphan",
+     )
+
+     async def to_pydantic_async(self, include_content: bool = False) -> PydanticFileMetadata:
+         """
+         Async version of `to_pydantic` that supports optional relationship loading
+         without requiring `expire_on_commit=False`.
+         """
+
+         # Load content relationship if requested
+         if include_content:
+             content_obj = await self.awaitable_attrs.content
+             content_text = content_obj.text if content_obj else None
+         else:
+             content_text = None
+
+         return PydanticFileMetadata(
+             id=self.id,
+             organization_id=self.organization_id,
+             source_id=self.source_id,
+             file_name=self.file_name,
+             file_path=self.file_path,
+             file_type=self.file_type,
+             file_size=self.file_size,
+             file_creation_date=self.file_creation_date,
+             file_last_modified_date=self.file_last_modified_date,
+             processing_status=self.processing_status,
+             error_message=self.error_message,
+             created_at=self.created_at,
+             updated_at=self.updated_at,
+             is_deleted=self.is_deleted,
+             content=content_text,
+         )
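Usage note: because the new content relationship is declared with lazy="raise", it has to be eager-loaded before to_pydantic_async(include_content=True) touches it. A minimal sketch of that pattern with a plain async SQLAlchemy session follows; the session handling and file_id value are assumptions for illustration, not code from this diff (the real call sites are in the manager classes listed above).

    from sqlalchemy import select
    from sqlalchemy.orm import selectinload

    from letta.orm.file import FileMetadata

    async def load_file_with_content(db_session, file_id: str):
        # Eager-load FileMetadata.content so the lazy="raise" relationship is
        # already populated when to_pydantic_async reads it.
        query = (
            select(FileMetadata)
            .where(FileMetadata.id == file_id)
            .options(selectinload(FileMetadata.content))
        )
        result = await db_session.execute(query)
        file_orm = result.scalar_one()
        return await file_orm.to_pydantic_async(include_content=True)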
letta/orm/files_agents.py CHANGED
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Optional
  from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func
  from sqlalchemy.orm import Mapped, mapped_column, relationship

+ from letta.constants import CORE_MEMORY_SOURCE_CHAR_LIMIT, FILE_IS_TRUNCATED_WARNING
  from letta.orm.mixins import OrganizationMixin
  from letta.orm.sqlalchemy_base import SqlalchemyBase
  from letta.schemas.block import Block as PydanticBlock
@@ -26,6 +27,8 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
      __table_args__ = (
          Index("ix_files_agents_file_id_agent_id", "file_id", "agent_id"),
          UniqueConstraint("file_id", "agent_id", name="uq_files_agents_file_agent"),
+         UniqueConstraint("agent_id", "file_name", name="uq_files_agents_agent_file_name"),
+         Index("ix_files_agents_agent_file_name", "agent_id", "file_name"),
      )
      __pydantic_model__ = PydanticFileAgent

@@ -33,6 +36,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
      # TODO: Some still rely on the Pydantic object to do this
      id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"file_agent-{uuid.uuid4()}")
      file_id: Mapped[str] = mapped_column(String, ForeignKey("files.id", ondelete="CASCADE"), primary_key=True, doc="ID of the file.")
+     file_name: Mapped[str] = mapped_column(String, nullable=False, doc="Denormalized copy of files.file_name; unique per agent.")
      agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True, doc="ID of the agent.")

      is_open: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True, doc="True if the agent currently has the file open.")
@@ -55,11 +59,20 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
          "FileMetadata",
          foreign_keys=[file_id],
          lazy="selectin",
+         back_populates="file_agents",
+         passive_deletes=True,  # ← add this
      )

      # TODO: This is temporary as we figure out if we want FileBlock as a first class citizen
      def to_pydantic_block(self) -> PydanticBlock:
          visible_content = self.visible_content if self.visible_content and self.is_open else ""
+
+         # Truncate content and add warnings here when converting from FileAgent to Block
+         if len(visible_content) > CORE_MEMORY_SOURCE_CHAR_LIMIT:
+             truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
+             visible_content = visible_content[: CORE_MEMORY_SOURCE_CHAR_LIMIT - len(truncated_warning)]
+             visible_content += truncated_warning
+
          return PydanticBlock(
              organization_id=self.organization_id,
              value=visible_content,
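Usage note: the truncation in to_pydantic_block reserves room for the warning suffix before slicing, so the rendered block never exceeds CORE_MEMORY_SOURCE_CHAR_LIMIT. A self-contained sketch of the same arithmetic follows; the constant values below are placeholders, not the real values from letta/constants.py.

    # Placeholder values; the real constants live in letta/constants.py.
    CORE_MEMORY_SOURCE_CHAR_LIMIT = 5000
    FILE_IS_TRUNCATED_WARNING = "Note: the file content has been truncated."

    def truncate_visible_content(visible_content: str) -> str:
        if len(visible_content) > CORE_MEMORY_SOURCE_CHAR_LIMIT:
            truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
            # Slice first so that content plus warning stays exactly at the limit.
            visible_content = visible_content[: CORE_MEMORY_SOURCE_CHAR_LIMIT - len(truncated_warning)]
            visible_content += truncated_warning
        return visible_content

    # e.g. len(truncate_visible_content("x" * 10_000)) == CORE_MEMORY_SOURCE_CHAR_LIMIT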
letta/orm/sqlalchemy_base.py CHANGED
@@ -1,13 +1,15 @@
+ import inspect
  from datetime import datetime
  from enum import Enum
  from functools import wraps
  from pprint import pformat
  from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union

- from sqlalchemy import String, and_, delete, func, or_, select, text
+ from sqlalchemy import Sequence, String, and_, delete, func, or_, select, text
  from sqlalchemy.exc import DBAPIError, IntegrityError, TimeoutError
  from sqlalchemy.ext.asyncio import AsyncSession
  from sqlalchemy.orm import Mapped, Session, mapped_column
+ from sqlalchemy.orm.interfaces import ORMOption

  from letta.log import get_logger
  from letta.orm.base import Base, CommonSqlalchemyMetaMixins
@@ -23,16 +25,28 @@ logger = get_logger(__name__)

  def handle_db_timeout(func):
      """Decorator to handle SQLAlchemy TimeoutError and wrap it in a custom exception."""
+     if not inspect.iscoroutinefunction(func):

-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         try:
-             return func(*args, **kwargs)
-         except TimeoutError as e:
-             logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
-             raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             try:
+                 return func(*args, **kwargs)
+             except TimeoutError as e:
+                 logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
+                 raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)
+
+         return wrapper
+     else:
+
+         @wraps(func)
+         async def async_wrapper(*args, **kwargs):
+             try:
+                 return await func(*args, **kwargs)
+             except TimeoutError as e:
+                 logger.error(f"Timeout while executing {func.__name__} with args {args} and kwargs {kwargs}: {e}")
+                 raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e)

-     return wrapper
+         return async_wrapper


  class AccessType(str, Enum):
@@ -163,6 +177,7 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
          join_conditions: Optional[Union[Tuple, List]] = None,
          identifier_keys: Optional[List[str]] = None,
          identity_id: Optional[str] = None,
+         query_options: Sequence[ORMOption] | None = None,  # ← new
          **kwargs,
      ) -> List["SqlalchemyBase"]:
          """
@@ -224,6 +239,9 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
              identity_id=identity_id,
              **kwargs,
          )
+         if query_options:
+             for opt in query_options:
+                 query = query.options(opt)

          # Execute the query
          results = await db_session.execute(query)
@@ -472,14 +490,19 @@ class SqlalchemyBase(CommonSqlalchemyMetaMixins, Base):
          Raises:
              NoResultFound: if the object is not found
          """
+         from letta.settings import settings
+
          identifiers = [] if identifier is None else [identifier]
          query, query_conditions = cls._read_multiple_preprocess(identifiers, actor, access, access_type, check_is_deleted, **kwargs)
-         await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
+
+         if settings.letta_pg_uri_no_default:
+             await db_session.execute(text("SET LOCAL enable_seqscan = OFF"))
          try:
              result = await db_session.execute(query)
              item = result.scalar_one_or_none()
          finally:
-             await db_session.execute(text("SET LOCAL enable_seqscan = ON"))
+             if settings.letta_pg_uri_no_default:
+                 await db_session.execute(text("SET LOCAL enable_seqscan = ON"))

          if item is None:
              raise NoResultFound(f"{cls.__name__} not found with {', '.join(query_conditions if query_conditions else ['no conditions'])}")
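Usage note, sketched under assumptions rather than taken from the diff: handle_db_timeout now dispatches on inspect.iscoroutinefunction, so one decorator covers both sync and async session helpers, and the new query_options parameter is forwarded to query.options(...), which is the hook for loader options such as selectinload. The manager class, the method name list_async, and the keyword arguments below are hypothetical and shown only to illustrate the shape of a call.

    from sqlalchemy.orm import selectinload

    from letta.orm.file import FileMetadata
    from letta.orm.sqlalchemy_base import handle_db_timeout

    class ExampleFileQueries:  # hypothetical helper, not part of this diff
        @handle_db_timeout  # sync function: gets the sync wrapper
        def count_files_sync(self, session) -> int:
            return session.query(FileMetadata).count()

        @handle_db_timeout  # coroutine: gets the async wrapper
        async def list_files_with_content(self, db_session, actor):
            # query_options ends up in query.options(...), so relationships declared
            # with lazy="raise" (like FileMetadata.content) can be eager-loaded here.
            # The list_async name and signature are assumptions for illustration.
            return await FileMetadata.list_async(
                db_session=db_session,
                actor=actor,
                query_options=[selectinload(FileMetadata.content)],
            )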
letta/otel/__init__.py ADDED
File without changes
letta/otel/context.py ADDED
@@ -0,0 +1,25 @@
+ from contextvars import ContextVar
+ from typing import Any, Dict
+
+ # Create context var at module level (outside middleware)
+ request_attributes: ContextVar[Dict[str, Any]] = ContextVar("request_attributes", default={})
+
+
+ # Helper functions
+ def set_ctx_attributes(attrs: Dict[str, Any]):
+     """Set attributes in current context"""
+     current = request_attributes.get()
+     new_attrs = {**current, **attrs}
+     request_attributes.set(new_attrs)
+
+
+ def add_ctx_attribute(key: str, value: Any):
+     """Add single attribute to current context"""
+     current = request_attributes.get()
+     new_attrs = {**current, key: value}
+     request_attributes.set(new_attrs)
+
+
+ def get_ctx_attributes() -> Dict[str, Any]:
+     """Get all attributes from current context"""
+     return request_attributes.get()
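Usage note: these helpers give each request its own attribute dict via a ContextVar, so values set in middleware are visible anywhere downstream in the same request without being threaded through call signatures. A minimal sketch; the attribute keys mirror the middleware in letta/otel/metrics.py below, while the values are illustrative.

    from letta.otel.context import add_ctx_attribute, get_ctx_attributes, set_ctx_attributes

    # Typically called from request middleware:
    set_ctx_attributes({"organization.id": "org-123", "project.id": "proj-456"})
    add_ctx_attribute("agent.id", "agent-789")

    # Anywhere later in the same request/task context:
    attrs = get_ctx_attributes()
    # {'organization.id': 'org-123', 'project.id': 'proj-456', 'agent.id': 'agent-789'}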
letta/otel/events.py ADDED
File without changes
letta/otel/metric_registry.py ADDED
@@ -0,0 +1,122 @@
+ from dataclasses import dataclass, field
+ from functools import partial
+
+ from opentelemetry import metrics
+ from opentelemetry.metrics import Counter, Histogram
+
+ from letta.helpers.singleton import singleton
+ from letta.otel.metrics import get_letta_meter
+
+
+ @singleton
+ @dataclass(frozen=True)
+ class MetricRegistry:
+     """Registry of all application metrics
+
+     Metrics are composed of the following:
+     - name
+     - description
+     - unit: UCUM unit of the metric (i.e. 'By' for bytes, 'ms' for milliseconds, '1' for count
+     - bucket_bounds (list[float] | None): the explicit bucket bounds for histogram metrics
+
+     and instruments are of types Counter, Histogram, and Gauge
+
+     The relationship between the various models is as follows:
+     project_id -N:1-> base_template_id -N:1-> template_id -N:1-> agent_id
+     agent_id -1:1+-> model_name
+     agent_id -1:N -> tool_name
+     """
+
+     Instrument = Counter | Histogram
+     _metrics: dict[str, Instrument] = field(default_factory=dict, init=False)
+     _meter: metrics.Meter = field(init=False)
+
+     def __post_init__(self):
+         object.__setattr__(self, "_meter", get_letta_meter())
+
+     def _get_or_create_metric(self, name: str, factory):
+         """Lazy initialization of metrics."""
+         if name not in self._metrics:
+             self._metrics[name] = factory()
+         return self._metrics[name]
+
+     # (includes base attributes: project, template_base, template, agent)
+     @property
+     def user_message_counter(self) -> Counter:
+         return self._get_or_create_metric(
+             "count_user_message",
+             partial(
+                 self._meter.create_counter,
+                 name="count_user_message",
+                 description="Counts the number of messages sent by the user",
+                 unit="1",
+             ),
+         )
+
+     # (includes tool_name, tool_execution_success, & step_id on failure)
+     @property
+     def tool_execution_counter(self) -> Counter:
+         return self._get_or_create_metric(
+             "count_tool_execution",
+             partial(self._meter.create_counter, name="count_tool_execution", description="Counts the number of tools executed.", unit="1"),
+         )
+
+     # project_id + model
+     @property
+     def ttft_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_ttft_ms",
+             partial(self._meter.create_histogram, name="hist_ttft_ms", description="Histogram for the Time to First Token (ms)", unit="ms"),
+         )
+
+     # (includes model name)
+     @property
+     def llm_execution_time_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_llm_execution_time_ms",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_llm_execution_time_ms",
+                 description="Histogram for LLM execution time (ms)",
+                 unit="ms",
+             ),
+         )
+
+     # (includes tool name)
+     @property
+     def tool_execution_time_ms_histogram(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_tool_execution_time_ms",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_tool_execution_time_ms",
+                 description="Histogram for tool execution time (ms)",
+                 unit="ms",
+             ),
+         )
+
+     # TODO (cliandy): instrument this
+     @property
+     def message_cost(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_message_cost_usd",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_message_cost_usd",
+                 description="Histogram for cost of messages (usd) per step",
+                 unit="usd",
+             ),
+         )
+
+     # (includes model name)
+     @property
+     def message_output_tokens(self) -> Histogram:
+         return self._get_or_create_metric(
+             "hist_message_output_tokens",
+             partial(
+                 self._meter.create_histogram,
+                 name="hist_message_output_tokens",
+                 description="Histogram for output tokens generated by LLM per step",
+                 unit="1",
+             ),
+         )
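Usage note: instruments are created lazily on first property access and cached in _metrics, so call sites simply read them off the singleton registry. A hedged sketch of recording one counter and one histogram with the request-scoped attributes from letta/otel/context.py; the attribute keys beyond those set by the middleware, the measured values, and the call site are illustrative assumptions.

    from letta.otel.context import get_ctx_attributes
    from letta.otel.metric_registry import MetricRegistry

    registry = MetricRegistry()  # @singleton returns the same instance everywhere

    # Count one incoming user message, tagged with whatever the middleware put in context.
    registry.user_message_counter.add(1, attributes=get_ctx_attributes())

    # Record a time-to-first-token measurement in milliseconds ("model.name" key is illustrative).
    registry.ttft_ms_histogram.record(417.0, attributes={**get_ctx_attributes(), "model.name": "gpt-4.1"})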
letta/otel/metrics.py ADDED
@@ -0,0 +1,66 @@
+ from fastapi import FastAPI, Request
+ from opentelemetry import metrics
+ from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+ from opentelemetry.metrics import NoOpMeter
+ from opentelemetry.sdk.metrics import MeterProvider
+ from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
+
+ from letta.log import get_logger
+ from letta.otel.context import add_ctx_attribute
+ from letta.otel.resource import get_resource, is_pytest_environment
+
+ logger = get_logger(__name__)
+
+ _meter: metrics.Meter = NoOpMeter("noop")
+ _is_metrics_initialized: bool = False
+
+
+ async def _otel_metric_middleware(request: Request, call_next):
+     if not _is_metrics_initialized:
+         return await call_next(request)
+
+     header_attributes = {
+         "x-organization-id": "organization.id",
+         "x-project-id": "project.id",
+         "x-base-template-id": "base_template.id",
+         "x-template-id": "template.id",
+         "x-agent-id": "agent.id",
+     }
+     try:
+         for header_key, otel_key in header_attributes.items():
+             header_value = request.headers.get(header_key)
+             if header_value:
+                 add_ctx_attribute(otel_key, header_value)
+         return await call_next(request)
+     except Exception:
+         raise
+
+
+ def setup_metrics(
+     endpoint: str,
+     app: FastAPI | None = None,
+     service_name: str = "memgpt-server",
+ ) -> None:
+     if is_pytest_environment():
+         return
+     assert endpoint
+
+     global _is_metrics_initialized, _meter
+
+     otlp_metric_exporter = OTLPMetricExporter(endpoint=endpoint)
+     metric_reader = PeriodicExportingMetricReader(exporter=otlp_metric_exporter)
+     meter_provider = MeterProvider(resource=get_resource(service_name), metric_readers=[metric_reader])
+     metrics.set_meter_provider(meter_provider)
+     _meter = metrics.get_meter(__name__)
+
+     if app:
+         app.middleware("http")(_otel_metric_middleware)
+
+     _is_metrics_initialized = True
+
+
+ def get_letta_meter() -> metrics.Meter | None:
+     """Returns the global letta meter if metrics are initialized."""
+     if not _is_metrics_initialized or isinstance(_meter, NoOpMeter):
+         logger.warning("Metrics are not initialized or meter is not available.")
+     return _meter
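Usage note: setup_metrics wires an OTLP gRPC exporter and a periodic reader into the global meter provider and, when given a FastAPI app, registers the header-reading middleware above. A minimal sketch of the wiring; the endpoint value is an assumption for illustration and would normally come from settings.

    from fastapi import FastAPI

    from letta.otel.metrics import setup_metrics

    app = FastAPI()

    # Endpoint is an assumed OTLP collector address reachable over gRPC.
    setup_metrics(endpoint="http://localhost:4317", app=app, service_name="memgpt-server")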
letta/otel/resource.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ import sys
+ import uuid
+
+ from opentelemetry.sdk.resources import Resource
+
+ from letta import __version__ as letta_version
+
+ _resources = {}
+
+
+ def get_resource(service_name: str) -> Resource:
+     _env = os.getenv("LETTA_ENVIRONMENT")
+     if service_name not in _resources:
+         resource_dict = {
+             "service.name": service_name,
+             "letta.version": letta_version,
+         }
+         if _env != "PRODUCTION":
+             resource_dict["device.id"] = uuid.getnode()  # MAC address as unique device identifier,
+         _resources[(service_name, _env)] = Resource.create(resource_dict)
+     return _resources[(service_name, _env)]
+
+
+ def is_pytest_environment():
+     return "pytest" in sys.modules