letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (87) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +47 -12
  3. letta/agents/base_agent.py +7 -4
  4. letta/agents/helpers.py +52 -0
  5. letta/agents/letta_agent.py +105 -42
  6. letta/agents/voice_agent.py +2 -2
  7. letta/constants.py +13 -1
  8. letta/errors.py +10 -3
  9. letta/functions/function_sets/base.py +65 -0
  10. letta/functions/interface.py +2 -2
  11. letta/functions/mcp_client/base_client.py +18 -1
  12. letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
  13. letta/groups/helpers.py +113 -0
  14. letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
  15. letta/groups/sleeptime_multi_agent.py +259 -0
  16. letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
  17. letta/helpers/converters.py +109 -7
  18. letta/helpers/message_helper.py +1 -0
  19. letta/helpers/tool_rule_solver.py +40 -23
  20. letta/interface.py +12 -5
  21. letta/interfaces/anthropic_streaming_interface.py +329 -0
  22. letta/llm_api/anthropic.py +12 -1
  23. letta/llm_api/anthropic_client.py +65 -14
  24. letta/llm_api/azure_openai.py +2 -2
  25. letta/llm_api/google_ai_client.py +13 -2
  26. letta/llm_api/google_constants.py +3 -0
  27. letta/llm_api/google_vertex_client.py +2 -2
  28. letta/llm_api/llm_api_tools.py +1 -1
  29. letta/llm_api/llm_client.py +7 -0
  30. letta/llm_api/llm_client_base.py +2 -7
  31. letta/llm_api/openai.py +7 -1
  32. letta/llm_api/openai_client.py +250 -0
  33. letta/orm/__init__.py +4 -0
  34. letta/orm/agent.py +6 -0
  35. letta/orm/block.py +32 -2
  36. letta/orm/block_history.py +46 -0
  37. letta/orm/custom_columns.py +60 -0
  38. letta/orm/enums.py +7 -0
  39. letta/orm/group.py +6 -0
  40. letta/orm/groups_blocks.py +13 -0
  41. letta/orm/llm_batch_items.py +55 -0
  42. letta/orm/llm_batch_job.py +48 -0
  43. letta/orm/message.py +7 -1
  44. letta/orm/organization.py +2 -0
  45. letta/orm/sqlalchemy_base.py +18 -15
  46. letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
  47. letta/prompts/system/sleeptime.txt +26 -0
  48. letta/schemas/agent.py +13 -1
  49. letta/schemas/enums.py +17 -2
  50. letta/schemas/group.py +14 -1
  51. letta/schemas/letta_message.py +5 -3
  52. letta/schemas/llm_batch_job.py +53 -0
  53. letta/schemas/llm_config.py +14 -4
  54. letta/schemas/message.py +44 -0
  55. letta/schemas/tool.py +3 -0
  56. letta/schemas/usage.py +1 -0
  57. letta/server/db.py +2 -0
  58. letta/server/rest_api/app.py +1 -1
  59. letta/server/rest_api/chat_completions_interface.py +8 -3
  60. letta/server/rest_api/interface.py +36 -7
  61. letta/server/rest_api/routers/v1/agents.py +53 -39
  62. letta/server/rest_api/routers/v1/runs.py +14 -2
  63. letta/server/rest_api/utils.py +15 -4
  64. letta/server/server.py +120 -71
  65. letta/services/agent_manager.py +70 -6
  66. letta/services/block_manager.py +190 -2
  67. letta/services/group_manager.py +68 -0
  68. letta/services/helpers/agent_manager_helper.py +6 -4
  69. letta/services/llm_batch_manager.py +139 -0
  70. letta/services/message_manager.py +17 -31
  71. letta/services/tool_executor/tool_execution_sandbox.py +1 -3
  72. letta/services/tool_executor/tool_executor.py +9 -20
  73. letta/services/tool_manager.py +14 -3
  74. letta/services/tool_sandbox/__init__.py +0 -0
  75. letta/services/tool_sandbox/base.py +188 -0
  76. letta/services/tool_sandbox/e2b_sandbox.py +116 -0
  77. letta/services/tool_sandbox/local_sandbox.py +221 -0
  78. letta/sleeptime_agent.py +61 -0
  79. letta/streaming_interface.py +20 -10
  80. letta/utils.py +4 -0
  81. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
  82. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
  83. letta/offline_memory_agent.py +0 -173
  84. letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
  85. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
  86. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
  87. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
@@ -2,6 +2,7 @@ import uuid
2
2
  from typing import List, Optional, Tuple
3
3
 
4
4
  import requests
5
+ from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
5
6
 
6
7
  from letta.constants import NON_USER_MSG_PREFIX
7
8
  from letta.helpers.datetime_helpers import get_utc_time
@@ -36,7 +37,7 @@ class GoogleAIClient(LLMClientBase):
36
37
  self,
37
38
  messages: List[PydanticMessage],
38
39
  tools: List[dict],
39
- tool_call: Optional[str],
40
+ force_tool_call: Optional[str] = None,
40
41
  ) -> dict:
41
42
  """
42
43
  Constructs a request object in the expected data format for this client.
@@ -50,7 +51,7 @@ class GoogleAIClient(LLMClientBase):
50
51
  [m.to_google_ai_dict() for m in messages],
51
52
  )
52
53
 
53
- return {
54
+ request_data = {
54
55
  "contents": contents,
55
56
  "tools": tools,
56
57
  "generation_config": {
@@ -59,6 +60,16 @@ class GoogleAIClient(LLMClientBase):
59
60
  },
60
61
  }
61
62
 
63
+ # write tool config
64
+ tool_config = ToolConfig(
65
+ function_calling_config=FunctionCallingConfig(
66
+ # ANY mode forces the model to predict only function calls
67
+ mode=FunctionCallingConfigMode.ANY,
68
+ )
69
+ )
70
+ request_data["tool_config"] = tool_config.model_dump()
71
+ return request_data
72
+
62
73
  def convert_response_to_chat_completion(
63
74
  self,
64
75
  response_data: dict,
@@ -1,4 +1,5 @@
1
1
  GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
2
+ "gemini-2.5-pro-exp-03-25": 1048576,
2
3
  "gemini-2.0-flash-001": 1048576,
3
4
  "gemini-2.0-pro-exp-02-05": 2097152,
4
5
  "gemini-2.0-flash-lite-preview-02-05": 1048576,
@@ -9,4 +10,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
9
10
  "gemini-1.0-pro-vision": 16384,
10
11
  }
11
12
 
13
+ GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
14
+
12
15
  GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}
@@ -38,12 +38,12 @@ class GoogleVertexClient(GoogleAIClient):
38
38
  self,
39
39
  messages: List[PydanticMessage],
40
40
  tools: List[dict],
41
- tool_call: Optional[str],
41
+ force_tool_call: Optional[str] = None,
42
42
  ) -> dict:
43
43
  """
44
44
  Constructs a request object in the expected data format for this client.
45
45
  """
46
- request_data = super().build_request_data(messages, tools, tool_call)
46
+ request_data = super().build_request_data(messages, tools, force_tool_call)
47
47
  request_data["config"] = request_data.pop("generation_config")
48
48
  request_data["config"]["tools"] = request_data.pop("tools")
49
49
 
@@ -340,7 +340,7 @@ def create(
340
340
  tool_choice = {"type": "any", "disable_parallel_tool_use": True}
341
341
  else:
342
342
  tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
343
- tools = [{"type": "function", "function": f} for f in functions]
343
+ tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
344
344
 
345
345
  chat_completion_request = ChatCompletionRequest(
346
346
  model=llm_config.model,
@@ -49,5 +49,12 @@ class LLMClient:
49
49
  llm_config=llm_config,
50
50
  put_inner_thoughts_first=put_inner_thoughts_first,
51
51
  )
52
+ case "openai":
53
+ from letta.llm_api.openai_client import OpenAIClient
54
+
55
+ return OpenAIClient(
56
+ llm_config=llm_config,
57
+ put_inner_thoughts_first=put_inner_thoughts_first,
58
+ )
52
59
  case _:
53
60
  return None
@@ -32,9 +32,7 @@ class LLMClientBase:
32
32
  self,
33
33
  messages: List[Message],
34
34
  tools: Optional[List[dict]] = None, # TODO: change to Tool object
35
- tool_call: Optional[str] = None,
36
35
  stream: bool = False,
37
- first_message: bool = False,
38
36
  force_tool_call: Optional[str] = None,
39
37
  ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
40
38
  """
@@ -42,7 +40,7 @@ class LLMClientBase:
42
40
  If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
43
41
  Otherwise returns a ChatCompletionResponse.
44
42
  """
45
- request_data = self.build_request_data(messages, tools, tool_call)
43
+ request_data = self.build_request_data(messages, tools, force_tool_call)
46
44
 
47
45
  try:
48
46
  log_event(name="llm_request_sent", attributes=request_data)
@@ -60,9 +58,7 @@ class LLMClientBase:
60
58
  self,
61
59
  messages: List[Message],
62
60
  tools: Optional[List[dict]] = None, # TODO: change to Tool object
63
- tool_call: Optional[str] = None,
64
61
  stream: bool = False,
65
- first_message: bool = False,
66
62
  force_tool_call: Optional[str] = None,
67
63
  ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
68
64
  """
@@ -70,7 +66,7 @@ class LLMClientBase:
70
66
  If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
71
67
  Otherwise returns a ChatCompletionResponse.
72
68
  """
73
- request_data = self.build_request_data(messages, tools, tool_call, force_tool_call)
69
+ request_data = self.build_request_data(messages, tools, force_tool_call)
74
70
  response_data = {}
75
71
 
76
72
  try:
@@ -90,7 +86,6 @@ class LLMClientBase:
90
86
  self,
91
87
  messages: List[Message],
92
88
  tools: List[dict],
93
- tool_call: Optional[str],
94
89
  force_tool_call: Optional[str] = None,
95
90
  ) -> dict:
96
91
  """
letta/llm_api/openai.py CHANGED
@@ -252,6 +252,8 @@ def openai_chat_completions_process_stream(
252
252
 
253
253
  n_chunks = 0 # approx == n_tokens
254
254
  chunk_idx = 0
255
+ prev_message_type = None
256
+ message_idx = 0
255
257
  try:
256
258
  for chat_completion_chunk in openai_chat_completions_request_stream(
257
259
  url=url, api_key=api_key, chat_completion_request=chat_completion_request
@@ -268,13 +270,17 @@ def openai_chat_completions_process_stream(
268
270
 
269
271
  if stream_interface:
270
272
  if isinstance(stream_interface, AgentChunkStreamingInterface):
271
- stream_interface.process_chunk(
273
+ message_type = stream_interface.process_chunk(
272
274
  chat_completion_chunk,
273
275
  message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
274
276
  message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
275
277
  expect_reasoning_content=expect_reasoning_content,
276
278
  name=name,
279
+ message_index=message_idx,
277
280
  )
281
+ if message_type != prev_message_type and message_type is not None:
282
+ message_idx += 1
283
+ prev_message_type = message_type
278
284
  elif isinstance(stream_interface, AgentRefreshStreamingInterface):
279
285
  stream_interface.process_refresh(chat_completion_response)
280
286
  else:
@@ -0,0 +1,250 @@
1
+ import os
2
+ from typing import List, Optional
3
+
4
+ import openai
5
+ from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
6
+ from openai.types.chat.chat_completion import ChatCompletion
7
+ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
8
+
9
+ from letta.errors import (
10
+ ErrorCode,
11
+ LLMAuthenticationError,
12
+ LLMBadRequestError,
13
+ LLMConnectionError,
14
+ LLMNotFoundError,
15
+ LLMPermissionDeniedError,
16
+ LLMRateLimitError,
17
+ LLMServerError,
18
+ LLMUnprocessableEntityError,
19
+ )
20
+ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, unpack_all_inner_thoughts_from_kwargs
21
+ from letta.llm_api.llm_client_base import LLMClientBase
22
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
23
+ from letta.log import get_logger
24
+ from letta.schemas.message import Message as PydanticMessage
25
+ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
26
+ from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
27
+ from letta.schemas.openai.chat_completion_request import FunctionSchema
28
+ from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
29
+ from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
30
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
31
+ from letta.settings import model_settings
32
+
33
+ logger = get_logger(__name__)
34
+
35
+
36
class OpenAIClient(LLMClientBase):
    """LLMClientBase implementation backed by the ``openai`` SDK.

    Builds chat-completions request payloads from Letta messages/tools, issues
    them through the synchronous or asynchronous SDK client (optionally
    streaming), converts responses into ``ChatCompletionResponse`` objects and
    maps SDK exceptions onto Letta's ``LLM*Error`` types.
    """

    def _prepare_client_kwargs(self) -> dict:
        """Return the keyword arguments used to construct an OpenAI SDK client.

        The API key is resolved from ``model_settings.openai_api_key``, then the
        ``OPENAI_API_KEY`` environment variable, then a placeholder string.
        The base URL is the endpoint configured on ``self.llm_config``.
        """
        api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
        # supposedly the openai python client requires a dummy API key
        api_key = api_key or "DUMMY_API_KEY"
        kwargs = {"api_key": api_key, "base_url": self.llm_config.model_endpoint}

        return kwargs

    def build_request_data(
        self,
        messages: List[PydanticMessage],
        tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
        force_tool_call: Optional[str] = None,
    ) -> dict:
        """
        Constructs a request object in the expected data format for the OpenAI API.

        Args:
            messages: Conversation messages; each is converted with ``to_openai_dict``.
            tools: Function schemas (as dicts) to expose to the model, if any.
            force_tool_call: Name of a function the model must call. When given it
                overrides the default ``tool_choice``.

        Returns:
            The ``ChatCompletionRequest`` dumped to a dict with unset fields excluded.
        """
        # Normalise once so the substring checks below cannot raise TypeError
        # should the endpoint be unset on the config.
        endpoint = self.llm_config.model_endpoint or ""

        if tools and self.llm_config.put_inner_thoughts_in_kwargs:
            # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
            # TODO(fix)
            inner_thoughts_desc = (
                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
            )
            tools = add_inner_thoughts_to_functions(
                functions=tools,
                inner_thoughts_key=INNER_THOUGHTS_KWARG,
                inner_thoughts_description=inner_thoughts_desc,
                put_inner_thoughts_first=True,
            )

        openai_message_list = [
            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs))
            for m in messages
        ]

        if self.llm_config.model:
            model = self.llm_config.model
        else:
            logger.warning(f"Model type not set in llm_config: {self.llm_config.model_dump_json(indent=4)}")
            model = None

        # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
        # TODO(matt) move into LLMConfig
        # TODO: This vllm checking is very brittle and is a patch at most
        if not tools:
            # tool_choice is only valid alongside tools; sending it without any
            # tools makes the API reject the request.
            tool_choice = None
        elif endpoint == "https://inference.memgpt.ai" or (self.llm_config.handle and "vllm" in self.llm_config.handle):
            tool_choice = "auto"  # TODO change to "required" once proxy supports it
        else:
            tool_choice = "required"

        # An explicit request to force a specific tool always wins.
        if force_tool_call is not None:
            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))

        request_fields = {
            "model": model,
            "messages": openai_message_list,
            "tools": [OpenAITool(type="function", function=f) for f in tools] if tools else None,
            "user": str(),
            "max_completion_tokens": self.llm_config.max_tokens,
            "temperature": self.llm_config.temperature,
        }
        if tool_choice is not None:
            # Left unset otherwise, so model_dump(exclude_unset=True) omits the key entirely.
            request_fields["tool_choice"] = tool_choice

        data = ChatCompletionRequest(**request_fields)

        if "inference.memgpt.ai" in endpoint:
            # override user id for inference.memgpt.ai
            import uuid

            data.user = str(uuid.UUID(int=0))
            data.model = "memgpt-openai"

        if data.tools is not None and len(data.tools) > 0:
            # Convert to structured output style (which has 'strict' and no optionals)
            for tool in data.tools:
                try:
                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
                    tool.function = FunctionSchema(**structured_output_version)
                except ValueError as e:
                    # Best effort: keep the original schema for this tool and carry on.
                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

        return data.model_dump(exclude_unset=True)

    def request(self, request_data: dict) -> dict:
        """
        Performs underlying synchronous request to OpenAI API and returns raw response dict.
        """
        client = OpenAI(**self._prepare_client_kwargs())

        response: ChatCompletion = client.chat.completions.create(**request_data)
        return response.model_dump()

    async def request_async(self, request_data: dict) -> dict:
        """
        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
        """
        client = AsyncOpenAI(**self._prepare_client_kwargs())
        response: ChatCompletion = await client.chat.completions.create(**request_data)
        return response.model_dump()

    def convert_response_to_chat_completion(
        self,
        response_data: dict,
        input_messages: List[PydanticMessage],  # Included for consistency, maybe used later
    ) -> ChatCompletionResponse:
        """
        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
        Handles potential extraction of inner thoughts if they were added via kwargs.

        Args:
            response_data: Raw response dict as returned by ``request``/``request_async``.
            input_messages: Not used by this implementation.
        """
        # OpenAI's response structure directly maps to ChatCompletionResponse
        # We just need to instantiate the Pydantic model for validation and type safety.
        chat_completion_response = ChatCompletionResponse(**response_data)

        # Unpack inner thoughts if they were embedded in function arguments
        if self.llm_config.put_inner_thoughts_in_kwargs:
            chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
                response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
            )

        return chat_completion_response

    def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
        """
        Performs underlying streaming request to OpenAI and returns the stream iterator.
        """
        client = OpenAI(**self._prepare_client_kwargs())
        response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
        return response_stream

    async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
        """
        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
        """
        client = AsyncOpenAI(**self._prepare_client_kwargs())
        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
        return response_stream

    def handle_llm_error(self, e: Exception) -> Exception:
        """
        Maps OpenAI-specific errors to common LLMError types.

        The mapped exception is returned, not raised. Specific SDK error classes
        are checked first, then the generic ``openai.APIStatusError``; anything
        else is delegated to the base class implementation.
        """
        if isinstance(e, openai.APIConnectionError):
            logger.warning(f"[OpenAI] API connection error: {e}")
            return LLMConnectionError(
                message=f"Failed to connect to OpenAI: {str(e)}",
                code=ErrorCode.INTERNAL_SERVER_ERROR,
                details={"cause": str(e.__cause__) if e.__cause__ else None},
            )

        if isinstance(e, openai.RateLimitError):
            logger.warning(f"[OpenAI] Rate limited (429). Consider backoff. Error: {e}")
            return LLMRateLimitError(
                message=f"Rate limited by OpenAI: {str(e)}",
                code=ErrorCode.RATE_LIMIT_EXCEEDED,
                details=e.body,  # Include body which often has rate limit details
            )

        if isinstance(e, openai.BadRequestError):
            logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
            # BadRequestError can signify different issues (e.g., invalid args, context length)
            # Check message content if finer-grained errors are needed
            # Example: if "context_length_exceeded" in str(e): return LLMContextLengthExceededError(...)
            return LLMBadRequestError(
                message=f"Bad request to OpenAI: {str(e)}",
                code=ErrorCode.INVALID_ARGUMENT,  # Or more specific if detectable
                details=e.body,
            )

        if isinstance(e, openai.AuthenticationError):
            logger.error(f"[OpenAI] Authentication error (401): {str(e)}")  # More severe log level
            return LLMAuthenticationError(
                message=f"Authentication failed with OpenAI: {str(e)}", code=ErrorCode.UNAUTHENTICATED, details=e.body
            )

        if isinstance(e, openai.PermissionDeniedError):
            logger.error(f"[OpenAI] Permission denied (403): {str(e)}")  # More severe log level
            return LLMPermissionDeniedError(
                message=f"Permission denied by OpenAI: {str(e)}", code=ErrorCode.PERMISSION_DENIED, details=e.body
            )

        if isinstance(e, openai.NotFoundError):
            logger.warning(f"[OpenAI] Resource not found (404): {str(e)}")
            # Could be invalid model name, etc.
            return LLMNotFoundError(message=f"Resource not found in OpenAI: {str(e)}", code=ErrorCode.NOT_FOUND, details=e.body)

        if isinstance(e, openai.UnprocessableEntityError):
            logger.warning(f"[OpenAI] Unprocessable entity (422): {str(e)}")
            return LLMUnprocessableEntityError(
                message=f"Invalid request content for OpenAI: {str(e)}",
                code=ErrorCode.INVALID_ARGUMENT,  # Usually validation errors
                details=e.body,
            )

        # General API error catch-all
        if isinstance(e, openai.APIStatusError):
            logger.warning(f"[OpenAI] API status error ({e.status_code}): {str(e)}")
            # Map based on status code potentially
            if e.status_code >= 500:
                error_cls = LLMServerError
                error_code = ErrorCode.INTERNAL_SERVER_ERROR
            else:
                # Treat other 4xx as bad requests if not caught above
                error_cls = LLMBadRequestError
                error_code = ErrorCode.INVALID_ARGUMENT

            return error_cls(
                message=f"OpenAI API error: {str(e)}",
                code=error_code,
                details={
                    "status_code": e.status_code,
                    "response": str(e.response),
                    "body": e.body,
                },
            )

        # Fallback for unexpected errors
        return super().handle_llm_error(e)
letta/orm/__init__.py CHANGED
@@ -2,15 +2,19 @@ from letta.orm.agent import Agent
2
2
  from letta.orm.agents_tags import AgentsTags
3
3
  from letta.orm.base import Base
4
4
  from letta.orm.block import Block
5
+ from letta.orm.block_history import BlockHistory
5
6
  from letta.orm.blocks_agents import BlocksAgents
6
7
  from letta.orm.file import FileMetadata
7
8
  from letta.orm.group import Group
8
9
  from letta.orm.groups_agents import GroupsAgents
10
+ from letta.orm.groups_blocks import GroupsBlocks
9
11
  from letta.orm.identities_agents import IdentitiesAgents
10
12
  from letta.orm.identities_blocks import IdentitiesBlocks
11
13
  from letta.orm.identity import Identity
12
14
  from letta.orm.job import Job
13
15
  from letta.orm.job_messages import JobMessage
16
+ from letta.orm.llm_batch_items import LLMBatchItem
17
+ from letta.orm.llm_batch_job import LLMBatchJob
14
18
  from letta.orm.message import Message
15
19
  from letta.orm.organization import Organization
16
20
  from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
letta/orm/agent.py CHANGED
@@ -68,6 +68,9 @@ class Agent(SqlalchemyBase, OrganizationMixin):
68
68
  message_buffer_autoclear: Mapped[bool] = mapped_column(
69
69
  Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
70
70
  )
71
+ enable_sleeptime: Mapped[Optional[bool]] = mapped_column(
72
+ Boolean, doc="If set to True, memory management will move to a background agent thread."
73
+ )
71
74
 
72
75
  # relationships
73
76
  organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
@@ -141,6 +144,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
141
144
  viewonly=True,
142
145
  back_populates="manager_agent",
143
146
  )
147
+ batch_items: Mapped[List["LLMBatchItem"]] = relationship("LLMBatchItem", back_populates="agent", lazy="selectin")
144
148
 
145
149
  def to_pydantic(self, include_relationships: Optional[Set[str]] = None) -> PydanticAgentState:
146
150
  """
@@ -190,6 +194,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
190
194
  "identity_ids": [],
191
195
  "multi_agent_group": None,
192
196
  "tool_exec_environment_variables": [],
197
+ "enable_sleeptime": None,
193
198
  }
194
199
 
195
200
  # Optional fields: only included if requested
@@ -201,6 +206,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
201
206
  "identity_ids": lambda: [i.id for i in self.identities],
202
207
  "multi_agent_group": lambda: self.multi_agent_group,
203
208
  "tool_exec_environment_variables": lambda: self.tool_exec_environment_variables,
209
+ "enable_sleeptime": lambda: self.enable_sleeptime,
204
210
  }
205
211
 
206
212
  include_relationships = set(optional_fields.keys() if include_relationships is None else include_relationships)
letta/orm/block.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from typing import TYPE_CHECKING, List, Optional, Type
2
2
 
3
- from sqlalchemy import JSON, BigInteger, Index, Integer, UniqueConstraint, event
4
- from sqlalchemy.orm import Mapped, attributes, mapped_column, relationship
3
+ from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, UniqueConstraint, event
4
+ from sqlalchemy.orm import Mapped, attributes, declared_attr, mapped_column, relationship
5
5
 
6
6
  from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
7
+ from letta.orm.block_history import BlockHistory
7
8
  from letta.orm.blocks_agents import BlocksAgents
8
9
  from letta.orm.mixins import OrganizationMixin
9
10
  from letta.orm.sqlalchemy_base import SqlalchemyBase
@@ -38,6 +39,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
38
39
  limit: Mapped[BigInteger] = mapped_column(Integer, default=CORE_MEMORY_BLOCK_CHAR_LIMIT, doc="Character limit of the block.")
39
40
  metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default={}, doc="arbitrary information related to the block.")
40
41
 
42
+ # history pointers / locking mechanisms
43
+ current_history_entry_id: Mapped[Optional[str]] = mapped_column(
44
+ String, ForeignKey("block_history.id", name="fk_block_current_history_entry", use_alter=True), nullable=True, index=True
45
+ )
46
+ version: Mapped[int] = mapped_column(
47
+ Integer, nullable=False, default=1, server_default="1", doc="Optimistic locking version counter, incremented on each state change."
48
+ )
49
+ # NOTE: This takes advantage of built-in optimistic locking functionality by SqlAlchemy
50
+ # https://docs.sqlalchemy.org/en/20/orm/versioning.html
51
+ __mapper_args__ = {"version_id_col": version}
52
+
41
53
  # relationships
42
54
  organization: Mapped[Optional["Organization"]] = relationship("Organization")
43
55
  agents: Mapped[List["Agent"]] = relationship(
@@ -55,6 +67,13 @@ class Block(OrganizationMixin, SqlalchemyBase):
55
67
  back_populates="blocks",
56
68
  passive_deletes=True,
57
69
  )
70
+ groups: Mapped[List["Group"]] = relationship(
71
+ "Group",
72
+ secondary="groups_blocks",
73
+ lazy="selectin",
74
+ back_populates="shared_blocks",
75
+ passive_deletes=True,
76
+ )
58
77
 
59
78
  def to_pydantic(self) -> Type:
60
79
  match self.label:
@@ -68,6 +87,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
68
87
  model_dict["metadata"] = self.metadata_
69
88
  return Schema.model_validate(model_dict)
70
89
 
90
+ @declared_attr
91
+ def current_history_entry(cls) -> Mapped[Optional["BlockHistory"]]:
92
+ # Relationship to easily load the specific history entry that is current
93
+ return relationship(
94
+ "BlockHistory",
95
+ primaryjoin=lambda: cls.current_history_entry_id == BlockHistory.id,
96
+ foreign_keys=[cls.current_history_entry_id],
97
+ lazy="joined", # Typically want current history details readily available
98
+ post_update=True,
99
+ ) # Helps manage potential FK cycles
100
+
71
101
 
72
102
  @event.listens_for(Block, "after_update") # Changed from 'before_update'
73
103
  def block_before_update(mapper, connection, target):
@@ -0,0 +1,46 @@
1
+ import uuid
2
+ from typing import Optional
3
+
4
+ from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, Text
5
+ from sqlalchemy.orm import Mapped, mapped_column
6
+
7
+ from letta.orm.enums import ActorType
8
+ from letta.orm.mixins import OrganizationMixin
9
+ from letta.orm.sqlalchemy_base import SqlalchemyBase
10
+
11
+
12
class BlockHistory(OrganizationMixin, SqlalchemyBase):
    """Stores a single historical state of a Block for undo/redo functionality."""

    __tablename__ = "block_history"

    __table_args__ = (
        # PRIMARY lookup index for finding specific history entries & ordering.
        # It is unique, so a block cannot have two entries with the same sequence number.
        Index("ix_block_history_block_id_sequence", "block_id", "sequence_number", unique=True),
    )

    # Each history entry generates its own id ("block_hist-<uuid4>") at the ORM level.
    # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
    # TODO: Some still rely on the Pydantic object to do this
    id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"block_hist-{uuid.uuid4()}")

    # Snapshot State Fields (Copied from Block)
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    label: Mapped[str] = mapped_column(String, nullable=False)
    value: Mapped[str] = mapped_column(Text, nullable=False)
    # Annotated with the Python type; the SQL type is given explicitly to mapped_column.
    limit: Mapped[int] = mapped_column(BigInteger, nullable=False)
    metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)

    # Editor info
    # These are not made to be FKs because these may not always exist (e.g. a User be deleted after they made a checkpoint)
    actor_type: Mapped[Optional[ActorType]] = mapped_column(String, nullable=True)
    actor_id: Mapped[Optional[str]] = mapped_column(String, nullable=True)

    # Owning block (foreign-key column only; no ORM relationship is declared here)
    block_id: Mapped[str] = mapped_column(
        String, ForeignKey("block.id", ondelete="CASCADE"), nullable=False  # History deleted if Block is deleted
    )

    sequence_number: Mapped[int] = mapped_column(
        Integer, nullable=False, doc="Monotonically increasing sequence number for the history of a specific block_id, starting from 1."
    )
@@ -2,16 +2,24 @@ from sqlalchemy import JSON
2
2
  from sqlalchemy.types import BINARY, TypeDecorator
3
3
 
4
4
  from letta.helpers.converters import (
5
+ deserialize_agent_step_state,
6
+ deserialize_batch_request_result,
7
+ deserialize_create_batch_response,
5
8
  deserialize_embedding_config,
6
9
  deserialize_llm_config,
7
10
  deserialize_message_content,
11
+ deserialize_poll_batch_response,
8
12
  deserialize_tool_calls,
9
13
  deserialize_tool_returns,
10
14
  deserialize_tool_rules,
11
15
  deserialize_vector,
16
+ serialize_agent_step_state,
17
+ serialize_batch_request_result,
18
+ serialize_create_batch_response,
12
19
  serialize_embedding_config,
13
20
  serialize_llm_config,
14
21
  serialize_message_content,
22
+ serialize_poll_batch_response,
15
23
  serialize_tool_calls,
16
24
  serialize_tool_returns,
17
25
  serialize_tool_rules,
@@ -108,3 +116,55 @@ class CommonVector(TypeDecorator):
108
116
 
109
117
  def process_result_value(self, value, dialect):
110
118
  return deserialize_vector(value, dialect)
119
+
120
+
121
class CreateBatchResponseColumn(TypeDecorator):
    """Custom SQLAlchemy column type stored as JSON.

    Values are converted with ``serialize_create_batch_response`` on write and
    ``deserialize_create_batch_response`` on read.
    """

    # NOTE(review): presumably holds an LLM provider's batch-creation response;
    # confirm against the converters in letta.helpers.converters.
    impl = JSON
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Serialize the Python value before it is bound to a statement."""
        return serialize_create_batch_response(value)

    def process_result_value(self, value, dialect):
        """Deserialize the stored JSON value when a row is loaded."""
        return deserialize_create_batch_response(value)
132
+
133
+
134
class PollBatchResponseColumn(TypeDecorator):
    """Custom SQLAlchemy column type stored as JSON.

    Values are converted with ``serialize_poll_batch_response`` on write and
    ``deserialize_poll_batch_response`` on read.
    """

    # NOTE(review): presumably holds the latest batch polling response;
    # confirm against the converters in letta.helpers.converters.
    impl = JSON
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Serialize the Python value before it is bound to a statement."""
        return serialize_poll_batch_response(value)

    def process_result_value(self, value, dialect):
        """Deserialize the stored JSON value when a row is loaded."""
        return deserialize_poll_batch_response(value)
145
+
146
+
147
class BatchRequestResultColumn(TypeDecorator):
    """Custom SQLAlchemy column type stored as JSON.

    Values are converted with ``serialize_batch_request_result`` on write and
    ``deserialize_batch_request_result`` on read.
    """

    # NOTE(review): presumably holds the result of one request within a batch;
    # confirm against the converters in letta.helpers.converters.
    impl = JSON
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Serialize the Python value before it is bound to a statement."""
        return serialize_batch_request_result(value)

    def process_result_value(self, value, dialect):
        """Deserialize the stored JSON value when a row is loaded."""
        return deserialize_batch_request_result(value)
158
+
159
+
160
class AgentStepStateColumn(TypeDecorator):
    """Custom SQLAlchemy column type stored as JSON.

    Values are converted with ``serialize_agent_step_state`` on write and
    ``deserialize_agent_step_state`` on read.
    """

    # NOTE(review): presumably holds a snapshot of an agent's step state;
    # confirm against the converters in letta.helpers.converters.
    impl = JSON
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Serialize the Python value before it is bound to a statement."""
        return serialize_agent_step_state(value)

    def process_result_value(self, value, dialect):
        """Deserialize the stored JSON value when a row is loaded."""
        return deserialize_agent_step_state(value)