letta-nightly 0.6.48.dev20250407104216__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +47 -12
- letta/agents/base_agent.py +7 -4
- letta/agents/helpers.py +52 -0
- letta/agents/letta_agent.py +105 -42
- letta/agents/voice_agent.py +2 -2
- letta/constants.py +13 -1
- letta/errors.py +10 -3
- letta/functions/function_sets/base.py +65 -0
- letta/functions/interface.py +2 -2
- letta/functions/mcp_client/base_client.py +18 -1
- letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
- letta/groups/helpers.py +113 -0
- letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
- letta/groups/sleeptime_multi_agent.py +259 -0
- letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
- letta/helpers/converters.py +109 -7
- letta/helpers/message_helper.py +1 -0
- letta/helpers/tool_rule_solver.py +40 -23
- letta/interface.py +12 -5
- letta/interfaces/anthropic_streaming_interface.py +329 -0
- letta/llm_api/anthropic.py +12 -1
- letta/llm_api/anthropic_client.py +65 -14
- letta/llm_api/azure_openai.py +2 -2
- letta/llm_api/google_ai_client.py +13 -2
- letta/llm_api/google_constants.py +3 -0
- letta/llm_api/google_vertex_client.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client.py +7 -0
- letta/llm_api/llm_client_base.py +2 -7
- letta/llm_api/openai.py +7 -1
- letta/llm_api/openai_client.py +250 -0
- letta/orm/__init__.py +4 -0
- letta/orm/agent.py +6 -0
- letta/orm/block.py +32 -2
- letta/orm/block_history.py +46 -0
- letta/orm/custom_columns.py +60 -0
- letta/orm/enums.py +7 -0
- letta/orm/group.py +6 -0
- letta/orm/groups_blocks.py +13 -0
- letta/orm/llm_batch_items.py +55 -0
- letta/orm/llm_batch_job.py +48 -0
- letta/orm/message.py +7 -1
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +18 -15
- letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
- letta/prompts/system/sleeptime.txt +26 -0
- letta/schemas/agent.py +13 -1
- letta/schemas/enums.py +17 -2
- letta/schemas/group.py +14 -1
- letta/schemas/letta_message.py +5 -3
- letta/schemas/llm_batch_job.py +53 -0
- letta/schemas/llm_config.py +14 -4
- letta/schemas/message.py +44 -0
- letta/schemas/tool.py +3 -0
- letta/schemas/usage.py +1 -0
- letta/server/db.py +2 -0
- letta/server/rest_api/app.py +1 -1
- letta/server/rest_api/chat_completions_interface.py +8 -3
- letta/server/rest_api/interface.py +36 -7
- letta/server/rest_api/routers/v1/agents.py +53 -39
- letta/server/rest_api/routers/v1/runs.py +14 -2
- letta/server/rest_api/utils.py +15 -4
- letta/server/server.py +120 -71
- letta/services/agent_manager.py +70 -6
- letta/services/block_manager.py +190 -2
- letta/services/group_manager.py +68 -0
- letta/services/helpers/agent_manager_helper.py +6 -4
- letta/services/llm_batch_manager.py +139 -0
- letta/services/message_manager.py +17 -31
- letta/services/tool_executor/tool_execution_sandbox.py +1 -3
- letta/services/tool_executor/tool_executor.py +9 -20
- letta/services/tool_manager.py +14 -3
- letta/services/tool_sandbox/__init__.py +0 -0
- letta/services/tool_sandbox/base.py +188 -0
- letta/services/tool_sandbox/e2b_sandbox.py +116 -0
- letta/services/tool_sandbox/local_sandbox.py +221 -0
- letta/sleeptime_agent.py +61 -0
- letta/streaming_interface.py +20 -10
- letta/utils.py +4 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
- letta/offline_memory_agent.py +0 -173
- letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_ai_client.py CHANGED
@@ -2,6 +2,7 @@ import uuid
 from typing import List, Optional, Tuple
 
 import requests
+from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
 
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
@@ -36,7 +37,7 @@ class GoogleAIClient(LLMClientBase):
         self,
         messages: List[PydanticMessage],
         tools: List[dict],
-
+        force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
@@ -50,7 +51,7 @@ class GoogleAIClient(LLMClientBase):
             [m.to_google_ai_dict() for m in messages],
         )
 
-
+        request_data = {
             "contents": contents,
             "tools": tools,
             "generation_config": {
@@ -59,6 +60,16 @@ class GoogleAIClient(LLMClientBase):
             },
         }
 
+        # write tool config
+        tool_config = ToolConfig(
+            function_calling_config=FunctionCallingConfig(
+                # ANY mode forces the model to predict only function calls
+                mode=FunctionCallingConfigMode.ANY,
+            )
+        )
+        request_data["tool_config"] = tool_config.model_dump()
+        return request_data
+
     def convert_response_to_chat_completion(
         self,
         response_data: dict,
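The tool_config block added above switches Gemini into forced function calling through the google-genai SDK types imported at the top of the file. A minimal standalone sketch of the same construction (the placeholder request dict here is illustrative, not the one this client actually builds):

from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

# ANY mode instructs the model to respond only with function calls, never free text.
tool_config = ToolConfig(
    function_calling_config=FunctionCallingConfig(mode=FunctionCallingConfigMode.ANY)
)

request_data = {"contents": [], "tools": [], "generation_config": {}}  # placeholder body
request_data["tool_config"] = tool_config.model_dump()

FunctionCallingConfig also accepts an allowed_function_names list for pinning the call to one specific function; the hunk above sets only the mode.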
letta/llm_api/google_constants.py CHANGED
@@ -1,4 +1,5 @@
 GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
+    "gemini-2.5-pro-exp-03-25": 1048576,
     "gemini-2.0-flash-001": 1048576,
     "gemini-2.0-pro-exp-02-05": 2097152,
     "gemini-2.0-flash-lite-preview-02-05": 1048576,
@@ -9,4 +10,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
     "gemini-1.0-pro-vision": 16384,
 }
 
+GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
+
 GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}
letta/llm_api/google_vertex_client.py CHANGED
@@ -38,12 +38,12 @@ class GoogleVertexClient(GoogleAIClient):
         self,
         messages: List[PydanticMessage],
         tools: List[dict],
-
+        force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, tools,
+        request_data = super().build_request_data(messages, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")
 
letta/llm_api/llm_api_tools.py CHANGED
@@ -340,7 +340,7 @@ def create(
             tool_choice = {"type": "any", "disable_parallel_tool_use": True}
         else:
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-        tools = [{"type": "function", "function": f} for f in functions]
+        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
 
         chat_completion_request = ChatCompletionRequest(
             model=llm_config.model,
letta/llm_api/llm_client.py CHANGED
@@ -49,5 +49,12 @@ class LLMClient:
                     llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
+            case "openai":
+                from letta.llm_api.openai_client import OpenAIClient
+
+                return OpenAIClient(
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                )
             case _:
                 return None
letta/llm_api/llm_client_base.py CHANGED
@@ -32,9 +32,7 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
-        tool_call: Optional[str] = None,
         stream: bool = False,
-        first_message: bool = False,
         force_tool_call: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
         """
@@ -42,7 +40,7 @@ class LLMClientBase:
         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools,
+        request_data = self.build_request_data(messages, tools, force_tool_call)
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -60,9 +58,7 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
-        tool_call: Optional[str] = None,
         stream: bool = False,
-        first_message: bool = False,
         force_tool_call: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
         """
@@ -70,7 +66,7 @@ class LLMClientBase:
         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools,
+        request_data = self.build_request_data(messages, tools, force_tool_call)
         response_data = {}
 
         try:
@@ -90,7 +86,6 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: List[dict],
-        tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
         """
letta/llm_api/openai.py CHANGED
@@ -252,6 +252,8 @@ def openai_chat_completions_process_stream(
 
     n_chunks = 0  # approx == n_tokens
     chunk_idx = 0
+    prev_message_type = None
+    message_idx = 0
     try:
         for chat_completion_chunk in openai_chat_completions_request_stream(
             url=url, api_key=api_key, chat_completion_request=chat_completion_request
@@ -268,13 +270,17 @@ def openai_chat_completions_process_stream(
 
             if stream_interface:
                 if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.process_chunk(
+                    message_type = stream_interface.process_chunk(
                         chat_completion_chunk,
                         message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                         message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
                         expect_reasoning_content=expect_reasoning_content,
                         name=name,
+                        message_index=message_idx,
                     )
+                    if message_type != prev_message_type and message_type is not None:
+                        message_idx += 1
+                        prev_message_type = message_type
                 elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                     stream_interface.process_refresh(chat_completion_response)
                 else:
letta/llm_api/openai_client.py ADDED
@@ -0,0 +1,250 @@
+import os
+from typing import List, Optional
+
+import openai
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from letta.errors import (
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMUnprocessableEntityError,
+)
+from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, unpack_all_inner_thoughts_from_kwargs
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+from letta.log import get_logger
+from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
+from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
+from letta.schemas.openai.chat_completion_request import FunctionSchema
+from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
+from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.settings import model_settings
+
+logger = get_logger(__name__)
+
+
+class OpenAIClient(LLMClientBase):
+    def _prepare_client_kwargs(self) -> dict:
+        api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+        # supposedly the openai python client requires a dummy API key
+        api_key = api_key or "DUMMY_API_KEY"
+        kwargs = {"api_key": api_key, "base_url": self.llm_config.model_endpoint}
+
+        return kwargs
+
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
+        force_tool_call: Optional[str] = None,
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for the OpenAI API.
+        """
+        if tools and self.llm_config.put_inner_thoughts_in_kwargs:
+            # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+            # TODO(fix)
+            inner_thoughts_desc = (
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in self.llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+            )
+            tools = add_inner_thoughts_to_functions(
+                functions=tools,
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=inner_thoughts_desc,
+                put_inner_thoughts_first=True,
+            )
+
+        openai_message_list = [
+            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs))
+            for m in messages
+        ]
+
+        if self.llm_config.model:
+            model = self.llm_config.model
+        else:
+            logger.warning(f"Model type not set in llm_config: {self.llm_config.model_dump_json(indent=4)}")
+            model = None
+
+        # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+        # TODO(matt) move into LLMConfig
+        # TODO: This vllm checking is very brittle and is a patch at most
+        if self.llm_config.model_endpoint == "https://inference.memgpt.ai" or (self.llm_config.handle and "vllm" in self.llm_config.handle):
+            tool_choice = "auto"  # TODO change to "required" once proxy supports it
+        else:
+            tool_choice = "required"
+
+        if force_tool_call is not None:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
+
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[OpenAITool(type="function", function=f) for f in tools] if tools else None,
+            tool_choice=tool_choice,
+            user=str(),
+            max_completion_tokens=self.llm_config.max_tokens,
+            temperature=self.llm_config.temperature,
+        )
+
+        if "inference.memgpt.ai" in self.llm_config.model_endpoint:
+            # override user id for inference.memgpt.ai
+            import uuid
+
+            data.user = str(uuid.UUID(int=0))
+            data.model = "memgpt-openai"
+
+        if data.tools is not None and len(data.tools) > 0:
+            # Convert to structured output style (which has 'strict' and no optionals)
+            for tool in data.tools:
+                try:
+                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                    tool.function = FunctionSchema(**structured_output_version)
+                except ValueError as e:
+                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+
+        return data.model_dump(exclude_unset=True)
+
+    def request(self, request_data: dict) -> dict:
+        """
+        Performs underlying synchronous request to OpenAI API and returns raw response dict.
+        """
+        client = OpenAI(**self._prepare_client_kwargs())
+
+        response: ChatCompletion = client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    async def request_async(self, request_data: dict) -> dict:
+        """
+        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
+        """
+        client = AsyncOpenAI(**self._prepare_client_kwargs())
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
+        return response.model_dump()
+
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],  # Included for consistency, maybe used later
+    ) -> ChatCompletionResponse:
+        """
+        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
+        Handles potential extraction of inner thoughts if they were added via kwargs.
+        """
+        # OpenAI's response structure directly maps to ChatCompletionResponse
+        # We just need to instantiate the Pydantic model for validation and type safety.
+        chat_completion_response = ChatCompletionResponse(**response_data)
+
+        # Unpack inner thoughts if they were embedded in function arguments
+        if self.llm_config.put_inner_thoughts_in_kwargs:
+            chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
+                response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
+            )
+
+        return chat_completion_response
+
+    def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
+        """
+        Performs underlying streaming request to OpenAI and returns the stream iterator.
+        """
+        client = OpenAI(**self._prepare_client_kwargs())
+        response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
+        return response_stream
+
+    async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        client = AsyncOpenAI(**self._prepare_client_kwargs())
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+        return response_stream
+
+    def handle_llm_error(self, e: Exception) -> Exception:
+        """
+        Maps OpenAI-specific errors to common LLMError types.
+        """
+        if isinstance(e, openai.APIConnectionError):
+            logger.warning(f"[OpenAI] API connection error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to OpenAI: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        if isinstance(e, openai.RateLimitError):
+            logger.warning(f"[OpenAI] Rate limited (429). Consider backoff. Error: {e}")
+            return LLMRateLimitError(
+                message=f"Rate limited by OpenAI: {str(e)}",
+                code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                details=e.body,  # Include body which often has rate limit details
+            )
+
+        if isinstance(e, openai.BadRequestError):
+            logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
+            # BadRequestError can signify different issues (e.g., invalid args, context length)
+            # Check message content if finer-grained errors are needed
+            # Example: if "context_length_exceeded" in str(e): return LLMContextLengthExceededError(...)
+            return LLMBadRequestError(
+                message=f"Bad request to OpenAI: {str(e)}",
+                code=ErrorCode.INVALID_ARGUMENT,  # Or more specific if detectable
+                details=e.body,
+            )
+
+        if isinstance(e, openai.AuthenticationError):
+            logger.error(f"[OpenAI] Authentication error (401): {str(e)}")  # More severe log level
+            return LLMAuthenticationError(
+                message=f"Authentication failed with OpenAI: {str(e)}", code=ErrorCode.UNAUTHENTICATED, details=e.body
+            )
+
+        if isinstance(e, openai.PermissionDeniedError):
+            logger.error(f"[OpenAI] Permission denied (403): {str(e)}")  # More severe log level
+            return LLMPermissionDeniedError(
+                message=f"Permission denied by OpenAI: {str(e)}", code=ErrorCode.PERMISSION_DENIED, details=e.body
+            )
+
+        if isinstance(e, openai.NotFoundError):
+            logger.warning(f"[OpenAI] Resource not found (404): {str(e)}")
+            # Could be invalid model name, etc.
+            return LLMNotFoundError(message=f"Resource not found in OpenAI: {str(e)}", code=ErrorCode.NOT_FOUND, details=e.body)
+
+        if isinstance(e, openai.UnprocessableEntityError):
+            logger.warning(f"[OpenAI] Unprocessable entity (422): {str(e)}")
+            return LLMUnprocessableEntityError(
+                message=f"Invalid request content for OpenAI: {str(e)}",
+                code=ErrorCode.INVALID_ARGUMENT,  # Usually validation errors
+                details=e.body,
+            )
+
+        # General API error catch-all
+        if isinstance(e, openai.APIStatusError):
+            logger.warning(f"[OpenAI] API status error ({e.status_code}): {str(e)}")
+            # Map based on status code potentially
+            if e.status_code >= 500:
+                error_cls = LLMServerError
+                error_code = ErrorCode.INTERNAL_SERVER_ERROR
+            else:
+                # Treat other 4xx as bad requests if not caught above
+                error_cls = LLMBadRequestError
+                error_code = ErrorCode.INVALID_ARGUMENT
+
+            return error_cls(
+                message=f"OpenAI API error: {str(e)}",
+                code=error_code,
+                details={
+                    "status_code": e.status_code,
+                    "response": str(e.response),
+                    "body": e.body,
+                },
+            )
+
+        # Fallback for unexpected errors
+        return super().handle_llm_error(e)
letta/orm/__init__.py CHANGED
@@ -2,15 +2,19 @@ from letta.orm.agent import Agent
 from letta.orm.agents_tags import AgentsTags
 from letta.orm.base import Base
 from letta.orm.block import Block
+from letta.orm.block_history import BlockHistory
 from letta.orm.blocks_agents import BlocksAgents
 from letta.orm.file import FileMetadata
 from letta.orm.group import Group
 from letta.orm.groups_agents import GroupsAgents
+from letta.orm.groups_blocks import GroupsBlocks
 from letta.orm.identities_agents import IdentitiesAgents
 from letta.orm.identities_blocks import IdentitiesBlocks
 from letta.orm.identity import Identity
 from letta.orm.job import Job
 from letta.orm.job_messages import JobMessage
+from letta.orm.llm_batch_items import LLMBatchItem
+from letta.orm.llm_batch_job import LLMBatchJob
 from letta.orm.message import Message
 from letta.orm.organization import Organization
 from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
letta/orm/agent.py CHANGED
@@ -68,6 +68,9 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     message_buffer_autoclear: Mapped[bool] = mapped_column(
         Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
     )
+    enable_sleeptime: Mapped[Optional[bool]] = mapped_column(
+        Boolean, doc="If set to True, memory management will move to a background agent thread."
+    )
 
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
@@ -141,6 +144,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         viewonly=True,
         back_populates="manager_agent",
     )
+    batch_items: Mapped[List["LLMBatchItem"]] = relationship("LLMBatchItem", back_populates="agent", lazy="selectin")
 
     def to_pydantic(self, include_relationships: Optional[Set[str]] = None) -> PydanticAgentState:
         """
@@ -190,6 +194,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "identity_ids": [],
             "multi_agent_group": None,
             "tool_exec_environment_variables": [],
+            "enable_sleeptime": None,
         }
 
         # Optional fields: only included if requested
@@ -201,6 +206,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "identity_ids": lambda: [i.id for i in self.identities],
             "multi_agent_group": lambda: self.multi_agent_group,
             "tool_exec_environment_variables": lambda: self.tool_exec_environment_variables,
+            "enable_sleeptime": lambda: self.enable_sleeptime,
         }
 
         include_relationships = set(optional_fields.keys() if include_relationships is None else include_relationships)
letta/orm/block.py CHANGED
@@ -1,9 +1,10 @@
 from typing import TYPE_CHECKING, List, Optional, Type
 
-from sqlalchemy import JSON, BigInteger, Index, Integer, UniqueConstraint, event
-from sqlalchemy.orm import Mapped, attributes, mapped_column, relationship
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, UniqueConstraint, event
+from sqlalchemy.orm import Mapped, attributes, declared_attr, mapped_column, relationship
 
 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
+from letta.orm.block_history import BlockHistory
 from letta.orm.blocks_agents import BlocksAgents
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
@@ -38,6 +39,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
     limit: Mapped[BigInteger] = mapped_column(Integer, default=CORE_MEMORY_BLOCK_CHAR_LIMIT, doc="Character limit of the block.")
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default={}, doc="arbitrary information related to the block.")
 
+    # history pointers / locking mechanisms
+    current_history_entry_id: Mapped[Optional[str]] = mapped_column(
+        String, ForeignKey("block_history.id", name="fk_block_current_history_entry", use_alter=True), nullable=True, index=True
+    )
+    version: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=1, server_default="1", doc="Optimistic locking version counter, incremented on each state change."
+    )
+    # NOTE: This takes advantage of built-in optimistic locking functionality by SqlAlchemy
+    # https://docs.sqlalchemy.org/en/20/orm/versioning.html
+    __mapper_args__ = {"version_id_col": version}
+
     # relationships
     organization: Mapped[Optional["Organization"]] = relationship("Organization")
     agents: Mapped[List["Agent"]] = relationship(
@@ -55,6 +67,13 @@ class Block(OrganizationMixin, SqlalchemyBase):
         back_populates="blocks",
         passive_deletes=True,
     )
+    groups: Mapped[List["Group"]] = relationship(
+        "Group",
+        secondary="groups_blocks",
+        lazy="selectin",
+        back_populates="shared_blocks",
+        passive_deletes=True,
+    )
 
     def to_pydantic(self) -> Type:
         match self.label:
@@ -68,6 +87,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
         model_dict["metadata"] = self.metadata_
         return Schema.model_validate(model_dict)
 
+    @declared_attr
+    def current_history_entry(cls) -> Mapped[Optional["BlockHistory"]]:
+        # Relationship to easily load the specific history entry that is current
+        return relationship(
+            "BlockHistory",
+            primaryjoin=lambda: cls.current_history_entry_id == BlockHistory.id,
+            foreign_keys=[cls.current_history_entry_id],
+            lazy="joined",  # Typically want current history details readily available
+            post_update=True,
+        )  # Helps manage potential FK cycles
+
 
 @event.listens_for(Block, "after_update")  # Changed from 'before_update'
 def block_before_update(mapper, connection, target):
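The new version column together with __mapper_args__ = {"version_id_col": version} opts Block into SQLAlchemy's built-in optimistic locking, so concurrent edits to the same block fail fast instead of silently overwriting each other; the BlockHistory model added below stores the checkpointed states for undo/redo. A minimal self-contained illustration of the locking mechanism (toy Note model and SQLite database, not letta's schema):

from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
from sqlalchemy.orm.exc import StaleDataError


class Base(DeclarativeBase):
    pass


class Note(Base):
    __tablename__ = "note"
    id: Mapped[int] = mapped_column(primary_key=True)
    body: Mapped[str] = mapped_column(default="")
    version: Mapped[int] = mapped_column(nullable=False)
    # Every ORM UPDATE is emitted as "... WHERE id = :id AND version = :loaded_version"
    # and bumps version; zero matched rows raises StaleDataError.
    __mapper_args__ = {"version_id_col": version}


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Note(id=1, body="first"))
    session.commit()

    note = session.get(Note, 1)  # loaded with version == 1
    # Simulate another writer changing the row after we read it.
    session.connection().exec_driver_sql(
        "UPDATE note SET body = 'someone else', version = version + 1 WHERE id = 1"
    )
    note.body = "my edit"
    try:
        session.commit()  # UPDATE ... WHERE version = 1 matches nothing
    except StaleDataError:
        session.rollback()  # caller must reload and retry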
letta/orm/block_history.py ADDED
@@ -0,0 +1,46 @@
+import uuid
+from typing import Optional
+
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from letta.orm.enums import ActorType
+from letta.orm.mixins import OrganizationMixin
+from letta.orm.sqlalchemy_base import SqlalchemyBase
+
+
+class BlockHistory(OrganizationMixin, SqlalchemyBase):
+    """Stores a single historical state of a Block for undo/redo functionality."""
+
+    __tablename__ = "block_history"
+
+    __table_args__ = (
+        # PRIMARY lookup index for finding specific history entries & ordering
+        Index("ix_block_history_block_id_sequence", "block_id", "sequence_number", unique=True),
+    )
+
+    # agent generates its own id
+    # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
+    # TODO: Some still rely on the Pydantic object to do this
+    id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"block_hist-{uuid.uuid4()}")
+
+    # Snapshot State Fields (Copied from Block)
+    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    label: Mapped[str] = mapped_column(String, nullable=False)
+    value: Mapped[str] = mapped_column(Text, nullable=False)
+    limit: Mapped[BigInteger] = mapped_column(BigInteger, nullable=False)
+    metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
+
+    # Editor info
+    # These are not made to be FKs because these may not always exist (e.g. a User be deleted after they made a checkpoint)
+    actor_type: Mapped[Optional[ActorType]] = mapped_column(String, nullable=True)
+    actor_id: Mapped[Optional[str]] = mapped_column(String, nullable=True)
+
+    # Relationships
+    block_id: Mapped[str] = mapped_column(
+        String, ForeignKey("block.id", ondelete="CASCADE"), nullable=False  # History deleted if Block is deleted
+    )
+
+    sequence_number: Mapped[int] = mapped_column(
+        Integer, nullable=False, doc="Monotonically increasing sequence number for the history of a specific block_id, starting from 1."
+    )
letta/orm/custom_columns.py CHANGED
@@ -2,16 +2,24 @@ from sqlalchemy import JSON
 from sqlalchemy.types import BINARY, TypeDecorator
 
 from letta.helpers.converters import (
+    deserialize_agent_step_state,
+    deserialize_batch_request_result,
+    deserialize_create_batch_response,
     deserialize_embedding_config,
     deserialize_llm_config,
     deserialize_message_content,
+    deserialize_poll_batch_response,
     deserialize_tool_calls,
     deserialize_tool_returns,
     deserialize_tool_rules,
     deserialize_vector,
+    serialize_agent_step_state,
+    serialize_batch_request_result,
+    serialize_create_batch_response,
     serialize_embedding_config,
     serialize_llm_config,
     serialize_message_content,
+    serialize_poll_batch_response,
     serialize_tool_calls,
     serialize_tool_returns,
     serialize_tool_rules,
@@ -108,3 +116,55 @@ class CommonVector(TypeDecorator):
 
     def process_result_value(self, value, dialect):
         return deserialize_vector(value, dialect)
+
+
+class CreateBatchResponseColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, value, dialect):
+        return serialize_create_batch_response(value)
+
+    def process_result_value(self, value, dialect):
+        return deserialize_create_batch_response(value)
+
+
+class PollBatchResponseColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, value, dialect):
+        return serialize_poll_batch_response(value)
+
+    def process_result_value(self, value, dialect):
+        return deserialize_poll_batch_response(value)
+
+
+class BatchRequestResultColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, value, dialect):
+        return serialize_batch_request_result(value)
+
+    def process_result_value(self, value, dialect):
+        return deserialize_batch_request_result(value)
+
+
+class AgentStepStateColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, value, dialect):
+        return serialize_agent_step_state(value)
+
+    def process_result_value(self, value, dialect):
+        return deserialize_agent_step_state(value)
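Each of the four new column types follows the same TypeDecorator pattern: persist the value as JSON and convert it to and from its Pydantic schema through a serializer/deserializer pair in letta.helpers.converters. A generic self-contained sketch of that pattern (toy StepState model and inline converters, not the letta ones):

from typing import Optional

from pydantic import BaseModel
from sqlalchemy import JSON, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
from sqlalchemy.types import TypeDecorator


class StepState(BaseModel):
    """Stand-in for the real agent step state schema."""

    step: int
    notes: Optional[str] = None


class StepStateColumn(TypeDecorator):
    """Stores a StepState as JSON and rebuilds the model when reading it back."""

    impl = JSON
    cache_ok = True

    def process_bind_param(self, value, dialect):  # Python -> DB
        return value.model_dump() if value is not None else None

    def process_result_value(self, value, dialect):  # DB -> Python
        return StepState(**value) if value is not None else None


class Base(DeclarativeBase):
    pass


class BatchItem(Base):
    __tablename__ = "batch_item"
    id: Mapped[int] = mapped_column(primary_key=True)
    step_state = mapped_column(StepStateColumn, nullable=True)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(BatchItem(id=1, step_state=StepState(step=2, notes="resume here")))
    session.commit()
    assert session.get(BatchItem, 1).step_state.step == 2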