letta-nightly 0.6.37.dev20250310103931__py3-none-any.whl → 0.6.38.dev20250312104155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +34 -12
- letta/client/client.py +1 -50
- letta/constants.py +1 -1
- letta/functions/function_sets/multi_agent.py +9 -8
- letta/functions/helpers.py +33 -6
- letta/llm_api/anthropic.py +20 -0
- letta/llm_api/google_ai_client.py +332 -0
- letta/llm_api/google_vertex_client.py +214 -0
- letta/llm_api/llm_client.py +48 -0
- letta/llm_api/llm_client_base.py +129 -0
- letta/orm/step.py +1 -0
- letta/schemas/block.py +4 -48
- letta/schemas/letta_message.py +26 -0
- letta/schemas/message.py +1 -1
- letta/schemas/step.py +1 -0
- letta/serialize_schemas/agent.py +8 -1
- letta/server/rest_api/interface.py +9 -7
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
- letta/server/rest_api/routers/v1/agents.py +12 -8
- letta/server/rest_api/routers/v1/steps.py +2 -0
- letta/server/rest_api/routers/v1/voice.py +3 -6
- letta/services/agent_manager.py +56 -3
- letta/services/helpers/agent_manager_helper.py +12 -1
- letta/services/identity_manager.py +7 -1
- letta/services/message_manager.py +40 -0
- letta/services/step_manager.py +8 -1
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/METADATA +18 -17
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/RECORD +32 -28
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py
ADDED

@@ -0,0 +1,214 @@
+import uuid
+from typing import List, Optional
+
+from google import genai
+from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ToolConfig
+
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.json_helpers import json_dumps
+from letta.llm_api.google_ai_client import GoogleAIClient
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.settings import model_settings
+from letta.utils import get_tool_call_id
+
+
+class GoogleVertexClient(GoogleAIClient):
+
+    def request(self, request_data: dict) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = genai.Client(
+            vertexai=True,
+            project=model_settings.google_cloud_project,
+            location=model_settings.google_cloud_location,
+            http_options={"api_version": "v1"},
+        )
+        response = client.models.generate_content(
+            model=self.llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+        return response.model_dump()
+
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        tools: List[dict],
+        tool_call: Optional[str],
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for this client.
+        """
+        request_data = super().build_request_data(messages, tools, tool_call)
+        request_data["config"] = request_data.pop("generation_config")
+        request_data["config"]["tools"] = request_data.pop("tools")
+
+        tool_config = ToolConfig(
+            function_calling_config=FunctionCallingConfig(
+                # ANY mode forces the model to predict only function calls
+                mode=FunctionCallingConfigMode.ANY,
+            )
+        )
+        request_data["config"]["tool_config"] = tool_config.model_dump()
+
+        return request_data
+
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],
+    ) -> ChatCompletionResponse:
+        """
+        Converts custom response format from llm client into an OpenAI
+        ChatCompletionsResponse object.
+
+        Example:
+        {
+          "candidates": [
+            {
+              "content": {
+                "parts": [
+                  {
+                    "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                  }
+                ]
+              }
+            }
+          ],
+          "usageMetadata": {
+            "promptTokenCount": 9,
+            "candidatesTokenCount": 27,
+            "totalTokenCount": 36
+          }
+        }
+        """
+        response = GenerateContentResponse(**response_data)
+        try:
+            choices = []
+            index = 0
+            for candidate in response.candidates:
+                content = candidate.content
+
+                role = content.role
+                assert role == "model", f"Unknown role in response: {role}"
+
+                parts = content.parts
+                # TODO support parts / multimodal
+                # TODO support parallel tool calling natively
+                # TODO Alternative here is to throw away everything else except for the first part
+                for response_message in parts:
+                    # Convert the actual message style to OpenAI style
+                    if response_message.function_call:
+                        function_call = response_message.function_call
+                        function_name = function_call.name
+                        function_args = function_call.args
+                        assert isinstance(function_args, dict), function_args
+
+                        # NOTE: this also involves stripping the inner monologue out of the function
+                        if self.llm_config.put_inner_thoughts_in_kwargs:
+                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                            assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                            assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                        else:
+                            inner_thoughts = None
+
+                        # Google AI API doesn't generate tool call IDs
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                            tool_calls=[
+                                ToolCall(
+                                    id=get_tool_call_id(),
+                                    type="function",
+                                    function=FunctionCall(
+                                        name=function_name,
+                                        arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                    ),
+                                )
+                            ],
+                        )
+
+                    else:
+
+                        # Inner thoughts are the content by default
+                        inner_thoughts = response_message.text
+
+                        # Google AI API doesn't generate tool call IDs
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                        )
+
+                    # Google AI API uses different finish reason strings than OpenAI
+                    # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                    # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                    # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                    # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                    finish_reason = candidate.finish_reason.value
+                    if finish_reason == "STOP":
+                        openai_finish_reason = (
+                            "function_call"
+                            if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                            else "stop"
+                        )
+                    elif finish_reason == "MAX_TOKENS":
+                        openai_finish_reason = "length"
+                    elif finish_reason == "SAFETY":
+                        openai_finish_reason = "content_filter"
+                    elif finish_reason == "RECITATION":
+                        openai_finish_reason = "content_filter"
+                    else:
+                        raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                    choices.append(
+                        Choice(
+                            finish_reason=openai_finish_reason,
+                            index=index,
+                            message=openai_response_message,
+                        )
+                    )
+                    index += 1
+
+            # if len(choices) > 1:
+            #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+            # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+            # "usageMetadata": {
+            #   "promptTokenCount": 9,
+            #   "candidatesTokenCount": 27,
+            #   "totalTokenCount": 36
+            # }
+            if response.usage_metadata:
+                usage = UsageStatistics(
+                    prompt_tokens=response.usage_metadata.prompt_token_count,
+                    completion_tokens=response.usage_metadata.candidates_token_count,
+                    total_tokens=response.usage_metadata.total_token_count,
+                )
+            else:
+                # Count it ourselves
+                assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+                prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                total_tokens = prompt_tokens + completion_tokens
+                usage = UsageStatistics(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                )
+
+            response_id = str(uuid.uuid4())
+            return ChatCompletionResponse(
+                id=response_id,
+                choices=choices,
+                model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time(),
+                usage=usage,
+            )
+        except KeyError as e:
+            raise e
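
For reference, the reshaping that build_request_data applies to the parent GoogleAIClient payload looks roughly like the sketch below. The payload values are invented for illustration; only the key renames and the ANY-mode tool config come from the diff above.

from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

# Hypothetical payload as GoogleAIClient.build_request_data might return it
request_data = {
    "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
    "generation_config": {"temperature": 0.7, "max_output_tokens": 1024},
    "tools": [{"function_declarations": [{"name": "send_message"}]}],
}

# Vertex expects everything under "config" rather than "generation_config",
# with the tool declarations nested inside it
request_data["config"] = request_data.pop("generation_config")
request_data["config"]["tools"] = request_data.pop("tools")

# ANY mode forces the model to respond with a function call
tool_config = ToolConfig(function_calling_config=FunctionCallingConfig(mode=FunctionCallingConfigMode.ANY))
request_data["config"]["tool_config"] = tool_config.model_dump()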
letta/llm_api/llm_client.py
ADDED

@@ -0,0 +1,48 @@
+from typing import Optional
+
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.schemas.llm_config import LLMConfig
+
+
+class LLMClient:
+    """Factory class for creating LLM clients based on the model endpoint type."""
+
+    @staticmethod
+    def create(
+        agent_id: str,
+        llm_config: LLMConfig,
+        put_inner_thoughts_first: bool = True,
+        actor_id: Optional[str] = None,
+    ) -> Optional[LLMClientBase]:
+        """
+        Create an LLM client based on the model endpoint type.
+
+        Args:
+            agent_id: Unique identifier for the agent
+            llm_config: Configuration for the LLM model
+            put_inner_thoughts_first: Whether to put inner thoughts first in the response
+            use_structured_output: Whether to use structured output
+            use_tool_naming: Whether to use tool naming
+            actor_id: Optional actor identifier
+
+        Returns:
+            An instance of LLMClientBase subclass
+
+        Raises:
+            ValueError: If the model endpoint type is not supported
+        """
+        match llm_config.model_endpoint_type:
+            case "google_ai":
+                from letta.llm_api.google_ai_client import GoogleAIClient
+
+                return GoogleAIClient(
+                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                )
+            case "google_vertex":
+                from letta.llm_api.google_vertex_client import GoogleVertexClient
+
+                return GoogleVertexClient(
+                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                )
+            case _:
+                return None
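
A hedged usage sketch of the new factory: the config values, ids, and the messages/tools variables are placeholders, and endpoint types other than the two Google ones currently fall through to None (callers presumably keep using their existing request path in that case).

from letta.llm_api.llm_client import LLMClient
from letta.schemas.llm_config import LLMConfig

# Placeholder config; model_endpoint_type is what routes to a concrete client
llm_config = LLMConfig(
    model="gemini-2.0-flash-001",
    model_endpoint_type="google_vertex",
    context_window=32_000,
)

client = LLMClient.create(agent_id="agent-123", llm_config=llm_config)
if client is None:
    # Unsupported endpoint type: caller falls back to its existing request path
    ...
else:
    # in_context_messages / tool_json_schemas are assumed to exist in the caller
    response = client.send_llm_request(messages=in_context_messages, tools=tool_json_schemas)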
letta/llm_api/llm_client_base.py
ADDED

@@ -0,0 +1,129 @@
+from abc import abstractmethod
+from typing import List, Optional, Union
+
+from openai import AsyncStream, Stream
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.tracing import log_event
+
+
+class LLMClientBase:
+    """
+    Abstract base class for LLM clients, formatting the request objects,
+    handling the downstream request and parsing into chat completions response format
+    """
+
+    def __init__(
+        self,
+        agent_id: str,
+        llm_config: LLMConfig,
+        put_inner_thoughts_first: Optional[bool] = True,
+        use_structured_output: Optional[bool] = True,
+        use_tool_naming: bool = True,
+        actor_id: Optional[str] = None,
+    ):
+        self.agent_id = agent_id
+        self.llm_config = llm_config
+        self.put_inner_thoughts_first = put_inner_thoughts_first
+        self.actor_id = actor_id
+
+    def send_llm_request(
+        self,
+        messages: List[Message],
+        tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+        tool_call: Optional[str] = None,
+        stream: bool = False,
+        first_message: bool = False,
+        force_tool_call: Optional[str] = None,
+    ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
+        """
+        Issues a request to the downstream model endpoint and parses response.
+        If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
+        Otherwise returns a ChatCompletionResponse.
+        """
+        request_data = self.build_request_data(messages, tools, tool_call)
+        log_event(name="llm_request_sent", attributes=request_data)
+        if stream:
+            return self.stream(request_data)
+        else:
+            response_data = self.request(request_data)
+            log_event(name="llm_response_received", attributes=response_data)
+            return self.convert_response_to_chat_completion(response_data, messages)
+
+    async def send_llm_request_async(
+        self,
+        messages: List[Message],
+        tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+        tool_call: Optional[str] = None,
+        stream: bool = False,
+        first_message: bool = False,
+        force_tool_call: Optional[str] = None,
+    ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
+        """
+        Issues a request to the downstream model endpoint.
+        If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
+        Otherwise returns a ChatCompletionResponse.
+        """
+        request_data = self.build_request_data(messages, tools, tool_call)
+        log_event(name="llm_request_sent", attributes=request_data)
+        if stream:
+            return await self.stream_async(request_data)
+        else:
+            response_data = await self.request_async(request_data)
+            log_event(name="llm_response_received", attributes=response_data)
+            return self.convert_response_to_chat_completion(response_data, messages)
+
+    @abstractmethod
+    def build_request_data(
+        self,
+        messages: List[Message],
+        tools: List[dict],
+        tool_call: Optional[str],
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for this client.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def request(self, request_data: dict) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def request_async(self, request_data: dict) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[Message],
+    ) -> ChatCompletionResponse:
+        """
+        Converts custom response format from llm client into an OpenAI
+        ChatCompletionsResponse object.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
+        """
+        Performs underlying streaming request to llm and returns raw response.
+        """
+        raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
+
+    @abstractmethod
+    async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
+        """
+        Performs underlying streaming request to llm and returns raw response.
+        """
+        raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
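
To show which hooks the base class expects a provider to supply, here is a hypothetical minimal subclass; EchoClient and its behavior are invented for illustration and are not part of the package. send_llm_request then drives the template flow: build_request_data -> request -> convert_response_to_chat_completion.

from typing import List, Optional

from letta.llm_api.llm_client_base import LLMClientBase
from letta.schemas.message import Message
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse


class EchoClient(LLMClientBase):
    def build_request_data(self, messages: List[Message], tools: List[dict], tool_call: Optional[str]) -> dict:
        # Shape the payload however the downstream API expects it
        return {"messages": [m.model_dump() for m in messages], "tools": tools or []}

    def request(self, request_data: dict) -> dict:
        # Call the provider here and return its raw response as a dict
        return {"echo": request_data}

    def convert_response_to_chat_completion(self, response_data: dict, input_messages: List[Message]) -> ChatCompletionResponse:
        # Map the raw payload into the OpenAI-style response the agent loop consumes
        raise NotImplementedError("left out of this sketch")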
letta/orm/step.py
CHANGED

@@ -33,6 +33,7 @@ class Step(SqlalchemyBase):
     job_id: Mapped[Optional[str]] = mapped_column(
         ForeignKey("jobs.id", ondelete="SET NULL"), nullable=True, doc="The unique identified of the job run that triggered this step"
     )
+    agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
     model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
letta/schemas/block.py
CHANGED

@@ -1,6 +1,6 @@
 from typing import Optional
 
-from pydantic import
+from pydantic import Field, model_validator
 from typing_extensions import Self
 
 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT

@@ -37,7 +37,8 @@ class BaseBlock(LettaBase, validate_assignment=True):
 
     @model_validator(mode="after")
     def verify_char_limit(self) -> Self:
-
+        # self.limit can be None from
+        if self.limit is not None and self.value and len(self.value) > self.limit:
             error_msg = f"Edit failed: Exceeds {self.limit} character limit (requested {len(self.value)}) - {str(self)}."
             raise ValueError(error_msg)
 

@@ -89,61 +90,16 @@ class Persona(Block):
     label: str = "persona"
 
 
-# class CreateBlock(BaseBlock):
-#     """Create a block"""
-#
-#     is_template: bool = True
-#     label: str = Field(..., description="Label of the block.")
-
-
-class BlockLabelUpdate(BaseModel):
-    """Update the label of a block"""
-
-    current_label: str = Field(..., description="Current label of the block.")
-    new_label: str = Field(..., description="New label of the block.")
-
-
-# class CreatePersona(CreateBlock):
-#     """Create a persona block"""
-#
-#     label: str = "persona"
-#
-#
-# class CreateHuman(CreateBlock):
-#     """Create a human block"""
-#
-#     label: str = "human"
-
-
 class BlockUpdate(BaseBlock):
     """Update a block"""
 
-    limit: Optional[int] = Field(
+    limit: Optional[int] = Field(None, description="Character limit of the block.")
     value: Optional[str] = Field(None, description="Value of the block.")
 
     class Config:
         extra = "ignore"  # Ignores extra fields
 
 
-class BlockLimitUpdate(BaseModel):
-    """Update the limit of a block"""
-
-    label: str = Field(..., description="Label of the block.")
-    limit: int = Field(..., description="New limit of the block.")
-
-
-# class UpdatePersona(BlockUpdate):
-#     """Update a persona block"""
-#
-#     label: str = "persona"
-#
-#
-# class UpdateHuman(BlockUpdate):
-#     """Update a human block"""
-#
-#     label: str = "human"
-
-
 class CreateBlock(BaseBlock):
     """Create a block"""
 
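
The net effect of the validator change: a block value is only length-checked when a limit is actually set, and BlockUpdate now defaults limit to None. A small illustration (values invented):

from letta.schemas.block import BlockUpdate

BlockUpdate(value="x" * 10_000)               # no limit set -> validator skips the length check
BlockUpdate(value="x" * 10_000, limit=5_000)  # raises ValueError: "Edit failed: Exceeds 5000 character limit ..."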
letta/schemas/letta_message.py
CHANGED

@@ -236,6 +236,32 @@ LettaMessageUnion = Annotated[
 ]
 
 
+class UpdateSystemMessage(BaseModel):
+    content: Union[str, List[MessageContentUnion]]
+    message_type: Literal["system_message"] = "system_message"
+
+
+class UpdateUserMessage(BaseModel):
+    content: Union[str, List[MessageContentUnion]]
+    message_type: Literal["user_message"] = "user_message"
+
+
+class UpdateReasoningMessage(BaseModel):
+    reasoning: Union[str, List[MessageContentUnion]]
+    message_type: Literal["reasoning_message"] = "reasoning_message"
+
+
+class UpdateAssistantMessage(BaseModel):
+    content: Union[str, List[MessageContentUnion]]
+    message_type: Literal["assistant_message"] = "assistant_message"
+
+
+LettaMessageUpdateUnion = Annotated[
+    Union[UpdateSystemMessage, UpdateUserMessage, UpdateReasoningMessage, UpdateAssistantMessage],
+    Field(discriminator="message_type"),
+]
+
+
 def create_letta_message_union_schema():
     return {
         "oneOf": [
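
The new LettaMessageUpdateUnion is a discriminated union keyed on message_type, so an update payload resolves to the matching model automatically. A small sketch (the payload is an example, not taken from the diff):

from pydantic import TypeAdapter

from letta.schemas.letta_message import LettaMessageUpdateUnion

payload = {"message_type": "reasoning_message", "reasoning": "Rethinking the plan before replying."}
update = TypeAdapter(LettaMessageUpdateUnion).validate_python(payload)
# -> UpdateReasoningMessage(reasoning='Rethinking the plan before replying.', message_type='reasoning_message')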
letta/schemas/message.py
CHANGED

@@ -74,7 +74,7 @@ class MessageUpdate(BaseModel):
     """Request to update a message"""
 
     role: Optional[MessageRole] = Field(None, description="The role of the participant.")
-    content: Optional[Union[str, List[MessageContentUnion]]] = Field(
+    content: Optional[Union[str, List[MessageContentUnion]]] = Field(None, description="The content of the message.")
     # NOTE: probably doesn't make sense to allow remapping user_id or agent_id (vs creating a new message)
     # user_id: Optional[str] = Field(None, description="The unique identifier of the user.")
     # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
letta/schemas/step.py
CHANGED

@@ -18,6 +18,7 @@ class Step(StepBase):
     job_id: Optional[str] = Field(
         None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
     )
+    agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
     provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
     model: Optional[str] = Field(None, description="The name of the model used for this step.")
     model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
letta/serialize_schemas/agent.py
CHANGED

@@ -70,4 +70,11 @@ class SerializedAgentSchema(BaseSchema):
     class Meta(BaseSchema.Meta):
         model = Agent
         # TODO: Serialize these as well...
-        exclude = BaseSchema.Meta.exclude + (
+        exclude = BaseSchema.Meta.exclude + (
+            "project_id",
+            "template_id",
+            "base_template_id",
+            "sources",
+            "source_passages",
+            "agent_passages",
+        )
letta/server/rest_api/interface.py
CHANGED

@@ -918,13 +918,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # skip if there's a finish
             return None
         else:
-            #
-
-
-
-
-
-
+            # Only warn for non-Claude models since Claude commonly has empty first chunks
+            if not chunk.model.startswith("claude-"):
+                # Example case that would trigger here:
+                # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
+                # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
+                # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
+                # model='gpt-4o-mini-2024-07-18'
+                # object='chat.completion.chunk'
+                warnings.warn(f"Couldn't find delta in chunk: {chunk}")
             return None
 
         return processed_chunk
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED

@@ -24,7 +24,7 @@ logger = get_logger(__name__)
 
 
 @router.post(
-    "/chat/completions",
+    "/{agent_id}/chat/completions",
     response_model=None,
     operation_id="create_chat_completions",
     responses={

@@ -37,6 +37,7 @@ logger = get_logger(__name__)
     },
 )
 async def create_chat_completions(
+    agent_id: str,
     completion_request: CompletionCreateParams = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),

@@ -51,12 +52,6 @@ async def create_chat_completions(
 
     actor = server.user_manager.get_user_or_default(user_id=user_id)
 
-    agent_id = str(completion_request.get("user", None))
-    if agent_id is None:
-        error_msg = "Must pass agent_id in the 'user' field"
-        logger.error(error_msg)
-        raise HTTPException(status_code=400, detail=error_msg)
-
     letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
     llm_config = letta_agent.agent_state.llm_config
     if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
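
In practice the route change means callers address the agent in the URL path instead of passing its id through the OpenAI "user" field. A hedged sketch with httpx; the base URL, ids, and the router mount prefix are placeholders, not taken from the diff:

import httpx

BASE = "http://localhost:8283/openai"  # placeholder mount point for this router
agent_id = "agent-123"                 # placeholder agent id

resp = httpx.post(
    f"{BASE}/{agent_id}/chat/completions",  # agent id is now a path parameter
    json={"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "hi"}]},
    headers={"user_id": "user-123"},  # optional actor header, per the endpoint signature
)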
letta/server/rest_api/routers/v1/agents.py
CHANGED

@@ -13,13 +13,12 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent
-from letta.schemas.block import Block, BlockUpdate
+from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
-from letta.schemas.letta_message import LettaMessageUnion
+from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
 from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
-from letta.schemas.message import Message, MessageUpdate
 from letta.schemas.passage import Passage, PassageUpdate
 from letta.schemas.run import Run
 from letta.schemas.source import Source

@@ -119,6 +118,7 @@ async def upload_agent_serialized(
         True,
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
+    project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
 ):
     """
     Upload a serialized agent JSON file and recreate the agent in the system.

@@ -129,7 +129,11 @@ async def upload_agent_serialized(
     serialized_data = await file.read()
     agent_json = json.loads(serialized_data)
     new_agent = server.agent_manager.deserialize(
-        serialized_agent=agent_json,
+        serialized_agent=agent_json,
+        actor=actor,
+        append_copy_suffix=append_copy_suffix,
+        override_existing_tools=override_existing_tools,
+        project_id=project_id,
     )
     return new_agent
 

@@ -526,20 +530,20 @@ def list_messages(
     )
 
 
-@router.patch("/{agent_id}/messages/{message_id}", response_model=
+@router.patch("/{agent_id}/messages/{message_id}", response_model=LettaMessageUpdateUnion, operation_id="modify_message")
 def modify_message(
     agent_id: str,
     message_id: str,
-    request:
+    request: LettaMessageUpdateUnion = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """
     Update the details of a message associated with an agent.
     """
-    # TODO:
+    # TODO: support modifying tool calls/returns
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    return server.message_manager.
+    return server.message_manager.update_message_by_letta_message(message_id=message_id, letta_message_update=request, actor=actor)
 
 
 @router.post(
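
The modify_message route now takes one of the new update models as its body, discriminated by message_type, and returns the updated message (response_model=LettaMessageUpdateUnion). A hedged example request; the host, the /v1/agents prefix, and the ids are placeholders for wherever the agents router is mounted:

import httpx

resp = httpx.patch(
    "http://localhost:8283/v1/agents/agent-123/messages/message-456",  # placeholder URL
    json={"message_type": "user_message", "content": "Corrected user text."},
    headers={"user_id": "user-123"},  # optional actor header
)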