letta-nightly 0.6.37.dev20250310103931__py3-none-any.whl → 0.6.38.dev20250312104155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (32)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +34 -12
  3. letta/client/client.py +1 -50
  4. letta/constants.py +1 -1
  5. letta/functions/function_sets/multi_agent.py +9 -8
  6. letta/functions/helpers.py +33 -6
  7. letta/llm_api/anthropic.py +20 -0
  8. letta/llm_api/google_ai_client.py +332 -0
  9. letta/llm_api/google_vertex_client.py +214 -0
  10. letta/llm_api/llm_client.py +48 -0
  11. letta/llm_api/llm_client_base.py +129 -0
  12. letta/orm/step.py +1 -0
  13. letta/schemas/block.py +4 -48
  14. letta/schemas/letta_message.py +26 -0
  15. letta/schemas/message.py +1 -1
  16. letta/schemas/step.py +1 -0
  17. letta/serialize_schemas/agent.py +8 -1
  18. letta/server/rest_api/interface.py +9 -7
  19. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  20. letta/server/rest_api/routers/v1/agents.py +12 -8
  21. letta/server/rest_api/routers/v1/steps.py +2 -0
  22. letta/server/rest_api/routers/v1/voice.py +3 -6
  23. letta/services/agent_manager.py +56 -3
  24. letta/services/helpers/agent_manager_helper.py +12 -1
  25. letta/services/identity_manager.py +7 -1
  26. letta/services/message_manager.py +40 -0
  27. letta/services/step_manager.py +8 -1
  28. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/METADATA +18 -17
  29. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/RECORD +32 -28
  30. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/LICENSE +0 -0
  31. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/WHEEL +0 -0
  32. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py ADDED
@@ -0,0 +1,214 @@
+ import uuid
+ from typing import List, Optional
+
+ from google import genai
+ from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ToolConfig
+
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.google_ai_client import GoogleAIClient
+ from letta.local_llm.json_parser import clean_json_string_extra_backslash
+ from letta.local_llm.utils import count_tokens
+ from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+ from letta.settings import model_settings
+ from letta.utils import get_tool_call_id
+
+
+ class GoogleVertexClient(GoogleAIClient):
+
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         client = genai.Client(
+             vertexai=True,
+             project=model_settings.google_cloud_project,
+             location=model_settings.google_cloud_location,
+             http_options={"api_version": "v1"},
+         )
+         response = client.models.generate_content(
+             model=self.llm_config.model,
+             contents=request_data["contents"],
+             config=request_data["config"],
+         )
+         return response.model_dump()
+
+     def build_request_data(
+         self,
+         messages: List[PydanticMessage],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         request_data = super().build_request_data(messages, tools, tool_call)
+         request_data["config"] = request_data.pop("generation_config")
+         request_data["config"]["tools"] = request_data.pop("tools")
+
+         tool_config = ToolConfig(
+             function_calling_config=FunctionCallingConfig(
+                 # ANY mode forces the model to predict only function calls
+                 mode=FunctionCallingConfigMode.ANY,
+             )
+         )
+         request_data["config"]["tool_config"] = tool_config.model_dump()
+
+         return request_data
+
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[PydanticMessage],
+     ) -> ChatCompletionResponse:
+         """
+         Converts custom response format from llm client into an OpenAI
+         ChatCompletionsResponse object.
+
+         Example:
+         {
+             "candidates": [
+                 {
+                     "content": {
+                         "parts": [
+                             {
+                                 "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                             }
+                         ]
+                     }
+                 }
+             ],
+             "usageMetadata": {
+                 "promptTokenCount": 9,
+                 "candidatesTokenCount": 27,
+                 "totalTokenCount": 36
+             }
+         }
+         """
+         response = GenerateContentResponse(**response_data)
+         try:
+             choices = []
+             index = 0
+             for candidate in response.candidates:
+                 content = candidate.content
+
+                 role = content.role
+                 assert role == "model", f"Unknown role in response: {role}"
+
+                 parts = content.parts
+                 # TODO support parts / multimodal
+                 # TODO support parallel tool calling natively
+                 # TODO Alternative here is to throw away everything else except for the first part
+                 for response_message in parts:
+                     # Convert the actual message style to OpenAI style
+                     if response_message.function_call:
+                         function_call = response_message.function_call
+                         function_name = function_call.name
+                         function_args = function_call.args
+                         assert isinstance(function_args, dict), function_args
+
+                         # NOTE: this also involves stripping the inner monologue out of the function
+                         if self.llm_config.put_inner_thoughts_in_kwargs:
+                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                             assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                         else:
+                             inner_thoughts = None
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                             tool_calls=[
+                                 ToolCall(
+                                     id=get_tool_call_id(),
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=function_name,
+                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                     ),
+                                 )
+                             ],
+                         )
+
+                     else:
+
+                         # Inner thoughts are the content by default
+                         inner_thoughts = response_message.text
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                         )
+
+                     # Google AI API uses different finish reason strings than OpenAI
+                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                     finish_reason = candidate.finish_reason.value
+                     if finish_reason == "STOP":
+                         openai_finish_reason = (
+                             "function_call"
+                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                             else "stop"
+                         )
+                     elif finish_reason == "MAX_TOKENS":
+                         openai_finish_reason = "length"
+                     elif finish_reason == "SAFETY":
+                         openai_finish_reason = "content_filter"
+                     elif finish_reason == "RECITATION":
+                         openai_finish_reason = "content_filter"
+                     else:
+                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                     choices.append(
+                         Choice(
+                             finish_reason=openai_finish_reason,
+                             index=index,
+                             message=openai_response_message,
+                         )
+                     )
+                     index += 1
+
+             # if len(choices) > 1:
+             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+             # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+             #  "usageMetadata": {
+             #     "promptTokenCount": 9,
+             #     "candidatesTokenCount": 27,
+             #     "totalTokenCount": 36
+             #   }
+             if response.usage_metadata:
+                 usage = UsageStatistics(
+                     prompt_tokens=response.usage_metadata.prompt_token_count,
+                     completion_tokens=response.usage_metadata.candidates_token_count,
+                     total_tokens=response.usage_metadata.total_token_count,
+                 )
+             else:
+                 # Count it ourselves
+                 assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                 total_tokens = prompt_tokens + completion_tokens
+                 usage = UsageStatistics(
+                     prompt_tokens=prompt_tokens,
+                     completion_tokens=completion_tokens,
+                     total_tokens=total_tokens,
+                 )
+
+             response_id = str(uuid.uuid4())
+             return ChatCompletionResponse(
+                 id=response_id,
+                 choices=choices,
+                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                 created=get_utc_time(),
+                 usage=usage,
+             )
+         except KeyError as e:
+             raise e
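
For orientation, here is a hedged usage sketch of the new Vertex client via the send_llm_request flow defined in llm_client_base.py below; the agent ID, messages, and tools are placeholders, and llm_config is assumed to be an LLMConfig pointing at a Gemini model with google_cloud_project and google_cloud_location set in model_settings. This is not an example shipped with the package.

# Hedged sketch; IDs and inputs are hypothetical, not taken from the package.
from letta.llm_api.google_vertex_client import GoogleVertexClient

client = GoogleVertexClient(
    agent_id="agent-123",   # hypothetical agent ID
    llm_config=llm_config,  # assumed LLMConfig with model_endpoint_type="google_vertex"
)

# messages (List[PydanticMessage]) and tools (List[dict]) are assumed to be prepared by the caller
completion = client.send_llm_request(messages, tools=tools)
print(completion.choices[0].finish_reason, completion.usage.total_tokens)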
letta/llm_api/llm_client.py ADDED
@@ -0,0 +1,48 @@
+ from typing import Optional
+
+ from letta.llm_api.llm_client_base import LLMClientBase
+ from letta.schemas.llm_config import LLMConfig
+
+
+ class LLMClient:
+     """Factory class for creating LLM clients based on the model endpoint type."""
+
+     @staticmethod
+     def create(
+         agent_id: str,
+         llm_config: LLMConfig,
+         put_inner_thoughts_first: bool = True,
+         actor_id: Optional[str] = None,
+     ) -> Optional[LLMClientBase]:
+         """
+         Create an LLM client based on the model endpoint type.
+
+         Args:
+             agent_id: Unique identifier for the agent
+             llm_config: Configuration for the LLM model
+             put_inner_thoughts_first: Whether to put inner thoughts first in the response
+             use_structured_output: Whether to use structured output
+             use_tool_naming: Whether to use tool naming
+             actor_id: Optional actor identifier
+
+         Returns:
+             An instance of LLMClientBase subclass
+
+         Raises:
+             ValueError: If the model endpoint type is not supported
+         """
+         match llm_config.model_endpoint_type:
+             case "google_ai":
+                 from letta.llm_api.google_ai_client import GoogleAIClient
+
+                 return GoogleAIClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case "google_vertex":
+                 from letta.llm_api.google_vertex_client import GoogleVertexClient
+
+                 return GoogleVertexClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case _:
+                 return None
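
A hedged sketch of how this factory might be called; the config values shown are illustrative assumptions, and any endpoint type other than the two Google ones currently falls through to None (i.e. the legacy request path).

# Hedged sketch; config values and IDs are assumptions for illustration only.
from letta.llm_api.llm_client import LLMClient

client = LLMClient.create(
    agent_id="agent-123",   # hypothetical
    llm_config=llm_config,  # assumed LLMConfig with model_endpoint_type="google_vertex"
)
if client is None:
    # Endpoint types without a client implementation keep using the legacy request path.
    ...
else:
    completion = client.send_llm_request(messages, tools=tools)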
letta/llm_api/llm_client_base.py ADDED
@@ -0,0 +1,129 @@
+ from abc import abstractmethod
+ from typing import List, Optional, Union
+
+ from openai import AsyncStream, Stream
+ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.message import Message
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.tracing import log_event
+
+
+ class LLMClientBase:
+     """
+     Abstract base class for LLM clients, formatting the request objects,
+     handling the downstream request and parsing into chat completions response format
+     """
+
+     def __init__(
+         self,
+         agent_id: str,
+         llm_config: LLMConfig,
+         put_inner_thoughts_first: Optional[bool] = True,
+         use_structured_output: Optional[bool] = True,
+         use_tool_naming: bool = True,
+         actor_id: Optional[str] = None,
+     ):
+         self.agent_id = agent_id
+         self.llm_config = llm_config
+         self.put_inner_thoughts_first = put_inner_thoughts_first
+         self.actor_id = actor_id
+
+     def send_llm_request(
+         self,
+         messages: List[Message],
+         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+         tool_call: Optional[str] = None,
+         stream: bool = False,
+         first_message: bool = False,
+         force_tool_call: Optional[str] = None,
+     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
+         """
+         Issues a request to the downstream model endpoint and parses response.
+         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
+         Otherwise returns a ChatCompletionResponse.
+         """
+         request_data = self.build_request_data(messages, tools, tool_call)
+         log_event(name="llm_request_sent", attributes=request_data)
+         if stream:
+             return self.stream(request_data)
+         else:
+             response_data = self.request(request_data)
+             log_event(name="llm_response_received", attributes=response_data)
+             return self.convert_response_to_chat_completion(response_data, messages)
+
+     async def send_llm_request_async(
+         self,
+         messages: List[Message],
+         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+         tool_call: Optional[str] = None,
+         stream: bool = False,
+         first_message: bool = False,
+         force_tool_call: Optional[str] = None,
+     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
+         """
+         Issues a request to the downstream model endpoint.
+         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
+         Otherwise returns a ChatCompletionResponse.
+         """
+         request_data = self.build_request_data(messages, tools, tool_call)
+         log_event(name="llm_request_sent", attributes=request_data)
+         if stream:
+             return await self.stream_async(request_data)
+         else:
+             response_data = await self.request_async(request_data)
+             log_event(name="llm_response_received", attributes=response_data)
+             return self.convert_response_to_chat_completion(response_data, messages)
+
+     @abstractmethod
+     def build_request_data(
+         self,
+         messages: List[Message],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     async def request_async(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[Message],
+     ) -> ChatCompletionResponse:
+         """
+         Converts custom response format from llm client into an OpenAI
+         ChatCompletionsResponse object.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
+         """
+         Performs underlying streaming request to llm and returns raw response.
+         """
+         raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
+
+     @abstractmethod
+     async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
+         """
+         Performs underlying streaming request to llm and returns raw response.
+         """
+         raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
letta/orm/step.py CHANGED
@@ -33,6 +33,7 @@ class Step(SqlalchemyBase):
      job_id: Mapped[Optional[str]] = mapped_column(
          ForeignKey("jobs.id", ondelete="SET NULL"), nullable=True, doc="The unique identified of the job run that triggered this step"
      )
+     agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
      provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
      model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
      model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
letta/schemas/block.py CHANGED
@@ -1,6 +1,6 @@
  from typing import Optional
 
- from pydantic import BaseModel, Field, model_validator
+ from pydantic import Field, model_validator
  from typing_extensions import Self
 
  from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
@@ -37,7 +37,8 @@ class BaseBlock(LettaBase, validate_assignment=True):
 
      @model_validator(mode="after")
      def verify_char_limit(self) -> Self:
-         if self.value and len(self.value) > self.limit:
+         # self.limit can be None from
+         if self.limit is not None and self.value and len(self.value) > self.limit:
              error_msg = f"Edit failed: Exceeds {self.limit} character limit (requested {len(self.value)}) - {str(self)}."
              raise ValueError(error_msg)
 
@@ -89,61 +90,16 @@ class Persona(Block):
      label: str = "persona"
 
 
- # class CreateBlock(BaseBlock):
- #     """Create a block"""
- #
- #     is_template: bool = True
- #     label: str = Field(..., description="Label of the block.")
-
-
- class BlockLabelUpdate(BaseModel):
-     """Update the label of a block"""
-
-     current_label: str = Field(..., description="Current label of the block.")
-     new_label: str = Field(..., description="New label of the block.")
-
-
- # class CreatePersona(CreateBlock):
- #     """Create a persona block"""
- #
- #     label: str = "persona"
- #
- #
- # class CreateHuman(CreateBlock):
- #     """Create a human block"""
- #
- #     label: str = "human"
-
-
  class BlockUpdate(BaseBlock):
      """Update a block"""
 
-     limit: Optional[int] = Field(CORE_MEMORY_BLOCK_CHAR_LIMIT, description="Character limit of the block.")
+     limit: Optional[int] = Field(None, description="Character limit of the block.")
      value: Optional[str] = Field(None, description="Value of the block.")
 
      class Config:
          extra = "ignore"  # Ignores extra fields
 
 
- class BlockLimitUpdate(BaseModel):
-     """Update the limit of a block"""
-
-     label: str = Field(..., description="Label of the block.")
-     limit: int = Field(..., description="New limit of the block.")
-
-
- # class UpdatePersona(BlockUpdate):
- #     """Update a persona block"""
- #
- #     label: str = "persona"
- #
- #
- # class UpdateHuman(BlockUpdate):
- #     """Update a human block"""
- #
- #     label: str = "human"
-
-
  class CreateBlock(BaseBlock):
      """Create a block"""
 
letta/schemas/letta_message.py CHANGED
@@ -236,6 +236,32 @@ LettaMessageUnion = Annotated[
  ]
 
 
+ class UpdateSystemMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["system_message"] = "system_message"
+
+
+ class UpdateUserMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["user_message"] = "user_message"
+
+
+ class UpdateReasoningMessage(BaseModel):
+     reasoning: Union[str, List[MessageContentUnion]]
+     message_type: Literal["reasoning_message"] = "reasoning_message"
+
+
+ class UpdateAssistantMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["assistant_message"] = "assistant_message"
+
+
+ LettaMessageUpdateUnion = Annotated[
+     Union[UpdateSystemMessage, UpdateUserMessage, UpdateReasoningMessage, UpdateAssistantMessage],
+     Field(discriminator="message_type"),
+ ]
+
+
  def create_letta_message_union_schema():
      return {
          "oneOf": [
letta/schemas/message.py CHANGED
@@ -74,7 +74,7 @@ class MessageUpdate(BaseModel):
      """Request to update a message"""
 
      role: Optional[MessageRole] = Field(None, description="The role of the participant.")
-     content: Optional[Union[str, List[MessageContentUnion]]] = Field(..., description="The content of the message.")
+     content: Optional[Union[str, List[MessageContentUnion]]] = Field(None, description="The content of the message.")
      # NOTE: probably doesn't make sense to allow remapping user_id or agent_id (vs creating a new message)
      # user_id: Optional[str] = Field(None, description="The unique identifier of the user.")
      # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
letta/schemas/step.py CHANGED
@@ -18,6 +18,7 @@ class Step(StepBase):
      job_id: Optional[str] = Field(
          None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
      )
+     agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
      provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
      model: Optional[str] = Field(None, description="The name of the model used for this step.")
      model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
letta/serialize_schemas/agent.py CHANGED
@@ -70,4 +70,11 @@ class SerializedAgentSchema(BaseSchema):
      class Meta(BaseSchema.Meta):
          model = Agent
          # TODO: Serialize these as well...
-         exclude = BaseSchema.Meta.exclude + ("sources", "source_passages", "agent_passages")
+         exclude = BaseSchema.Meta.exclude + (
+             "project_id",
+             "template_id",
+             "base_template_id",
+             "sources",
+             "source_passages",
+             "agent_passages",
+         )
letta/server/rest_api/interface.py CHANGED
@@ -918,13 +918,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
              # skip if there's a finish
              return None
          else:
-             # Example case that would trigger here:
-             # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
-             # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
-             # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
-             # model='gpt-4o-mini-2024-07-18'
-             # object='chat.completion.chunk'
-             warnings.warn(f"Couldn't find delta in chunk: {chunk}")
+             # Only warn for non-Claude models since Claude commonly has empty first chunks
+             if not chunk.model.startswith("claude-"):
+                 # Example case that would trigger here:
+                 # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
+                 # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
+                 # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
+                 # model='gpt-4o-mini-2024-07-18'
+                 # object='chat.completion.chunk'
+                 warnings.warn(f"Couldn't find delta in chunk: {chunk}")
              return None
 
          return processed_chunk
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py CHANGED
@@ -24,7 +24,7 @@ logger = get_logger(__name__)
 
 
  @router.post(
-     "/chat/completions",
+     "/{agent_id}/chat/completions",
      response_model=None,
      operation_id="create_chat_completions",
      responses={
@@ -37,6 +37,7 @@ logger = get_logger(__name__)
      },
  )
  async def create_chat_completions(
+     agent_id: str,
      completion_request: CompletionCreateParams = Body(...),
      server: "SyncServer" = Depends(get_letta_server),
      user_id: Optional[str] = Header(None, alias="user_id"),
@@ -51,12 +52,6 @@ async def create_chat_completions(
 
      actor = server.user_manager.get_user_or_default(user_id=user_id)
 
-     agent_id = str(completion_request.get("user", None))
-     if agent_id is None:
-         error_msg = "Must pass agent_id in the 'user' field"
-         logger.error(error_msg)
-         raise HTTPException(status_code=400, detail=error_msg)
-
      letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
      llm_config = letta_agent.agent_state.llm_config
      if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
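
With this change the agent is addressed in the URL path instead of being passed through the OpenAI 'user' field. A hedged sketch of calling the relocated endpoint; the host, port, and route prefix are assumptions about a local server setup, not taken from the package.

# Hedged sketch; URL prefix, port, and IDs are assumptions for illustration.
import requests

agent_id = "agent-123"  # hypothetical
resp = requests.post(
    f"http://localhost:8283/openai/{agent_id}/chat/completions",  # prefix assumed from the router mount
    headers={"user_id": "user-456"},  # actor is resolved from this header, per the handler above
    json={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.json())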
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -13,13 +13,12 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
  from letta.log import get_logger
  from letta.orm.errors import NoResultFound
  from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent
- from letta.schemas.block import Block, BlockUpdate, CreateBlock  # , BlockLabelUpdate, BlockLimitUpdate
+ from letta.schemas.block import Block, BlockUpdate
  from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
- from letta.schemas.letta_message import LettaMessageUnion
+ from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
  from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
- from letta.schemas.message import Message, MessageUpdate
  from letta.schemas.passage import Passage, PassageUpdate
  from letta.schemas.run import Run
  from letta.schemas.source import Source
@@ -119,6 +118,7 @@ async def upload_agent_serialized(
          True,
          description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
      ),
+     project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
  ):
      """
      Upload a serialized agent JSON file and recreate the agent in the system.
@@ -129,7 +129,11 @@ async def upload_agent_serialized(
      serialized_data = await file.read()
      agent_json = json.loads(serialized_data)
      new_agent = server.agent_manager.deserialize(
-         serialized_agent=agent_json, actor=actor, append_copy_suffix=append_copy_suffix, override_existing_tools=override_existing_tools
+         serialized_agent=agent_json,
+         actor=actor,
+         append_copy_suffix=append_copy_suffix,
+         override_existing_tools=override_existing_tools,
+         project_id=project_id,
      )
      return new_agent
 
@@ -526,20 +530,20 @@ def list_messages(
      )
 
 
- @router.patch("/{agent_id}/messages/{message_id}", response_model=Message, operation_id="modify_message")
+ @router.patch("/{agent_id}/messages/{message_id}", response_model=LettaMessageUpdateUnion, operation_id="modify_message")
  def modify_message(
      agent_id: str,
      message_id: str,
-     request: MessageUpdate = Body(...),
+     request: LettaMessageUpdateUnion = Body(...),
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
  ):
      """
      Update the details of a message associated with an agent.
      """
-     # TODO: Get rid of agent_id here, it's not really relevant
+     # TODO: support modifying tool calls/returns
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
-     return server.message_manager.update_message_by_id(message_id=message_id, message_update=request, actor=actor)
+     return server.message_manager.update_message_by_letta_message(message_id=message_id, letta_message_update=request, actor=actor)
 
 
  @router.post(
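
The PATCH body is now one of the LettaMessageUpdateUnion payloads defined in letta_message.py above, discriminated by message_type, rather than a raw MessageUpdate. A hedged request sketch; the host, route prefix, and IDs are assumptions for illustration only.

# Hedged sketch; the route prefix and IDs are assumptions, not taken from the package.
import requests

agent_id, message_id = "agent-123", "message-456"  # hypothetical
resp = requests.patch(
    f"http://localhost:8283/v1/agents/{agent_id}/messages/{message_id}",  # prefix assumed
    headers={"user_id": "user-456"},
    json={
        "message_type": "user_message",  # selects UpdateUserMessage from the union
        "content": "corrected message text",
    },
)
print(resp.json())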