letta-nightly 0.7.10.dev20250506104245__py3-none-any.whl → 0.7.11.dev20250507230415__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +8 -4
  3. letta/agents/letta_agent.py +3 -5
  4. letta/agents/letta_agent_batch.py +2 -4
  5. letta/client/client.py +2 -2
  6. letta/functions/async_composio_toolset.py +106 -0
  7. letta/functions/composio_helpers.py +20 -24
  8. letta/llm_api/anthropic.py +16 -5
  9. letta/llm_api/anthropic_client.py +10 -8
  10. letta/llm_api/google_ai_client.py +12 -10
  11. letta/llm_api/google_vertex_client.py +107 -27
  12. letta/llm_api/llm_api_tools.py +9 -3
  13. letta/llm_api/llm_client.py +9 -11
  14. letta/llm_api/llm_client_base.py +6 -5
  15. letta/llm_api/openai_client.py +6 -6
  16. letta/local_llm/constants.py +1 -0
  17. letta/memory.py +8 -5
  18. letta/orm/provider.py +1 -0
  19. letta/schemas/enums.py +5 -0
  20. letta/schemas/llm_config.py +2 -0
  21. letta/schemas/message.py +3 -3
  22. letta/schemas/providers.py +33 -1
  23. letta/server/rest_api/routers/v1/agents.py +10 -5
  24. letta/server/rest_api/routers/v1/llms.py +16 -6
  25. letta/server/rest_api/routers/v1/providers.py +3 -1
  26. letta/server/rest_api/routers/v1/sources.py +1 -0
  27. letta/server/server.py +58 -24
  28. letta/services/provider_manager.py +11 -8
  29. letta/settings.py +2 -0
  30. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/METADATA +1 -1
  31. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/RECORD +34 -33
  32. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/LICENSE +0 -0
  33. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/WHEEL +0 -0
  34. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py CHANGED
@@ -5,16 +5,19 @@ from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
 from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps
+from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
-from letta.settings import model_settings
+from letta.settings import model_settings, settings
 from letta.utils import get_tool_call_id
 
+logger = get_logger(__name__)
+
 
 class GoogleVertexClient(GoogleAIClient):
 
@@ -35,6 +38,23 @@ class GoogleVertexClient(GoogleAIClient):
         )
         return response.model_dump()
 
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = genai.Client(
+            vertexai=True,
+            project=model_settings.google_cloud_project,
+            location=model_settings.google_cloud_location,
+            http_options={"api_version": "v1"},
+        )
+        response = await client.aio.models.generate_content(
+            model=llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+        return response.model_dump()
+
     def build_request_data(
         self,
         messages: List[PydanticMessage],
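
The async path reuses the request shape produced by build_request_data and calls the google-genai async surface (client.aio). A minimal standalone sketch of the same call pattern, with placeholder project/location strings rather than Letta's model_settings:

    import asyncio

    from google import genai


    async def generate(contents: list) -> dict:
        # Same client construction as request_async above; the project and
        # location values here are placeholders, not Letta configuration.
        client = genai.Client(
            vertexai=True,
            project="my-gcp-project",
            location="us-central1",
            http_options={"api_version": "v1"},
        )
        # client.aio mirrors the synchronous client.models API asynchronously.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash-001",
            contents=contents,
        )
        return response.model_dump()


    if __name__ == "__main__":
        result = asyncio.run(generate(["Say hello in one word."]))
        print(result["candidates"][0]["content"])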
@@ -49,16 +69,21 @@ class GoogleVertexClient(GoogleAIClient):
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")
 
-        tool_names = [t["name"] for t in tools]
-        tool_config = ToolConfig(
-            function_calling_config=FunctionCallingConfig(
-                # ANY mode forces the model to predict only function calls
-                mode=FunctionCallingConfigMode.ANY,
-                # Provide the list of tools (though empty should also work, it seems not to)
-                allowed_function_names=tool_names,
+        tool_names = [t["name"] for t in tools] if tools else []
+        if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
+            request_data["config"]["response_mime_type"] = "application/json"
+            request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
+            del request_data["config"]["tools"]
+        else:
+            tool_config = ToolConfig(
+                function_calling_config=FunctionCallingConfig(
+                    # ANY mode forces the model to predict only function calls
+                    mode=FunctionCallingConfigMode.ANY,
+                    # Provide the list of tools (though empty should also work, it seems not to)
+                    allowed_function_names=tool_names,
+                )
             )
-        )
-        request_data["config"]["tool_config"] = tool_config.model_dump()
+            request_data["config"]["tool_config"] = tool_config.model_dump()
 
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
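
When exactly one tool is present and use_vertex_structured_outputs_experimental is enabled, the branch above swaps the tool declaration for a constrained JSON response. The resulting config has roughly this shape (a hand-written illustration, not captured output; the send_message tool is hypothetical):

    # Shape of request_data["config"] in the structured-outputs branch.
    config = {
        "response_mime_type": "application/json",
        "response_schema": {
            "type": "OBJECT",
            "properties": {
                "name": {"type": "STRING", "enum": ["send_message"]},
                "args": {"type": "OBJECT"},  # the tool's parameter schema, elided here
            },
            "required": ["name", "args"],
        },
        # No "tools" or "tool_config" keys in this branch; the schema itself
        # forces the model to emit a single well-formed function call.
    }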
@@ -110,12 +135,16 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content
 
-            # if "role" not in content or not content["role"]:
-            #     # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            #     # NOTE: must be a ValueError to trigger a retry
-            #     raise ValueError(f"Error in response data from LLM: {response_data}")
-            # role = content["role"]
-            # assert role == "model", f"Unknown role in response: {role}"
+            if content.role is None or content.parts is None:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
+                    raise ValueError(f"Error in response data from LLM: {candidate.finish_message[:350]}...")
+                else:
+                    raise ValueError(f"Error in response data from LLM: {response_data}")
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
 
             parts = content.parts
 
@@ -142,10 +171,12 @@ class GoogleVertexClient(GoogleAIClient):
 
                 # NOTE: this also involves stripping the inner monologue out of the function
                 if llm_config.put_inner_thoughts_in_kwargs:
-                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
-                    assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
-                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                    assert (
+                        INNER_THOUGHTS_KWARG_VERTEX in function_args
+                    ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
                     inner_thoughts = None
@@ -167,15 +198,50 @@ class GoogleVertexClient(GoogleAIClient):
                 )
 
             else:
+                try:
+                    # Structured output tool call
+                    function_call = json_loads(response_message.text)
+                    function_name = function_call["name"]
+                    function_args = function_call["args"]
+                    assert isinstance(function_args, dict), function_args
 
-                # Inner thoughts are the content by default
-                inner_thoughts = response_message.text
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if llm_config.put_inner_thoughts_in_kwargs:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 
-                # Google AI API doesn't generate tool call IDs
-                openai_response_message = Message(
-                    role="assistant",  # NOTE: "model" -> "assistant"
-                    content=inner_thoughts,
-                )
+                        assert (
+                            INNER_THOUGHTS_KWARG in function_args
+                        ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                except:
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
 
             # Google AI API uses different finish reason strings than OpenAI
             # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
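
In the structured-outputs branch the response text is itself the serialized function call, so the try above parses it and rebuilds an OpenAI-style tool call; any parse or key failure falls through to the old treat-text-as-content behavior (the bare except in the shipped code is broad by design). A self-contained sketch of that parse, with a made-up payload:

    import json

    # Illustrative structured-output text; the tool name and args are made up.
    raw_text = '{"name": "send_message", "args": {"inner_thoughts": "Greet the user.", "message": "Hi!"}}'

    function_call = json.loads(raw_text)
    function_name = function_call["name"]
    function_args = function_call["args"]
    assert isinstance(function_args, dict)

    # The inner monologue rides along inside args and is stripped back out.
    inner_thoughts = function_args.pop("inner_thoughts", None)
    print(function_name, inner_thoughts, function_args)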
@@ -244,3 +310,17 @@ class GoogleVertexClient(GoogleAIClient):
             )
         except KeyError as e:
             raise e
+
+    def get_function_call_response_schema(self, tool: dict) -> dict:
+        return {
+            "type": "OBJECT",
+            "properties": {
+                "name": {"type": "STRING", "enum": [tool["name"]]},
+                "args": {
+                    "type": "OBJECT",
+                    "properties": tool["parameters"]["properties"],
+                    "required": tool["parameters"]["required"],
+                },
+            },
+            "required": ["name", "args"],
+        }
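
Given a tool in the OpenAI-style function format, the schema pins "name" to a single-value enum and nests the tool's own parameter schema under "args". For a hypothetical send_message tool:

    # Hypothetical input tool dict.
    tool = {
        "name": "send_message",
        "parameters": {
            "properties": {"message": {"type": "STRING"}},
            "required": ["message"],
        },
    }

    # What get_function_call_response_schema(tool) returns for it:
    expected = {
        "type": "OBJECT",
        "properties": {
            "name": {"type": "STRING", "enum": ["send_message"]},
            "args": {
                "type": "OBJECT",
                "properties": {"message": {"type": "STRING"}},
                "required": ["message"],
            },
        },
        "required": ["name", "args"],
    }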
letta/llm_api/llm_api_tools.py CHANGED
@@ -24,7 +24,7 @@ from letta.llm_api.openai import (
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
-from letta.schemas.enums import ProviderType
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
@@ -172,10 +172,12 @@ def create(
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
-        elif llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
+        elif llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
+            from letta.services.user_manager import UserManager
 
-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            actor = UserManager().get_user_or_default(user_id=user_id)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
         elif model_settings.openai_api_key is None:
             # the openai python client requires a dummy API key
             api_key = "DUMMY_API_KEY"
@@ -379,7 +381,9 @@ def create(
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
                 provider_name=llm_config.provider_name,
+                provider_category=llm_config.provider_category,
                 name=name,
+                user_id=user_id,
             )
 
         else:
@@ -390,6 +394,8 @@ def create(
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
                 provider_name=llm_config.provider_name,
+                provider_category=llm_config.provider_category,
+                user_id=user_id,
             )
 
         if llm_config.put_inner_thoughts_in_kwargs:
letta/llm_api/llm_client.py CHANGED
@@ -1,8 +1,11 @@
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.schemas.enums import ProviderType
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClient:
     """Factory class for creating LLM clients based on the model endpoint type."""
@@ -10,9 +13,8 @@ class LLMClient:
     @staticmethod
     def create(
         provider_type: ProviderType,
-        provider_name: Optional[str] = None,
         put_inner_thoughts_first: bool = True,
-        actor_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
@@ -32,33 +34,29 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case _:
                 return None
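
Call sites now hand the factory a full User instead of a provider_name/actor_id pair; BYOK key lookups and the OpenAI user field are derived from the actor inside the client. A minimal usage sketch, resolving the actor the same way this diff does elsewhere:

    from letta.llm_api.llm_client import LLMClient
    from letta.schemas.enums import ProviderType
    from letta.services.user_manager import UserManager

    # Fall back to the default user when no explicit user id is available.
    actor = UserManager().get_user_or_default(user_id=None)

    client = LLMClient.create(
        provider_type=ProviderType.openai,
        put_inner_thoughts_first=True,
        actor=actor,
    )
    if client is None:
        raise ValueError("no LLM client implemented for this provider type")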
letta/llm_api/llm_client_base.py CHANGED
@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 from anthropic.types.beta.messages import BetaMessageBatch
 from openai import AsyncStream, Stream
@@ -11,6 +11,9 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.tracing import log_event
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClientBase:
     """
@@ -20,13 +23,11 @@ class LLMClientBase:
 
     def __init__(
         self,
-        provider_name: Optional[str] = None,
        put_inner_thoughts_first: Optional[bool] = True,
        use_tool_naming: bool = True,
-        actor_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ):
-        self.actor_id = actor_id
-        self.provider_name = provider_name
+        self.actor = actor
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming
 
letta/llm_api/openai_client.py CHANGED
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderType
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -78,10 +78,10 @@ def supports_parallel_tool_calling(model: str) -> bool:
 class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key = None
-        if llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
+        if llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
 
-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -156,11 +156,11 @@ class OpenAIClient(LLMClientBase):
         )
 
         # always set user id for openai requests
-        if self.actor_id:
-            data.user = self.actor_id
+        if self.actor:
+            data.user = self.actor.id
 
         if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
-            if not self.actor_id:
+            if not self.actor:
                 # override user id for inference.letta.com
                 import uuid
 
letta/local_llm/constants.py CHANGED
@@ -26,6 +26,7 @@ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
 DEFAULT_WRAPPER_NAME = "chatml"
 
 INNER_THOUGHTS_KWARG = "inner_thoughts"
+INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
 INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
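
Vertex function calls now carry the inner monologue under "thinking" while other backends keep "inner_thoughts"; the parsers pop whichever constant matches their backend. A toy illustration of the two conventions (payloads made up):

    from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX

    # Hypothetical function args as each backend would deliver them.
    openai_style_args = {"inner_thoughts": "Plan first.", "message": "Hi!"}
    vertex_style_args = {"thinking": "Plan first.", "message": "Hi!"}

    # Each parser pops the kwarg its backend uses.
    assert openai_style_args.pop(INNER_THOUGHTS_KWARG) == "Plan first."
    assert vertex_style_args.pop(INNER_THOUGHTS_KWARG_VERTEX) == "Plan first."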
letta/memory.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, List
+from typing import TYPE_CHECKING, Callable, Dict, List
 
 from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
 from letta.llm_api.llm_api_tools import create
@@ -13,6 +13,9 @@ from letta.settings import summarizer_settings
 from letta.tracing import trace_method
 from letta.utils import count_tokens, printd
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 def get_memory_functions(cls: Memory) -> Dict[str, Callable]:
     """Get memory functions for a memory class"""
@@ -51,6 +54,7 @@ def _format_summary_history(message_history: List[Message]):
 def summarize_messages(
     agent_state: AgentState,
     message_sequence_to_summarize: List[Message],
+    actor: "User",
 ):
     """Summarize a message sequence using GPT"""
     # we need the context_window
@@ -63,7 +67,7 @@ def summarize_messages(
         trunc_ratio = (summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns) * 0.8  # For good measure...
         cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
         summary_input = str(
-            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])]
+            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff], actor=actor)]
             + message_sequence_to_summarize[cutoff:]
         )
 
@@ -79,10 +83,9 @@ def summarize_messages(
     llm_config_no_inner_thoughts.put_inner_thoughts_in_kwargs = False
 
     llm_client = LLMClient.create(
-        provider_name=llm_config_no_inner_thoughts.provider_name,
-        provider_type=llm_config_no_inner_thoughts.model_endpoint_type,
+        provider_type=agent_state.llm_config.model_endpoint_type,
         put_inner_thoughts_first=False,
-        actor_id=agent_state.created_by_id,
+        actor=actor,
     )
     # try to use new client, otherwise fallback to old flow
     # TODO: we can just directly call the LLM here?
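
The truncation branch recurses with the same actor. The arithmetic it applies is easy to check by hand; a worked example with made-up numbers:

    # Worked example of the truncation math in summarize_messages (values made up).
    memory_warning_threshold = 0.75  # summarizer_settings.memory_warning_threshold
    context_window = 8000
    summary_input_tkns = 12000  # prompt exceeds threshold * context_window
    num_messages = 40

    trunc_ratio = (memory_warning_threshold * context_window / summary_input_tkns) * 0.8  # 0.4
    cutoff = int(num_messages * trunc_ratio)  # 16
    # messages[:cutoff] are recursively summarized; messages[cutoff:] pass through verbatim.
    print(trunc_ratio, cutoff)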
letta/orm/provider.py CHANGED
@@ -26,6 +26,7 @@ class Provider(SqlalchemyBase, OrganizationMixin):
 
     name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
     provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
+    provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
     base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
 
letta/schemas/enums.py CHANGED
@@ -19,6 +19,11 @@ class ProviderType(str, Enum):
     bedrock = "bedrock"
 
 
+class ProviderCategory(str, Enum):
+    base = "base"
+    byok = "byok"
+
+
 class MessageRole(str, Enum):
     assistant = "assistant"
     user = "user"
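
Because ProviderCategory subclasses str, members compare equal to their raw values and serialize cleanly through Pydantic, which is what the provider_category == ProviderCategory.byok checks elsewhere in this diff rely on:

    from letta.schemas.enums import ProviderCategory

    assert ProviderCategory.byok == "byok"  # str-valued enum compares to its value
    assert ProviderCategory("base") is ProviderCategory.base  # round-trips from raw strings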
letta/schemas/llm_config.py CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.log import get_logger
+from letta.schemas.enums import ProviderCategory
 
 logger = get_logger(__name__)
 
@@ -51,6 +52,7 @@ class LLMConfig(BaseModel):
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
     provider_name: Optional[str] = Field(None, description="The provider name for the model.")
+    provider_category: Optional[ProviderCategory] = Field(None, description="The provider category for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
     put_inner_thoughts_in_kwargs: Optional[bool] = Field(
letta/schemas/message.py CHANGED
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, field_validator
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, TOOL_CALL_ID_MAX_LEN
 from letta.helpers.datetime_helpers import get_utc_time, is_utc_datetime
 from letta.helpers.json_helpers import json_dumps
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.letta_message import (
@@ -914,9 +914,9 @@ class Message(BaseMessage):
                     function_args = {"args": function_args}
 
                 if put_inner_thoughts_in_kwargs and text_content is not None:
-                    assert "inner_thoughts" not in function_args, function_args
+                    assert INNER_THOUGHTS_KWARG not in function_args, function_args
                     assert len(self.tool_calls) == 1
-                    function_args[INNER_THOUGHTS_KWARG] = text_content
+                    function_args[INNER_THOUGHTS_KWARG_VERTEX] = text_content
 
                 parts.append(
                     {