letta-nightly 0.11.7.dev20251007104119__py3-none-any.whl → 0.12.0.dev20251009104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. letta/__init__.py +1 -1
  2. letta/adapters/letta_llm_adapter.py +1 -0
  3. letta/adapters/letta_llm_request_adapter.py +0 -1
  4. letta/adapters/letta_llm_stream_adapter.py +7 -2
  5. letta/adapters/simple_llm_request_adapter.py +88 -0
  6. letta/adapters/simple_llm_stream_adapter.py +192 -0
  7. letta/agents/agent_loop.py +6 -0
  8. letta/agents/ephemeral_summary_agent.py +2 -1
  9. letta/agents/helpers.py +142 -6
  10. letta/agents/letta_agent.py +13 -33
  11. letta/agents/letta_agent_batch.py +2 -4
  12. letta/agents/letta_agent_v2.py +87 -77
  13. letta/agents/letta_agent_v3.py +927 -0
  14. letta/agents/voice_agent.py +2 -6
  15. letta/constants.py +8 -4
  16. letta/database_utils.py +161 -0
  17. letta/errors.py +40 -0
  18. letta/functions/function_sets/base.py +84 -4
  19. letta/functions/function_sets/multi_agent.py +0 -3
  20. letta/functions/schema_generator.py +113 -71
  21. letta/groups/dynamic_multi_agent.py +3 -2
  22. letta/groups/helpers.py +1 -2
  23. letta/groups/round_robin_multi_agent.py +3 -2
  24. letta/groups/sleeptime_multi_agent.py +3 -2
  25. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  26. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  27. letta/groups/supervisor_multi_agent.py +84 -80
  28. letta/helpers/converters.py +3 -0
  29. letta/helpers/message_helper.py +4 -0
  30. letta/helpers/tool_rule_solver.py +92 -5
  31. letta/interfaces/anthropic_streaming_interface.py +409 -0
  32. letta/interfaces/gemini_streaming_interface.py +296 -0
  33. letta/interfaces/openai_streaming_interface.py +752 -1
  34. letta/llm_api/anthropic_client.py +127 -16
  35. letta/llm_api/bedrock_client.py +4 -2
  36. letta/llm_api/deepseek_client.py +4 -1
  37. letta/llm_api/google_vertex_client.py +124 -42
  38. letta/llm_api/groq_client.py +4 -1
  39. letta/llm_api/llm_api_tools.py +11 -4
  40. letta/llm_api/llm_client_base.py +6 -2
  41. letta/llm_api/openai.py +32 -2
  42. letta/llm_api/openai_client.py +423 -18
  43. letta/llm_api/xai_client.py +4 -1
  44. letta/main.py +9 -5
  45. letta/memory.py +1 -0
  46. letta/orm/__init__.py +2 -1
  47. letta/orm/agent.py +10 -0
  48. letta/orm/block.py +7 -16
  49. letta/orm/blocks_agents.py +8 -2
  50. letta/orm/files_agents.py +2 -0
  51. letta/orm/job.py +7 -5
  52. letta/orm/mcp_oauth.py +1 -0
  53. letta/orm/message.py +21 -6
  54. letta/orm/organization.py +2 -0
  55. letta/orm/provider.py +6 -2
  56. letta/orm/run.py +71 -0
  57. letta/orm/run_metrics.py +82 -0
  58. letta/orm/sandbox_config.py +7 -1
  59. letta/orm/sqlalchemy_base.py +0 -306
  60. letta/orm/step.py +6 -5
  61. letta/orm/step_metrics.py +5 -5
  62. letta/otel/tracing.py +28 -3
  63. letta/plugins/defaults.py +4 -4
  64. letta/prompts/system_prompts/__init__.py +2 -0
  65. letta/prompts/system_prompts/letta_v1.py +25 -0
  66. letta/schemas/agent.py +3 -2
  67. letta/schemas/agent_file.py +9 -3
  68. letta/schemas/block.py +23 -10
  69. letta/schemas/enums.py +21 -2
  70. letta/schemas/job.py +17 -4
  71. letta/schemas/letta_message_content.py +71 -2
  72. letta/schemas/letta_stop_reason.py +5 -5
  73. letta/schemas/llm_config.py +53 -3
  74. letta/schemas/memory.py +1 -1
  75. letta/schemas/message.py +564 -117
  76. letta/schemas/openai/responses_request.py +64 -0
  77. letta/schemas/providers/__init__.py +2 -0
  78. letta/schemas/providers/anthropic.py +16 -0
  79. letta/schemas/providers/ollama.py +115 -33
  80. letta/schemas/providers/openrouter.py +52 -0
  81. letta/schemas/providers/vllm.py +2 -1
  82. letta/schemas/run.py +48 -42
  83. letta/schemas/run_metrics.py +21 -0
  84. letta/schemas/step.py +2 -2
  85. letta/schemas/step_metrics.py +1 -1
  86. letta/schemas/tool.py +15 -107
  87. letta/schemas/tool_rule.py +88 -5
  88. letta/serialize_schemas/marshmallow_agent.py +1 -0
  89. letta/server/db.py +79 -408
  90. letta/server/rest_api/app.py +61 -10
  91. letta/server/rest_api/dependencies.py +14 -0
  92. letta/server/rest_api/redis_stream_manager.py +19 -8
  93. letta/server/rest_api/routers/v1/agents.py +364 -292
  94. letta/server/rest_api/routers/v1/blocks.py +14 -20
  95. letta/server/rest_api/routers/v1/identities.py +45 -110
  96. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  97. letta/server/rest_api/routers/v1/jobs.py +23 -6
  98. letta/server/rest_api/routers/v1/messages.py +1 -1
  99. letta/server/rest_api/routers/v1/runs.py +149 -99
  100. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  101. letta/server/rest_api/routers/v1/tools.py +281 -594
  102. letta/server/rest_api/routers/v1/voice.py +1 -1
  103. letta/server/rest_api/streaming_response.py +29 -29
  104. letta/server/rest_api/utils.py +122 -64
  105. letta/server/server.py +160 -887
  106. letta/services/agent_manager.py +236 -919
  107. letta/services/agent_serialization_manager.py +16 -0
  108. letta/services/archive_manager.py +0 -100
  109. letta/services/block_manager.py +211 -168
  110. letta/services/context_window_calculator/token_counter.py +1 -1
  111. letta/services/file_manager.py +1 -1
  112. letta/services/files_agents_manager.py +24 -33
  113. letta/services/group_manager.py +0 -142
  114. letta/services/helpers/agent_manager_helper.py +7 -2
  115. letta/services/helpers/run_manager_helper.py +69 -0
  116. letta/services/job_manager.py +96 -411
  117. letta/services/lettuce/__init__.py +6 -0
  118. letta/services/lettuce/lettuce_client_base.py +86 -0
  119. letta/services/mcp_manager.py +38 -6
  120. letta/services/message_manager.py +165 -362
  121. letta/services/organization_manager.py +0 -36
  122. letta/services/passage_manager.py +0 -345
  123. letta/services/provider_manager.py +0 -80
  124. letta/services/run_manager.py +364 -0
  125. letta/services/sandbox_config_manager.py +0 -234
  126. letta/services/step_manager.py +62 -39
  127. letta/services/summarizer/summarizer.py +9 -7
  128. letta/services/telemetry_manager.py +0 -16
  129. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  130. letta/services/tool_executor/core_tool_executor.py +397 -2
  131. letta/services/tool_executor/files_tool_executor.py +3 -3
  132. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  133. letta/services/tool_executor/tool_execution_manager.py +6 -8
  134. letta/services/tool_executor/tool_executor_base.py +3 -3
  135. letta/services/tool_manager.py +85 -339
  136. letta/services/tool_sandbox/base.py +24 -13
  137. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  138. letta/services/tool_schema_generator.py +123 -0
  139. letta/services/user_manager.py +0 -99
  140. letta/settings.py +20 -4
  141. letta/system.py +5 -1
  142. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/METADATA +3 -5
  143. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/RECORD +146 -135
  144. letta/agents/temporal/activities/__init__.py +0 -4
  145. letta/agents/temporal/activities/example_activity.py +0 -7
  146. letta/agents/temporal/activities/prepare_messages.py +0 -10
  147. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  148. letta/agents/temporal/types.py +0 -25
  149. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/WHEEL +0 -0
  150. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/entry_points.txt +0 -0
  151. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/anthropic_client.py

@@ -10,7 +10,7 @@ from anthropic.types.beta.message_create_params import MessageCreateParamsNonStr
  from anthropic.types.beta.messages import BetaMessageBatch
  from anthropic.types.beta.messages.batch_create_params import Request

- from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
+ from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE, REQUEST_HEARTBEAT_PARAM
  from letta.errors import (
  ContextWindowExceededError,
  ErrorCode,
@@ -31,6 +31,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
  from letta.log import get_logger
  from letta.otel.tracing import trace_method
+ from letta.schemas.agent import AgentType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
@@ -54,15 +55,46 @@ class AnthropicClient(LLMClientBase):
  @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
  def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
  client = self._get_anthropic_client(llm_config, async_client=False)
- response = client.beta.messages.create(**request_data)
+ betas: list[str] = []
+ # 1M context beta for Sonnet 4/4.5 when enabled
+ try:
+ from letta.settings import model_settings
+
+ if model_settings.anthropic_sonnet_1m and (
+ llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+ ):
+ betas.append("context-1m-2025-08-07")
+ except Exception:
+ pass
+
+ if betas:
+ response = client.beta.messages.create(**request_data, betas=betas)
+ else:
+ response = client.beta.messages.create(**request_data)
  return response.model_dump()

  @trace_method
  async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
  client = await self._get_anthropic_client_async(llm_config, async_client=True)

+ betas: list[str] = []
+ # interleaved thinking for reasoner
  if llm_config.enable_reasoner:
- response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
+ betas.append("interleaved-thinking-2025-05-14")
+
+ # 1M context beta for Sonnet 4/4.5 when enabled
+ try:
+ from letta.settings import model_settings
+
+ if model_settings.anthropic_sonnet_1m and (
+ llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+ ):
+ betas.append("context-1m-2025-08-07")
+ except Exception:
+ pass
+
+ if betas:
+ response = await client.beta.messages.create(**request_data, betas=betas)
  else:
  response = await client.beta.messages.create(**request_data)
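The new beta handling is easiest to read in isolation. A minimal sketch of how the flags are assembled, assuming only what the hunks above show (the model_settings.anthropic_sonnet_1m flag and the literal beta strings); the second startswith check in the diff is already covered by the first and is dropped here:

    def collect_anthropic_betas(model: str, enable_reasoner: bool, sonnet_1m_enabled: bool) -> list[str]:
        """Collect the Anthropic beta flags used when creating a message."""
        betas: list[str] = []
        if enable_reasoner:
            # interleaved thinking for reasoner models
            betas.append("interleaved-thinking-2025-05-14")
        if sonnet_1m_enabled and model.startswith("claude-sonnet-4"):
            # 1M context window beta for Sonnet 4 / 4.5
            betas.append("context-1m-2025-08-07")
        return betas

    # Example: Sonnet 4.5 with reasoning and the 1M-context setting enabled
    print(collect_anthropic_betas("claude-sonnet-4-5", True, True))
    # -> ['interleaved-thinking-2025-05-14', 'context-1m-2025-08-07']

If the list ends up non-empty it is passed as betas=... to client.beta.messages.create(); otherwise the call is made without the parameter, as shown in the diff.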
@@ -83,11 +115,23 @@ class AnthropicClient(LLMClientBase):
  if llm_config.enable_reasoner:
  betas.append("interleaved-thinking-2025-05-14")

+ # 1M context beta for Sonnet 4/4.5 when enabled
+ try:
+ from letta.settings import model_settings
+
+ if model_settings.anthropic_sonnet_1m and (
+ llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+ ):
+ betas.append("context-1m-2025-08-07")
+ except Exception:
+ pass
+
  return await client.beta.messages.create(**request_data, betas=betas)

  @trace_method
  async def send_llm_batch_request_async(
  self,
+ agent_type: AgentType,
  agent_messages_mapping: Dict[str, List[PydanticMessage]],
  agent_tools_mapping: Dict[str, List[dict]],
  agent_llm_config_mapping: Dict[str, LLMConfig],
@@ -114,6 +158,7 @@ class AnthropicClient(LLMClientBase):
  try:
  requests = {
  agent_id: self.build_request_data(
+ agent_type=agent_type,
  messages=agent_messages_mapping[agent_id],
  llm_config=agent_llm_config_mapping[agent_id],
  tools=agent_tools_mapping[agent_id],
@@ -175,14 +220,19 @@ class AnthropicClient(LLMClientBase):
  @trace_method
  def build_request_data(
  self,
+ agent_type: AgentType, # if react, use native content + strip heartbeats
  messages: List[PydanticMessage],
  llm_config: LLMConfig,
  tools: Optional[List[dict]] = None,
  force_tool_call: Optional[str] = None,
+ requires_subsequent_tool_call: bool = False,
  ) -> dict:
  # TODO: This needs to get cleaned up. The logic here is pretty confusing.
  # TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
- prefix_fill = True
+ prefix_fill = True if agent_type != AgentType.letta_v1_agent else False
+ is_v1 = agent_type == AgentType.letta_v1_agent
+ # Determine local behavior for putting inner thoughts in kwargs without mutating llm_config
+ put_kwargs = bool(llm_config.put_inner_thoughts_in_kwargs) and not is_v1
  if not self.use_tool_naming:
  raise NotImplementedError("Only tool calling supported on Anthropic API requests")

@@ -222,8 +272,9 @@ class AnthropicClient(LLMClientBase):
  # Special case for summarization path
  tools_for_request = None
  tool_choice = None
- elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
+ elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
  # NOTE: reasoning models currently do not allow for `any`
+ # NOTE: react agents should always have auto on, since the precense/absense of tool calls controls chaining
  tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
  tools_for_request = [OpenAITool(function=f) for f in tools]
  elif force_tool_call is not None:
@@ -231,11 +282,17 @@ class AnthropicClient(LLMClientBase):
  tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]

  # need to have this setting to be able to put inner thoughts in kwargs
- if not llm_config.put_inner_thoughts_in_kwargs:
- logger.warning(
- f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
- )
- llm_config.put_inner_thoughts_in_kwargs = True
+ if not put_kwargs:
+ if is_v1:
+ # For v1 agents, native content is used and kwargs must remain disabled to avoid conflicts
+ logger.warning(
+ "Forced tool call requested but inner_thoughts_in_kwargs is disabled for v1 agent; proceeding without inner thoughts in kwargs."
+ )
+ else:
+ logger.warning(
+ f"Force enabling inner thoughts in kwargs for Claude due to forced tool call: {force_tool_call} (local override only)"
+ )
+ put_kwargs = True
  else:
  tool_choice = {"type": "any", "disable_parallel_tool_use": True}
  tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
@@ -246,7 +303,7 @@ class AnthropicClient(LLMClientBase):

  # Add inner thoughts kwarg
  # TODO: Can probably make this more efficient
- if tools_for_request and len(tools_for_request) > 0 and llm_config.put_inner_thoughts_in_kwargs:
+ if tools_for_request and len(tools_for_request) > 0 and put_kwargs:
  tools_with_inner_thoughts = add_inner_thoughts_to_functions(
  functions=[t.function.model_dump() for t in tools_for_request],
  inner_thoughts_key=INNER_THOUGHTS_KWARG,
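For context, "inner thoughts in kwargs" means every tool schema is extended with an extra reasoning parameter before being sent to the provider. A rough, hypothetical sketch of that transformation (the key name and description are assumptions, not copied from add_inner_thoughts_to_functions):

    INNER_THOUGHTS_KWARG = "inner_thoughts"  # assumed key name, for illustration only

    def add_inner_thoughts_param(function_schema: dict, description: str) -> dict:
        """Give a tool schema an extra string parameter for the model's reasoning."""
        params = function_schema.setdefault("parameters", {"type": "object", "properties": {}})
        params["properties"][INNER_THOUGHTS_KWARG] = {"type": "string", "description": description}
        params.setdefault("required", []).insert(0, INNER_THOUGHTS_KWARG)
        return function_schema

    schema = {"name": "send_message",
              "parameters": {"type": "object",
                             "properties": {"message": {"type": "string"}},
                             "required": ["message"]}}
    print(add_inner_thoughts_param(schema, "Your private reasoning before calling the tool."))

The v1 (react-style) path skips this entirely: put_kwargs stays False, so reasoning travels as native content instead of a synthetic tool argument.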
@@ -268,8 +325,12 @@ class AnthropicClient(LLMClientBase):
  data["system"] = self._add_cache_control_to_system_message(system_content)
  data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
  messages=messages[1:],
+ current_model=llm_config.model,
  inner_thoughts_xml_tag=inner_thoughts_xml_tag,
- put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
+ put_inner_thoughts_in_kwargs=put_kwargs,
+ # if react, use native content + strip heartbeats
+ native_content=is_v1,
+ strip_request_heartbeat=is_v1,
  )

  # Ensure first message is user
@@ -279,15 +340,27 @@ class AnthropicClient(LLMClientBase):
  # Handle alternating messages
  data["messages"] = merge_tool_results_into_user_messages(data["messages"])

- # Strip heartbeat pings if extended thinking
- if llm_config.enable_reasoner:
- data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
+ if agent_type == AgentType.letta_v1_agent:
+ # Both drop heartbeats in the payload
+ data["messages"] = drop_heartbeats(data["messages"])
+ # And drop heartbeats in the tools
+ if "tools" in data:
+ for tool in data["tools"]:
+ tool["input_schema"]["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+ if "required" in tool["input_schema"] and REQUEST_HEARTBEAT_PARAM in tool["input_schema"]["required"]:
+ # NOTE: required is not always present
+ tool["input_schema"]["required"].remove(REQUEST_HEARTBEAT_PARAM)
+
+ else:
+ # Strip heartbeat pings if extended thinking
+ if llm_config.enable_reasoner:
+ data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])

  # Prefix fill
  # https://docs.anthropic.com/en/api/messages#body-messages
  # NOTE: cannot prefill with tools for opus:
  # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
- if prefix_fill and not llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+ if prefix_fill and not put_kwargs and "opus" not in data["model"]:
  data["messages"].append(
  # Start the thinking process for the assistant
  {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
@@ -716,6 +789,44 @@ def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
  return False


+ def drop_heartbeats(messages: List[dict]):
+ cleaned_messages = []
+
+ # Loop through messages
+ # For messages with role 'user' and len(content) > 1,
+ # Check if content[0].type == 'tool_result'
+ # If so, iterate over content[1:] and while content.type == 'text' and is_heartbeat(content.text),
+ # merge into content[0].content
+
+ for message in messages:
+ if "role" in message and "content" in message and message["role"] == "user":
+ content_parts = message["content"]
+
+ if isinstance(content_parts, str):
+ if is_heartbeat({"role": "user", "content": content_parts}):
+ continue
+ elif isinstance(content_parts, list) and len(content_parts) == 1 and "text" in content_parts[0]:
+ if is_heartbeat({"role": "user", "content": content_parts[0]["text"]}):
+ continue # skip
+ else:
+ cleaned_parts = []
+ # Drop all the parts
+ for content_part in content_parts:
+ if "text" in content_part and is_heartbeat({"role": "user", "content": content_part["text"]}):
+ continue # skip
+ else:
+ cleaned_parts.append(content_part)
+
+ if len(cleaned_parts) == 0:
+ continue
+ else:
+ message["content"] = cleaned_parts
+
+ cleaned_messages.append(message)
+
+ return cleaned_messages
+
+
  def merge_heartbeats_into_tool_responses(messages: List[dict]):
  """For extended thinking mode, we don't want anything other than tool responses in-between assistant actions
letta/llm_api/bedrock_client.py

@@ -6,7 +6,7 @@ from aioboto3.session import Session
  from letta.llm_api.anthropic_client import AnthropicClient
  from letta.log import get_logger
  from letta.otel.tracing import trace_method
- from letta.schemas.enums import ProviderCategory
+ from letta.schemas.enums import AgentType, ProviderCategory
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.services.provider_manager import ProviderManager
@@ -65,12 +65,14 @@ class BedrockClient(AnthropicClient):
  @trace_method
  def build_request_data(
  self,
+ agent_type: AgentType,
  messages: List[PydanticMessage],
  llm_config: LLMConfig,
  tools: Optional[List[dict]] = None,
  force_tool_call: Optional[str] = None,
+ requires_subsequent_tool_call: bool = False,
  ) -> dict:
- data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+ data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
  # remove disallowed fields
  if "tool_choice" in data:
  del data["tool_choice"]["disable_parallel_tool_use"]
letta/llm_api/deepseek_client.py

@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

  from letta.llm_api.openai_client import OpenAIClient
  from letta.otel.tracing import trace_method
+ from letta.schemas.enums import AgentType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import (
@@ -331,15 +332,17 @@ class DeepseekClient(OpenAIClient):
  @trace_method
  def build_request_data(
  self,
+ agent_type: AgentType,
  messages: List[PydanticMessage],
  llm_config: LLMConfig,
  tools: Optional[List[dict]] = None,
  force_tool_call: Optional[str] = None,
+ requires_subsequent_tool_call: bool = False,
  ) -> dict:
  # Override put_inner_thoughts_in_kwargs to False for DeepSeek
  llm_config.put_inner_thoughts_in_kwargs = False

- data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+ data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)

  def add_functions_to_system_message(system_message: ChatMessage):
  system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
letta/llm_api/google_vertex_client.py

@@ -1,6 +1,7 @@
+ import base64
  import json
  import uuid
- from typing import List, Optional
+ from typing import AsyncIterator, List, Optional

  from google import genai
  from google.genai import errors
@@ -34,6 +35,7 @@ from letta.local_llm.json_parser import clean_json_string_extra_backslash
  from letta.local_llm.utils import count_tokens
  from letta.log import get_logger
  from letta.otel.tracing import trace_method
+ from letta.schemas.agent import AgentType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import Tool
@@ -137,6 +139,15 @@ class GoogleVertexClient(LLMClientBase):
  raise RuntimeError("Failed to get response data after all retries")
  return response_data

+ @trace_method
+ async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
+ client = self._get_client()
+ return await client.aio.models.generate_content_stream(
+ model=llm_config.model,
+ contents=request_data["contents"],
+ config=request_data["config"],
+ )
+
  @staticmethod
  def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
  """Google AI API requires all function call returns are immediately followed by a 'model' role message.
@@ -274,14 +285,19 @@
  @trace_method
  def build_request_data(
  self,
+ agent_type: AgentType, # if react, use native content + strip heartbeats
  messages: List[PydanticMessage],
  llm_config: LLMConfig,
  tools: List[dict],
  force_tool_call: Optional[str] = None,
+ requires_subsequent_tool_call: bool = False,
  ) -> dict:
  """
  Constructs a request object in the expected data format for this client.
  """
+ # NOTE: forcing inner thoughts in kwargs off
+ if agent_type == AgentType.letta_v1_agent:
+ llm_config.put_inner_thoughts_in_kwargs = False

  if tools:
  tool_objs = [Tool(type="function", function=t) for t in tools]
@@ -293,7 +309,12 @@
  tool_names = []

  contents = self.add_dummy_model_messages(
- PydanticMessage.to_google_dicts_from_list(messages),
+ PydanticMessage.to_google_dicts_from_list(
+ messages,
+ current_model=llm_config.model,
+ put_inner_thoughts_in_kwargs=False if agent_type == AgentType.letta_v1_agent else True,
+ native_content=True if agent_type == AgentType.letta_v1_agent else False,
+ ),
  )

  request_data = {
@@ -312,16 +333,42 @@
  request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
  del request_data["config"]["tools"]
  elif tools:
- tool_config = ToolConfig(
- function_calling_config=FunctionCallingConfig(
- # ANY mode forces the model to predict only function calls
- mode=FunctionCallingConfigMode.ANY,
- # Provide the list of tools (though empty should also work, it seems not to)
- allowed_function_names=tool_names,
+ if agent_type == AgentType.letta_v1_agent:
+ # don't require tools
+ tool_call_mode = FunctionCallingConfigMode.AUTO
+ tool_config = ToolConfig(
+ function_calling_config=FunctionCallingConfig(
+ mode=tool_call_mode,
+ )
  )
- )
+ else:
+ # require tools
+ tool_call_mode = FunctionCallingConfigMode.ANY
+ tool_config = ToolConfig(
+ function_calling_config=FunctionCallingConfig(
+ mode=tool_call_mode,
+ # Provide the list of tools (though empty should also work, it seems not to)
+ allowed_function_names=tool_names,
+ )
+ )
+
  request_data["config"]["tool_config"] = tool_config.model_dump()

+ # https://ai.google.dev/gemini-api/docs/thinking#set-budget
+ # 2.5 Pro
+ # - Default: dynamic thinking
+ # - Dynamic thinking that cannot be disabled
+ # - Range: -1 (for dynamic), or 128-32768
+ # 2.5 Flash
+ # - Default: dynamic thinking
+ # - Dynamic thinking that *can* be disabled
+ # - Range: -1, 0, or 0-24576
+ # 2.5 Flash Lite
+ # - Default: no thinking
+ # - Dynamic thinking that *can* be disabled
+ # - Range: -1, 0, or 512-24576
+ # TODO when using v3 agent loop, properly support the native thinking in Gemini
+
  # Add thinking_config for flash
  # If enable_reasoner is False, set thinking_budget to 0
  # Otherwise, use the value from max_reasoning_tokens
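This fork mirrors the Anthropic tool_choice change: letta_v1_agent gets AUTO so a turn may end without a tool call, all other agent types keep ANY to force tool use. A dict-level sketch of the two resulting payloads (field names follow the diff; the serialized enum values are assumptions):

    def build_tool_config(is_v1_agent: bool, tool_names: list[str]) -> dict:
        """Approximate shape of ToolConfig(...).model_dump() for the two agent paths."""
        if is_v1_agent:
            # AUTO: the model may reply with plain text or call a tool
            return {"function_calling_config": {"mode": "AUTO"}}
        # ANY: the model must call one of the allowed functions
        return {"function_calling_config": {"mode": "ANY", "allowed_function_names": tool_names}}

    print(build_tool_config(False, ["send_message"]))
    print(build_tool_config(True, ["send_message"]))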
@@ -334,6 +381,7 @@
  )
  thinking_config = ThinkingConfig(
  thinking_budget=(thinking_budget),
+ include_thoughts=(thinking_budget > 1),
  )
  request_data["config"]["thinking_config"] = thinking_config.model_dump()
@@ -395,13 +443,15 @@
  # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
  # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
  # To patch this, if we have multiple parts we can take the last one
- if len(parts) > 1:
+ if len(parts) > 1 and not llm_config.enable_reasoner:
  logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+ # only truncate if reasoning is off
  parts = [parts[-1]]

  # TODO support parts / multimodal
  # TODO support parallel tool calling natively
  # TODO Alternative here is to throw away everything else except for the first part
+ openai_response_message = None
  for response_message in parts:
  # Convert the actual message style to OpenAI style
  if response_message.function_call:
@@ -410,8 +460,10 @@
  function_args = function_call.args
  assert isinstance(function_args, dict), function_args

- # NOTE: this also involves stripping the inner monologue out of the function
+ # TODO this is kind of funky - really, we should be passing 'native_content' as a kwarg to fork behavior
+ inner_thoughts = response_message.text
  if llm_config.put_inner_thoughts_in_kwargs:
+ # NOTE: this also involves stripping the inner monologue out of the function
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

  assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
@@ -420,25 +472,44 @@
  inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
  assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
  else:
- inner_thoughts = None
+ pass
+ # inner_thoughts = None
+ # inner_thoughts = response_message.text

  # Google AI API doesn't generate tool call IDs
- openai_response_message = Message(
- role="assistant", # NOTE: "model" -> "assistant"
- content=inner_thoughts,
- tool_calls=[
- ToolCall(
- id=get_tool_call_id(),
- type="function",
- function=FunctionCall(
- name=function_name,
- arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
- ),
- )
- ],
+ tool_call = ToolCall(
+ id=get_tool_call_id(),
+ type="function",
+ function=FunctionCall(
+ name=function_name,
+ arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+ ),
  )

+ if openai_response_message is None:
+ openai_response_message = Message(
+ role="assistant", # NOTE: "model" -> "assistant"
+ content=inner_thoughts,
+ tool_calls=[tool_call],
+ )
+ else:
+ openai_response_message.content = inner_thoughts
+ if openai_response_message.tool_calls is None:
+ openai_response_message.tool_calls = []
+ openai_response_message.tool_calls.append(tool_call)
+ if response_message.thought_signature:
+ thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+ openai_response_message.reasoning_content_signature = thought_signature
+
  else:
+ if response_message.thought:
+ if openai_response_message is None:
+ openai_response_message = Message(
+ role="assistant", # NOTE: "model" -> "assistant"
+ reasoning_content=response_message.text,
+ )
+ else:
+ openai_response_message.reasoning_content = response_message.text
  try:
  # Structured output tool call
  function_call = json_loads(response_message.text)
@@ -459,20 +530,25 @@
  inner_thoughts = None

  # Google AI API doesn't generate tool call IDs
- openai_response_message = Message(
- role="assistant", # NOTE: "model" -> "assistant"
- content=inner_thoughts,
- tool_calls=[
- ToolCall(
- id=get_tool_call_id(),
- type="function",
- function=FunctionCall(
- name=function_name,
- arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
- ),
- )
- ],
+ tool_call = ToolCall(
+ id=get_tool_call_id(),
+ type="function",
+ function=FunctionCall(
+ name=function_name,
+ arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+ ),
  )
+ if openai_response_message is None:
+ openai_response_message = Message(
+ role="assistant", # NOTE: "model" -> "assistant"
+ content=inner_thoughts,
+ tool_calls=[tool_call],
+ )
+ else:
+ openai_response_message.content = inner_thoughts
+ if openai_response_message.tool_calls is None:
+ openai_response_message.tool_calls = []
+ openai_response_message.tool_calls.append(tool_call)

  except json.decoder.JSONDecodeError:
  if candidate.finish_reason == "MAX_TOKENS":
@@ -481,10 +557,16 @@
  inner_thoughts = response_message.text

  # Google AI API doesn't generate tool call IDs
- openai_response_message = Message(
- role="assistant", # NOTE: "model" -> "assistant"
- content=inner_thoughts,
- )
+ if openai_response_message is None:
+ openai_response_message = Message(
+ role="assistant", # NOTE: "model" -> "assistant"
+ content=inner_thoughts,
+ )
+ else:
+ openai_response_message.content = inner_thoughts
+ if response_message.thought_signature:
+ thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+ openai_response_message.reasoning_content_signature = thought_signature

  # Google AI API uses different finish reason strings than OpenAI
  # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
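The response-parsing change replaces "last part wins" with accumulation: one assistant message collects every tool call plus any thought text across the Gemini parts. A simplified dict-based sketch of that merge (the real code builds OpenAI-style Message/ToolCall objects and also carries the base64 thought signature):

    def merge_gemini_parts(parts: list[dict]) -> dict | None:
        """Fold all response parts into a single assistant-style message."""
        message = None
        for part in parts:
            if message is None:
                message = {"role": "assistant", "content": None, "tool_calls": []}
            if part.get("function_call"):
                message["tool_calls"].append({"type": "function", "function": part["function_call"]})
            elif part.get("thought"):
                message["reasoning_content"] = part.get("text")
            else:
                message["content"] = part.get("text")
        return message

    print(merge_gemini_parts([
        {"thought": True, "text": "User greeted me."},
        {"function_call": {"name": "send_message", "args": {"message": "Hi!"}}},
    ]))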
letta/llm_api/groq_client.py

@@ -8,6 +8,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
  from letta.llm_api.openai_client import OpenAIClient
  from letta.otel.tracing import trace_method
  from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import AgentType
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.settings import model_settings
@@ -23,12 +24,14 @@ class GroqClient(OpenAIClient):
  @trace_method
  def build_request_data(
  self,
+ agent_type: AgentType,
  messages: List[PydanticMessage],
  llm_config: LLMConfig,
  tools: Optional[List[dict]] = None,
  force_tool_call: Optional[str] = None,
+ requires_subsequent_tool_call: bool = False,
  ) -> dict:
- data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+ data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)

  # Groq validation - these fields are not supported and will cause 400 errors
  # https://console.groq.com/docs/openai
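Bedrock, DeepSeek, and Groq all apply the same mechanical change: accept the new leading agent_type argument and the requires_subsequent_tool_call flag, forward both to the parent build_request_data, then apply provider-specific tweaks. A schematic sketch of the pattern with illustrative class names (not copied from the package):

    from typing import Optional

    class BaseClient:
        def build_request_data(self, agent_type, messages, llm_config, tools=None,
                               force_tool_call: Optional[str] = None,
                               requires_subsequent_tool_call: bool = False) -> dict:
            # stand-in for the shared request-building logic
            return {"agent_type": agent_type, "messages": messages, "tools": tools or []}

    class ProviderClient(BaseClient):
        def build_request_data(self, agent_type, messages, llm_config, tools=None,
                               force_tool_call: Optional[str] = None,
                               requires_subsequent_tool_call: bool = False) -> dict:
            # thread the new arguments through untouched...
            data = super().build_request_data(agent_type, messages, llm_config, tools,
                                              force_tool_call, requires_subsequent_tool_call)
            # ...then strip whatever this provider does not accept
            data.pop("unsupported_field", None)
            return data

    print(ProviderClient().build_request_data("letta_v1_agent", [], None))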
letta/llm_api/llm_api_tools.py

@@ -1,4 +1,5 @@
  import json
+ import os
  import random
  import time
  from typing import List, Optional, Union
@@ -174,11 +175,17 @@
 
  actor = UserManager().get_user_or_default(user_id=user_id)
  api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
- elif model_settings.openai_api_key is None:
- # the openai python client requires a dummy API key
- api_key = "DUMMY_API_KEY"
  else:
- api_key = model_settings.openai_api_key
+ # Prefer OpenRouter key when targeting OpenRouter
+ is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+ llm_config.provider_name == "openrouter"
+ )
+ if is_openrouter:
+ api_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+ if not is_openrouter or not api_key:
+ api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+ # the openai python client requires some API key string
+ api_key = api_key or "DUMMY_API_KEY"

  if function_call is None and functions is not None and len(functions) > 0:
  # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
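A standalone sketch of the key-resolution order introduced here: prefer an OpenRouter key when the endpoint or provider points at OpenRouter, fall back to the OpenAI key, and finally to a dummy string because the openai client requires some key. Settings attributes and environment variable names mirror the diff:

    import os
    from typing import Optional

    def pick_api_key(model_endpoint: Optional[str], provider_name: Optional[str],
                     openrouter_key: Optional[str], openai_key: Optional[str]) -> str:
        is_openrouter = (model_endpoint and "openrouter.ai" in model_endpoint) or provider_name == "openrouter"
        api_key = None
        if is_openrouter:
            api_key = openrouter_key or os.environ.get("OPENROUTER_API_KEY")
        if not api_key:
            api_key = openai_key or os.environ.get("OPENAI_API_KEY")
        # the openai python client requires some API key string
        return api_key or "DUMMY_API_KEY"

    print(pick_api_key("https://openrouter.ai/api/v1", None, "sk-or-123", None))   # -> sk-or-123
    print(pick_api_key("https://api.openai.com/v1", None, None, "sk-openai-456"))  # -> sk-openai-456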