letta-nightly 0.11.7.dev20251007104119__py3-none-any.whl → 0.12.0.dev20251009104148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +927 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/database_utils.py +161 -0
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +127 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +124 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +2 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/run_metrics.py +82 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +564 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/run_metrics.py +21 -0
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +79 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +149 -99
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/context_window_calculator/token_counter.py +1 -1
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +69 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +364 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- letta/system.py +5 -1
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/RECORD +146 -135
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/anthropic_client.py
CHANGED
@@ -10,7 +10,7 @@ from anthropic.types.beta.message_create_params import MessageCreateParamsNonStr
 from anthropic.types.beta.messages import BetaMessageBatch
 from anthropic.types.beta.messages.batch_create_params import Request
 
-from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
+from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE, REQUEST_HEARTBEAT_PARAM
 from letta.errors import (
     ContextWindowExceededError,
     ErrorCode,
@@ -31,6 +31,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
@@ -54,15 +55,46 @@ class AnthropicClient(LLMClientBase):
     @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(llm_config, async_client=False)
-        response = client.beta.messages.create(**request_data)
+        betas: list[str] = []
+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
+        if betas:
+            response = client.beta.messages.create(**request_data, betas=betas)
+        else:
+            response = client.beta.messages.create(**request_data)
         return response.model_dump()
 
     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
 
+        betas: list[str] = []
+        # interleaved thinking for reasoner
         if llm_config.enable_reasoner:
-            response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
+            betas.append("interleaved-thinking-2025-05-14")
+
+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
+        if betas:
+            response = await client.beta.messages.create(**request_data, betas=betas)
         else:
             response = await client.beta.messages.create(**request_data)
 
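The same beta-flag gating now appears in `request`, `request_async`, and the streaming path below. A minimal sketch of the shared idea, assuming only the settings flag and the beta identifiers shown in this diff (the helper itself is hypothetical, not part of the package):

```python
# Hypothetical helper illustrating the beta-flag gating used above.
# "interleaved-thinking-2025-05-14" and "context-1m-2025-08-07" are the
# Anthropic beta identifiers from the diff; everything else is a sketch.
def collect_beta_flags(model: str, enable_reasoner: bool, sonnet_1m_enabled: bool) -> list[str]:
    betas: list[str] = []
    if enable_reasoner:
        # interleaved thinking between tool calls for reasoner models
        betas.append("interleaved-thinking-2025-05-14")
    if sonnet_1m_enabled and model.startswith(("claude-sonnet-4", "claude-sonnet-4-5")):
        # opt in to the 1M-token context window beta for Sonnet 4/4.5
        betas.append("context-1m-2025-08-07")
    return betas

# betas = collect_beta_flags(llm_config.model, llm_config.enable_reasoner,
#                            model_settings.anthropic_sonnet_1m)
# response = client.beta.messages.create(**request_data, betas=betas) if betas \
#     else client.beta.messages.create(**request_data)
```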
@@ -83,11 +115,23 @@ class AnthropicClient(LLMClientBase):
         if llm_config.enable_reasoner:
             betas.append("interleaved-thinking-2025-05-14")
 
+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
         return await client.beta.messages.create(**request_data, betas=betas)
 
     @trace_method
     async def send_llm_batch_request_async(
         self,
+        agent_type: AgentType,
         agent_messages_mapping: Dict[str, List[PydanticMessage]],
         agent_tools_mapping: Dict[str, List[dict]],
         agent_llm_config_mapping: Dict[str, LLMConfig],
@@ -114,6 +158,7 @@ class AnthropicClient(LLMClientBase):
         try:
             requests = {
                 agent_id: self.build_request_data(
+                    agent_type=agent_type,
                     messages=agent_messages_mapping[agent_id],
                     llm_config=agent_llm_config_mapping[agent_id],
                     tools=agent_tools_mapping[agent_id],
@@ -175,14 +220,19 @@ class AnthropicClient(LLMClientBase):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         # TODO: This needs to get cleaned up. The logic here is pretty confusing.
         # TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
-        prefix_fill = True
+        prefix_fill = True if agent_type != AgentType.letta_v1_agent else False
+        is_v1 = agent_type == AgentType.letta_v1_agent
+        # Determine local behavior for putting inner thoughts in kwargs without mutating llm_config
+        put_kwargs = bool(llm_config.put_inner_thoughts_in_kwargs) and not is_v1
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
 
@@ -222,8 +272,9 @@ class AnthropicClient(LLMClientBase):
             # Special case for summarization path
             tools_for_request = None
             tool_choice = None
-        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
+        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
             # NOTE: reasoning models currently do not allow for `any`
+            # NOTE: react agents should always have auto on, since the presence/absence of tool calls controls chaining
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools]
         elif force_tool_call is not None:
@@ -231,11 +282,17 @@ class AnthropicClient(LLMClientBase):
             tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]
 
             # need to have this setting to be able to put inner thoughts in kwargs
-            if not llm_config.put_inner_thoughts_in_kwargs:
-                logger.warning(
-                    f"Force enabling inner thoughts in kwargs for Claude due to forced tool call: {force_tool_call}"
-                )
-                llm_config.put_inner_thoughts_in_kwargs = True
+            if not put_kwargs:
+                if is_v1:
+                    # For v1 agents, native content is used and kwargs must remain disabled to avoid conflicts
+                    logger.warning(
+                        "Forced tool call requested but inner_thoughts_in_kwargs is disabled for v1 agent; proceeding without inner thoughts in kwargs."
+                    )
+                else:
+                    logger.warning(
+                        f"Force enabling inner thoughts in kwargs for Claude due to forced tool call: {force_tool_call} (local override only)"
+                    )
+                    put_kwargs = True
         else:
             tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
@@ -246,7 +303,7 @@ class AnthropicClient(LLMClientBase):
 
         # Add inner thoughts kwarg
         # TODO: Can probably make this more efficient
-        if tools_for_request and len(tools_for_request) > 0 and llm_config.put_inner_thoughts_in_kwargs:
+        if tools_for_request and len(tools_for_request) > 0 and put_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -268,8 +325,12 @@ class AnthropicClient(LLMClientBase):
         data["system"] = self._add_cache_control_to_system_message(system_content)
         data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
             messages=messages[1:],
+            current_model=llm_config.model,
             inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+            put_inner_thoughts_in_kwargs=put_kwargs,
+            # if react, use native content + strip heartbeats
+            native_content=is_v1,
+            strip_request_heartbeat=is_v1,
         )
 
         # Ensure first message is user
@@ -279,15 +340,27 @@ class AnthropicClient(LLMClientBase):
         # Handle alternating messages
         data["messages"] = merge_tool_results_into_user_messages(data["messages"])
 
-        # Strip heartbeat pings if extended thinking
-        if llm_config.enable_reasoner:
-            data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
+        if agent_type == AgentType.letta_v1_agent:
+            # Both drop heartbeats in the payload
+            data["messages"] = drop_heartbeats(data["messages"])
+            # And drop heartbeats in the tools
+            if "tools" in data:
+                for tool in data["tools"]:
+                    tool["input_schema"]["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                    if "required" in tool["input_schema"] and REQUEST_HEARTBEAT_PARAM in tool["input_schema"]["required"]:
+                        # NOTE: required is not always present
+                        tool["input_schema"]["required"].remove(REQUEST_HEARTBEAT_PARAM)
+
+        else:
+            # Strip heartbeat pings if extended thinking
+            if llm_config.enable_reasoner:
+                data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
 
         # Prefix fill
         # https://docs.anthropic.com/en/api/messages#body-messages
         # NOTE: cannot prefill with tools for opus:
         # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-        if prefix_fill and not llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+        if prefix_fill and not put_kwargs and "opus" not in data["model"]:
             data["messages"].append(
                 # Start the thinking process for the assistant
                 {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
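For `letta_v1_agent` (react-style) requests, `request_heartbeat` disappears from both the message payload and every tool schema. A small before/after sketch of the schema scrub above; the example tool is made up, only the `request_heartbeat` parameter name comes from the diff:

```python
# Hypothetical tool schema before the scrub; REQUEST_HEARTBEAT_PARAM is
# "request_heartbeat" here for illustration.
tool = {
    "name": "send_message",
    "input_schema": {
        "type": "object",
        "properties": {
            "message": {"type": "string"},
            "request_heartbeat": {"type": "boolean"},
        },
        "required": ["message", "request_heartbeat"],
    },
}

tool["input_schema"]["properties"].pop("request_heartbeat", None)
if "request_heartbeat" in tool["input_schema"].get("required", []):
    tool["input_schema"]["required"].remove("request_heartbeat")

# The schema now only advertises "message"; for v1 agents, chaining is driven
# by whether the model emits a tool call at all, not by a heartbeat flag.
```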
@@ -716,6 +789,44 @@ def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
     return False
 
 
+def drop_heartbeats(messages: List[dict]):
+    cleaned_messages = []
+
+    # Loop through messages
+    # For messages with role 'user' and len(content) > 1,
+    # Check if content[0].type == 'tool_result'
+    # If so, iterate over content[1:] and while content.type == 'text' and is_heartbeat(content.text),
+    # merge into content[0].content
+
+    for message in messages:
+        if "role" in message and "content" in message and message["role"] == "user":
+            content_parts = message["content"]
+
+            if isinstance(content_parts, str):
+                if is_heartbeat({"role": "user", "content": content_parts}):
+                    continue
+            elif isinstance(content_parts, list) and len(content_parts) == 1 and "text" in content_parts[0]:
+                if is_heartbeat({"role": "user", "content": content_parts[0]["text"]}):
+                    continue  # skip
+            else:
+                cleaned_parts = []
+                # Drop all the parts
+                for content_part in content_parts:
+                    if "text" in content_part and is_heartbeat({"role": "user", "content": content_part["text"]}):
+                        continue  # skip
+                    else:
+                        cleaned_parts.append(content_part)
+
+                if len(cleaned_parts) == 0:
+                    continue
+                else:
+                    message["content"] = cleaned_parts
+
+        cleaned_messages.append(message)
+
+    return cleaned_messages
+
+
 def merge_heartbeats_into_tool_responses(messages: List[dict]):
     """For extended thinking mode, we don't want anything other than tool responses in-between assistant actions
 
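A usage sketch for the new `drop_heartbeats` helper; the heartbeat text below is illustrative and stands in for whatever `is_heartbeat` actually matches in Letta's message format:

```python
# Sketch, assuming drop_heartbeats from this module and that is_heartbeat()
# recognizes the "<heartbeat ping>" placeholder text used here.
from letta.llm_api.anthropic_client import drop_heartbeats

messages = [
    # mixed user turn: tool_result plus a heartbeat text part
    {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": "abc", "content": "OK"},
        {"type": "text", "text": "<heartbeat ping>"},
    ]},
    # pure-heartbeat user turn
    {"role": "user", "content": "<heartbeat ping>"},
    # real user turn
    {"role": "user", "content": "What's the weather?"},
]

cleaned = drop_heartbeats(messages)
# Expected shape: the pure-heartbeat turn is removed entirely, the mixed turn
# keeps only its tool_result part, and the real question passes through.
```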
letta/llm_api/bedrock_client.py
CHANGED
@@ -6,7 +6,7 @@ from aioboto3.session import Session
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import AgentType, ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.services.provider_manager import ProviderManager
@@ -65,12 +65,14 @@ class BedrockClient(AnthropicClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
         # remove disallowed fields
         if "tool_choice" in data:
             del data["tool_choice"]["disable_parallel_tool_use"]
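The same two-parameter extension (`agent_type` first, `requires_subsequent_tool_call` last) is threaded through every client subclass in this release (Bedrock here, DeepSeek and Groq below). A sketch of the updated override contract for a hypothetical further subclass; the class is illustrative, the imports match those used by the clients in this diff:

```python
# Sketch of the new build_request_data override contract, assuming the base
# signature shown in this diff. MyProviderClient is hypothetical.
from typing import List, Optional

from letta.llm_api.openai_client import OpenAIClient
from letta.schemas.enums import AgentType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage

class MyProviderClient(OpenAIClient):
    def build_request_data(
        self,
        agent_type: AgentType,              # new: first positional parameter
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,  # new: last parameter
    ) -> dict:
        # Forward everything in the base order, then apply provider fix-ups.
        data = super().build_request_data(
            agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call
        )
        return data
```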
letta/llm_api/deepseek_client.py
CHANGED
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
+from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import (
@@ -331,15 +332,17 @@ class DeepseekClient(OpenAIClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         # Override put_inner_thoughts_in_kwargs to False for DeepSeek
         llm_config.put_inner_thoughts_in_kwargs = False
 
-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
 
         def add_functions_to_system_message(system_message: ChatMessage):
             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
letta/llm_api/google_vertex_client.py
CHANGED
@@ -1,6 +1,7 @@
+import base64
 import json
 import uuid
-from typing import List, Optional
+from typing import AsyncIterator, List, Optional
 
 from google import genai
 from google.genai import errors
@@ -34,6 +35,7 @@ from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -137,6 +139,15 @@ class GoogleVertexClient(LLMClientBase):
             raise RuntimeError("Failed to get response data after all retries")
         return response_data
 
+    @trace_method
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
+        client = self._get_client()
+        return await client.aio.models.generate_content_stream(
+            model=llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+
     @staticmethod
     def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
         """Google AI API requires all function call returns are immediately followed by a 'model' role message.
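The new `stream_async` returns the raw google-genai async iterator; a caller drains it with `async for`. A minimal consumption sketch, assuming a configured client instance and a `request_data` dict produced by `build_request_data` (the `.text` accessor is the SDK's convenience shortcut and may be None for non-text chunks):

```python
# Sketch: consuming GoogleVertexClient.stream_async. vertex_client,
# request_data, and llm_config are assumed to exist in the caller's scope.
import asyncio

async def demo(vertex_client, request_data, llm_config):
    stream = await vertex_client.stream_async(request_data, llm_config)
    async for chunk in stream:  # each chunk is a GenerateContentResponse
        if chunk.text:
            print(chunk.text, end="", flush=True)

# asyncio.run(demo(vertex_client, request_data, llm_config))
```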
@@ -274,14 +285,19 @@ class GoogleVertexClient(LLMClientBase):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
+        # NOTE: forcing inner thoughts in kwargs off
+        if agent_type == AgentType.letta_v1_agent:
+            llm_config.put_inner_thoughts_in_kwargs = False
 
         if tools:
             tool_objs = [Tool(type="function", function=t) for t in tools]
@@ -293,7 +309,12 @@ class GoogleVertexClient(LLMClientBase):
             tool_names = []
 
         contents = self.add_dummy_model_messages(
-            PydanticMessage.to_google_dicts_from_list(messages),
+            PydanticMessage.to_google_dicts_from_list(
+                messages,
+                current_model=llm_config.model,
+                put_inner_thoughts_in_kwargs=False if agent_type == AgentType.letta_v1_agent else True,
+                native_content=True if agent_type == AgentType.letta_v1_agent else False,
+            ),
         )
 
         request_data = {
@@ -312,16 +333,42 @@ class GoogleVertexClient(LLMClientBase):
             request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
             del request_data["config"]["tools"]
         elif tools:
-            tool_call_mode = FunctionCallingConfigMode.ANY
-            tool_config = ToolConfig(
-                function_calling_config=FunctionCallingConfig(
-                    mode=tool_call_mode,
-                    allowed_function_names=tool_names,
-                )
+            if agent_type == AgentType.letta_v1_agent:
+                # don't require tools
+                tool_call_mode = FunctionCallingConfigMode.AUTO
+                tool_config = ToolConfig(
+                    function_calling_config=FunctionCallingConfig(
+                        mode=tool_call_mode,
+                    )
             )
-
+            else:
+                # require tools
+                tool_call_mode = FunctionCallingConfigMode.ANY
+                tool_config = ToolConfig(
+                    function_calling_config=FunctionCallingConfig(
+                        mode=tool_call_mode,
+                        # Provide the list of tools (though empty should also work, it seems not to)
+                        allowed_function_names=tool_names,
+                    )
+                )
+
             request_data["config"]["tool_config"] = tool_config.model_dump()
 
+        # https://ai.google.dev/gemini-api/docs/thinking#set-budget
+        # 2.5 Pro
+        # - Default: dynamic thinking
+        # - Dynamic thinking that cannot be disabled
+        # - Range: -1 (for dynamic), or 128-32768
+        # 2.5 Flash
+        # - Default: dynamic thinking
+        # - Dynamic thinking that *can* be disabled
+        # - Range: -1, 0, or 0-24576
+        # 2.5 Flash Lite
+        # - Default: no thinking
+        # - Dynamic thinking that *can* be disabled
+        # - Range: -1, 0, or 512-24576
+        # TODO when using v3 agent loop, properly support the native thinking in Gemini
+
         # Add thinking_config for flash
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
@@ -334,6 +381,7 @@ class GoogleVertexClient(LLMClientBase):
             )
             thinking_config = ThinkingConfig(
                 thinking_budget=(thinking_budget),
+                include_thoughts=(thinking_budget > 1),
             )
             request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
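The comment table above pins down per-model thinking-budget ranges. A sketch of how a caller might clamp a requested budget before building `ThinkingConfig`; the helper and its bounds table are derived from those comments, not taken from the package:

```python
# Hypothetical clamp based on the documented ranges above
# (https://ai.google.dev/gemini-api/docs/thinking#set-budget).
BUDGET_RANGES = {
    "gemini-2.5-pro": (128, 32768),         # dynamic (-1) allowed; cannot disable (no 0)
    "gemini-2.5-flash": (0, 24576),         # 0 disables thinking
    "gemini-2.5-flash-lite": (512, 24576),  # 0 disables; nonzero budgets start at 512
}

def clamp_thinking_budget(model: str, requested: int) -> int:
    lo, hi = BUDGET_RANGES.get(model, (0, 24576))
    if requested == -1:
        return -1  # dynamic thinking
    if requested <= 0:
        # 0 disables thinking, except on 2.5 Pro where it cannot be disabled
        return lo if model == "gemini-2.5-pro" else 0
    return max(lo, min(requested, hi))
```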
@@ -395,13 +443,15 @@ class GoogleVertexClient(LLMClientBase):
         # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
         # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
         # To patch this, if we have multiple parts we can take the last one
-        if len(parts) > 1:
+        if len(parts) > 1 and not llm_config.enable_reasoner:
             logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+            # only truncate if reasoning is off
             parts = [parts[-1]]
 
         # TODO support parts / multimodal
         # TODO support parallel tool calling natively
         # TODO Alternative here is to throw away everything else except for the first part
+        openai_response_message = None
         for response_message in parts:
             # Convert the actual message style to OpenAI style
             if response_message.function_call:
@@ -410,8 +460,10 @@ class GoogleVertexClient(LLMClientBase):
                 function_args = function_call.args
                 assert isinstance(function_args, dict), function_args
 
-                #
+                # TODO this is kind of funky - really, we should be passing 'native_content' as a kwarg to fork behavior
+                inner_thoughts = response_message.text
                 if llm_config.put_inner_thoughts_in_kwargs:
+                    # NOTE: this also involves stripping the inner monologue out of the function
                     from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
                     assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
@@ -420,25 +472,44 @@ class GoogleVertexClient(LLMClientBase):
                     inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
-                    inner_thoughts = None
+                    pass
+                    # inner_thoughts = None
+                    # inner_thoughts = response_message.text
 
                 # Google AI API doesn't generate tool call IDs
-                openai_response_message = Message(
-                    role="assistant",  # NOTE: "model" -> "assistant"
-                    content=inner_thoughts,
-                    tool_calls=[
-                        ToolCall(
-                            id=get_tool_call_id(),
-                            type="function",
-                            function=FunctionCall(
-                                name=function_name,
-                                arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                            ),
-                        )
-                    ],
+                tool_call = ToolCall(
+                    id=get_tool_call_id(),
+                    type="function",
+                    function=FunctionCall(
+                        name=function_name,
+                        arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                    ),
                 )
 
+                if openai_response_message is None:
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[tool_call],
+                    )
+                else:
+                    openai_response_message.content = inner_thoughts
+                    if openai_response_message.tool_calls is None:
+                        openai_response_message.tool_calls = []
+                    openai_response_message.tool_calls.append(tool_call)
+                if response_message.thought_signature:
+                    thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+                    openai_response_message.reasoning_content_signature = thought_signature
 
             else:
+                if response_message.thought:
+                    if openai_response_message is None:
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            reasoning_content=response_message.text,
+                        )
+                    else:
+                        openai_response_message.reasoning_content = response_message.text
                 try:
                     # Structured output tool call
                     function_call = json_loads(response_message.text)
@@ -459,20 +530,25 @@ class GoogleVertexClient(LLMClientBase):
                         inner_thoughts = None
 
                     # Google AI API doesn't generate tool call IDs
-                    openai_response_message = Message(
-                        role="assistant",  # NOTE: "model" -> "assistant"
-                        content=inner_thoughts,
-                        tool_calls=[
-                            ToolCall(
-                                id=get_tool_call_id(),
-                                type="function",
-                                function=FunctionCall(
-                                    name=function_name,
-                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                                ),
-                            )
-                        ],
+                    tool_call = ToolCall(
+                        id=get_tool_call_id(),
+                        type="function",
+                        function=FunctionCall(
+                            name=function_name,
+                            arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                        ),
                     )
+                    if openai_response_message is None:
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                            tool_calls=[tool_call],
+                        )
+                    else:
+                        openai_response_message.content = inner_thoughts
+                        if openai_response_message.tool_calls is None:
+                            openai_response_message.tool_calls = []
+                        openai_response_message.tool_calls.append(tool_call)
 
                 except json.decoder.JSONDecodeError:
                     if candidate.finish_reason == "MAX_TOKENS":
@@ -481,10 +557,16 @@ class GoogleVertexClient(LLMClientBase):
                         inner_thoughts = response_message.text
 
                         # Google AI API doesn't generate tool call IDs
-                        openai_response_message = Message(
-                            role="assistant",  # NOTE: "model" -> "assistant"
-                            content=inner_thoughts,
-                        )
+                        if openai_response_message is None:
+                            openai_response_message = Message(
+                                role="assistant",  # NOTE: "model" -> "assistant"
+                                content=inner_thoughts,
+                            )
+                        else:
+                            openai_response_message.content = inner_thoughts
+                        if response_message.thought_signature:
+                            thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+                            openai_response_message.reasoning_content_signature = thought_signature
 
         # Google AI API uses different finish reason strings than OpenAI
         # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
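All three branches above now fold every Gemini part (thought, text, function call) into one accumulated assistant message instead of overwriting it per part. A self-contained restatement of that fold, with stand-in types for the SDK and schema classes used in the diff:

```python
# Stand-in types: Part mimics a Gemini response part, AssistantMessage mimics
# the OpenAI-style Message being built above. The fold logic mirrors the diff.
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Part:
    text: Optional[str] = None
    thought: bool = False
    function_call: Optional[dict] = None

@dataclass
class AssistantMessage:
    reasoning_content: Optional[str] = None
    content: Optional[str] = None
    tool_calls: list = field(default_factory=list)

def fold_parts(parts: list[Part]) -> Optional[AssistantMessage]:
    msg: Optional[AssistantMessage] = None
    for part in parts:
        if part.function_call is not None:
            msg = msg or AssistantMessage()
            msg.content = part.text  # inner thoughts ride along with the call
            msg.tool_calls.append(part.function_call)
        elif part.thought:
            msg = msg or AssistantMessage()
            msg.reasoning_content = part.text
    return msg
```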
letta/llm_api/groq_client.py
CHANGED
@@ -8,6 +8,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.settings import model_settings
@@ -23,12 +24,14 @@ class GroqClient(OpenAIClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
 
         # Groq validation - these fields are not supported and will cause 400 errors
         # https://console.groq.com/docs/openai
letta/llm_api/llm_api_tools.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 import random
 import time
 from typing import List, Optional, Union
@@ -174,11 +175,17 @@ def create(
 
         actor = UserManager().get_user_or_default(user_id=user_id)
         api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
-    elif model_settings.openai_api_key is None:
-        # the openai python client requires a dummy API key
-        api_key = "DUMMY_API_KEY"
     else:
-        api_key = model_settings.openai_api_key
+        # Prefer OpenRouter key when targeting OpenRouter
+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            api_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+        if not is_openrouter or not api_key:
+            api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+        # the openai python client requires some API key string
+        api_key = api_key or "DUMMY_API_KEY"
 
     if function_call is None and functions is not None and len(functions) > 0:
         # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
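The new fallback chain resolves an API key in a fixed order: per-user provider override, then the OpenRouter key when the request targets openrouter.ai, then the OpenAI key, then the `"DUMMY_API_KEY"` placeholder the OpenAI client requires. A condensed sketch of that order; the function name is illustrative:

```python
# Condensed restatement of the key-resolution order in the diff above.
import os

def resolve_api_key(override_key, endpoint, provider_name, settings) -> str:
    if override_key:
        return override_key  # per-user provider override wins
    api_key = None
    is_openrouter = (endpoint and "openrouter.ai" in endpoint) or provider_name == "openrouter"
    if is_openrouter:
        api_key = settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        api_key = settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
    return api_key or "DUMMY_API_KEY"  # openai client requires some key string
```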