letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +899 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +126 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +123 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +1 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +504 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +86 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +126 -85
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +85 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +301 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/anthropic_client.py
CHANGED
@@ -10,7 +10,7 @@ from anthropic.types.beta.message_create_params import MessageCreateParamsNonStr
 from anthropic.types.beta.messages import BetaMessageBatch
 from anthropic.types.beta.messages.batch_create_params import Request

-from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
+from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE, REQUEST_HEARTBEAT_PARAM
 from letta.errors import (
     ContextWindowExceededError,
     ErrorCode,
@@ -31,6 +31,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
@@ -54,15 +55,46 @@ class AnthropicClient(LLMClientBase):
     @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(llm_config, async_client=False)
-        response = client.beta.messages.create(**request_data)
+        betas: list[str] = []
+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
+        if betas:
+            response = client.beta.messages.create(**request_data, betas=betas)
+        else:
+            response = client.beta.messages.create(**request_data)
         return response.model_dump()

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = await self._get_anthropic_client_async(llm_config, async_client=True)

+        betas: list[str] = []
+        # interleaved thinking for reasoner
         if llm_config.enable_reasoner:
-            response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
+            betas.append("interleaved-thinking-2025-05-14")
+
+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
+        if betas:
+            response = await client.beta.messages.create(**request_data, betas=betas)
         else:
             response = await client.beta.messages.create(**request_data)

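Taken together, the hunks above and below gate Anthropic beta features per request: interleaved thinking when `llm_config.enable_reasoner` is set (async paths), and the 1M-token context window for Sonnet 4/4.5 when `model_settings.anthropic_sonnet_1m` is enabled. A minimal sketch of that selection, pulled out as a standalone helper for illustration (the helper itself is hypothetical, not part of the package):

```python
# Illustrative sketch only: mirrors the beta-flag gating added in this release.
def select_betas(model: str, enable_reasoner: bool, anthropic_sonnet_1m: bool) -> list[str]:
    betas: list[str] = []
    if enable_reasoner:
        # interleaved thinking for reasoner models
        betas.append("interleaved-thinking-2025-05-14")
    if anthropic_sonnet_1m and (model.startswith("claude-sonnet-4") or model.startswith("claude-sonnet-4-5")):
        # 1M context window beta for Sonnet 4 / 4.5
        betas.append("context-1m-2025-08-07")
    return betas


# A Sonnet 4.5 reasoner with the 1M flag enabled requests both betas.
print(select_betas("claude-sonnet-4-5", enable_reasoner=True, anthropic_sonnet_1m=True))
```

When the list comes back empty, the client calls `client.beta.messages.create(**request_data)` without a `betas` argument, so behavior matches the previous release.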
@@ -83,11 +115,23 @@ class AnthropicClient(LLMClientBase):
         if llm_config.enable_reasoner:
             betas.append("interleaved-thinking-2025-05-14")

+        # 1M context beta for Sonnet 4/4.5 when enabled
+        try:
+            from letta.settings import model_settings
+
+            if model_settings.anthropic_sonnet_1m and (
+                llm_config.model.startswith("claude-sonnet-4") or llm_config.model.startswith("claude-sonnet-4-5")
+            ):
+                betas.append("context-1m-2025-08-07")
+        except Exception:
+            pass
+
         return await client.beta.messages.create(**request_data, betas=betas)

     @trace_method
     async def send_llm_batch_request_async(
         self,
+        agent_type: AgentType,
         agent_messages_mapping: Dict[str, List[PydanticMessage]],
         agent_tools_mapping: Dict[str, List[dict]],
         agent_llm_config_mapping: Dict[str, LLMConfig],
@@ -114,6 +158,7 @@ class AnthropicClient(LLMClientBase):
         try:
             requests = {
                 agent_id: self.build_request_data(
+                    agent_type=agent_type,
                     messages=agent_messages_mapping[agent_id],
                     llm_config=agent_llm_config_mapping[agent_id],
                     tools=agent_tools_mapping[agent_id],
@@ -175,14 +220,19 @@ class AnthropicClient(LLMClientBase):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         # TODO: This needs to get cleaned up. The logic here is pretty confusing.
         # TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
-        prefix_fill = True
+        prefix_fill = True if agent_type != AgentType.letta_v1_agent else False
+        is_v1 = agent_type == AgentType.letta_v1_agent
+        # Determine local behavior for putting inner thoughts in kwargs without mutating llm_config
+        put_kwargs = bool(llm_config.put_inner_thoughts_in_kwargs) and not is_v1
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

@@ -222,8 +272,9 @@ class AnthropicClient(LLMClientBase):
             # Special case for summarization path
             tools_for_request = None
             tool_choice = None
-        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner:
+        elif self.is_reasoning_model(llm_config) and llm_config.enable_reasoner or agent_type == AgentType.letta_v1_agent:
             # NOTE: reasoning models currently do not allow for `any`
+            # NOTE: react agents should always have auto on, since the precense/absense of tool calls controls chaining
             tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools]
         elif force_tool_call is not None:
@@ -231,11 +282,17 @@ class AnthropicClient(LLMClientBase):
             tools_for_request = [OpenAITool(function=f) for f in tools if f["name"] == force_tool_call]

             # need to have this setting to be able to put inner thoughts in kwargs
-            if not llm_config.put_inner_thoughts_in_kwargs:
-                logger.warning(
-                    f"Force enabling inner thoughts in kwargs for Claude due to forced tool call: {force_tool_call}"
-                )
-                llm_config.put_inner_thoughts_in_kwargs = True
+            if not put_kwargs:
+                if is_v1:
+                    # For v1 agents, native content is used and kwargs must remain disabled to avoid conflicts
+                    logger.warning(
+                        "Forced tool call requested but inner_thoughts_in_kwargs is disabled for v1 agent; proceeding without inner thoughts in kwargs."
+                    )
+                else:
+                    logger.warning(
+                        f"Force enabling inner thoughts in kwargs for Claude due to forced tool call: {force_tool_call} (local override only)"
+                    )
+                    put_kwargs = True
         else:
             tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             tools_for_request = [OpenAITool(function=f) for f in tools] if tools is not None else None
@@ -246,7 +303,7 @@ class AnthropicClient(LLMClientBase):

         # Add inner thoughts kwarg
         # TODO: Can probably make this more efficient
-        if tools_for_request and len(tools_for_request) > 0 and llm_config.put_inner_thoughts_in_kwargs:
+        if tools_for_request and len(tools_for_request) > 0 and put_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -269,7 +326,10 @@ class AnthropicClient(LLMClientBase):
         data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
             messages=messages[1:],
             inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+            put_inner_thoughts_in_kwargs=put_kwargs,
+            # if react, use native content + strip heartbeats
+            native_content=is_v1,
+            strip_request_heartbeat=is_v1,
         )

         # Ensure first message is user
@@ -279,15 +339,27 @@ class AnthropicClient(LLMClientBase):
         # Handle alternating messages
         data["messages"] = merge_tool_results_into_user_messages(data["messages"])

-        # Strip heartbeat pings if extended thinking
-        if llm_config.enable_reasoner:
-            data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
+        if agent_type == AgentType.letta_v1_agent:
+            # Both drop heartbeats in the payload
+            data["messages"] = drop_heartbeats(data["messages"])
+            # And drop heartbeats in the tools
+            if "tools" in data:
+                for tool in data["tools"]:
+                    tool["input_schema"]["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                    if "required" in tool["input_schema"] and REQUEST_HEARTBEAT_PARAM in tool["input_schema"]["required"]:
+                        # NOTE: required is not always present
+                        tool["input_schema"]["required"].remove(REQUEST_HEARTBEAT_PARAM)
+
+        else:
+            # Strip heartbeat pings if extended thinking
+            if llm_config.enable_reasoner:
+                data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])

         # Prefix fill
         # https://docs.anthropic.com/en/api/messages#body-messages
         # NOTE: cannot prefill with tools for opus:
         # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-        if prefix_fill and not llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+        if prefix_fill and not put_kwargs and "opus" not in data["model"]:
             data["messages"].append(
                 # Start the thinking process for the assistant
                 {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
@@ -716,6 +788,44 @@ def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
     return False


+def drop_heartbeats(messages: List[dict]):
+    cleaned_messages = []
+
+    # Loop through messages
+    # For messages with role 'user' and len(content) > 1,
+    # Check if content[0].type == 'tool_result'
+    # If so, iterate over content[1:] and while content.type == 'text' and is_heartbeat(content.text),
+    # merge into content[0].content
+
+    for message in messages:
+        if "role" in message and "content" in message and message["role"] == "user":
+            content_parts = message["content"]
+
+            if isinstance(content_parts, str):
+                if is_heartbeat({"role": "user", "content": content_parts}):
+                    continue
+            elif isinstance(content_parts, list) and len(content_parts) == 1 and "text" in content_parts[0]:
+                if is_heartbeat({"role": "user", "content": content_parts[0]["text"]}):
+                    continue  # skip
+            else:
+                cleaned_parts = []
+                # Drop all the parts
+                for content_part in content_parts:
+                    if "text" in content_part and is_heartbeat({"role": "user", "content": content_part["text"]}):
+                        continue  # skip
+                    else:
+                        cleaned_parts.append(content_part)
+
+                if len(cleaned_parts) == 0:
+                    continue
+                else:
+                    message["content"] = cleaned_parts
+
+        cleaned_messages.append(message)
+
+    return cleaned_messages
+
+
 def merge_heartbeats_into_tool_responses(messages: List[dict]):
     """For extended thinking mode, we don't want anything other than tool responses in-between assistant actions

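The new `drop_heartbeats` pass removes heartbeat pings from the request payload for v1 agents, both as standalone user messages and as extra text parts inside mixed user content. A self-contained sketch of the same shape is below; the stubbed `is_heartbeat` is an assumption purely so the example runs on its own (the real predicate is defined earlier in this module and inspects letta's heartbeat message format):

```python
from typing import List


def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
    # Stub for illustration only; letta's real check lives in anthropic_client.py.
    content = message.get("content", "")
    return isinstance(content, str) and content.startswith("[heartbeat]")


def drop_heartbeats_sketch(messages: List[dict]) -> List[dict]:
    # Abridged version of the new helper: drop user messages that are pure pings,
    # strip ping text parts from mixed user content, keep everything else.
    cleaned = []
    for message in messages:
        if message.get("role") == "user":
            content = message.get("content")
            if isinstance(content, str) and is_heartbeat(message):
                continue
            if isinstance(content, list):
                parts = [p for p in content if not ("text" in p and is_heartbeat({"role": "user", "content": p["text"]}))]
                if not parts:
                    continue
                message = {**message, "content": parts}
        cleaned.append(message)
    return cleaned


messages = [
    {"role": "user", "content": "What's the weather like?"},
    {"role": "user", "content": "[heartbeat] automated ping, continue"},
    {"role": "assistant", "content": "Let me check."},
]
print(drop_heartbeats_sketch(messages))
# -> the ping is removed; the real user turn and the assistant turn remain
```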
letta/llm_api/bedrock_client.py
CHANGED
@@ -6,7 +6,7 @@ from aioboto3.session import Session
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import AgentType, ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.services.provider_manager import ProviderManager
@@ -65,12 +65,14 @@ class BedrockClient(AnthropicClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
         # remove disallowed fields
         if "tool_choice" in data:
             del data["tool_choice"]["disable_parallel_tool_use"]
letta/llm_api/deepseek_client.py
CHANGED
@@ -10,6 +10,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
+from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import (
@@ -331,15 +332,17 @@ class DeepseekClient(OpenAIClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         # Override put_inner_thoughts_in_kwargs to False for DeepSeek
         llm_config.put_inner_thoughts_in_kwargs = False

-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)

         def add_functions_to_system_message(system_message: ChatMessage):
             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
letta/llm_api/google_vertex_client.py
CHANGED
@@ -1,6 +1,7 @@
+import base64
 import json
 import uuid
-from typing import List, Optional
+from typing import AsyncIterator, List, Optional

 from google import genai
 from google.genai import errors
@@ -34,6 +35,7 @@ from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -137,6 +139,15 @@ class GoogleVertexClient(LLMClientBase):
             raise RuntimeError("Failed to get response data after all retries")
         return response_data

+    @trace_method
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
+        client = self._get_client()
+        return await client.aio.models.generate_content_stream(
+            model=llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+
     @staticmethod
     def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
         """Google AI API requires all function call returns are immediately followed by a 'model' role message.
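The new `stream_async` hands back the async iterator from the google-genai SDK rather than a finished response. A hedged sketch of how a caller might drive it, assuming `client` is a `GoogleVertexClient` and the request dict was produced by `build_request_data` (whose new signature appears in the next hunk):

```python
import asyncio

from letta.schemas.agent import AgentType


async def stream_gemini(client, llm_config, messages, tools):
    # Sketch only: messages/tools are prepared elsewhere; the agent type now leads the signature.
    request_data = client.build_request_data(AgentType.letta_v1_agent, messages, llm_config, tools)
    stream = await client.stream_async(request_data, llm_config)
    async for chunk in stream:
        # each chunk is a google-genai GenerateContentResponse
        print(chunk)


# asyncio.run(stream_gemini(client, llm_config, messages, tools))
```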
@@ -274,14 +285,19 @@ class GoogleVertexClient(LLMClientBase):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
+        # NOTE: forcing inner thoughts in kwargs off
+        if agent_type == AgentType.letta_v1_agent:
+            llm_config.put_inner_thoughts_in_kwargs = False

         if tools:
             tool_objs = [Tool(type="function", function=t) for t in tools]
@@ -293,7 +309,11 @@ class GoogleVertexClient(LLMClientBase):
             tool_names = []

         contents = self.add_dummy_model_messages(
-            PydanticMessage.to_google_dicts_from_list(
+            PydanticMessage.to_google_dicts_from_list(
+                messages,
+                put_inner_thoughts_in_kwargs=False if agent_type == AgentType.letta_v1_agent else True,
+                native_content=True if agent_type == AgentType.letta_v1_agent else False,
+            ),
         )

         request_data = {
@@ -312,16 +332,42 @@ class GoogleVertexClient(LLMClientBase):
             request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
             del request_data["config"]["tools"]
         elif tools:
-
-
-
-
-
-
+            if agent_type == AgentType.letta_v1_agent:
+                # don't require tools
+                tool_call_mode = FunctionCallingConfigMode.AUTO
+                tool_config = ToolConfig(
+                    function_calling_config=FunctionCallingConfig(
+                        mode=tool_call_mode,
+                    )
                 )
-
+            else:
+                # require tools
+                tool_call_mode = FunctionCallingConfigMode.ANY
+                tool_config = ToolConfig(
+                    function_calling_config=FunctionCallingConfig(
+                        mode=tool_call_mode,
+                        # Provide the list of tools (though empty should also work, it seems not to)
+                        allowed_function_names=tool_names,
+                    )
+                )
+
             request_data["config"]["tool_config"] = tool_config.model_dump()

+        # https://ai.google.dev/gemini-api/docs/thinking#set-budget
+        # 2.5 Pro
+        # - Default: dynamic thinking
+        # - Dynamic thinking that cannot be disabled
+        # - Range: -1 (for dynamic), or 128-32768
+        # 2.5 Flash
+        # - Default: dynamic thinking
+        # - Dynamic thinking that *can* be disabled
+        # - Range: -1, 0, or 0-24576
+        # 2.5 Flash Lite
+        # - Default: no thinking
+        # - Dynamic thinking that *can* be disabled
+        # - Range: -1, 0, or 512-24576
+        # TODO when using v3 agent loop, properly support the native thinking in Gemini
+
         # Add thinking_config for flash
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
@@ -334,6 +380,7 @@ class GoogleVertexClient(LLMClientBase):
         )
         thinking_config = ThinkingConfig(
             thinking_budget=(thinking_budget),
+            include_thoughts=(thinking_budget > 1),
         )
         request_data["config"]["thinking_config"] = thinking_config.model_dump()

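Per the comments introduced above, the thinking budget is 0 when `enable_reasoner` is off and comes from `max_reasoning_tokens` otherwise, and the new `include_thoughts` flag is derived from that budget. A small sketch of the mapping (the helper is hypothetical and ignores the per-model ranges listed in the comment; `ThinkingConfig` is the google-genai type used in the hunk):

```python
from google.genai.types import ThinkingConfig


def make_thinking_config(enable_reasoner: bool, max_reasoning_tokens: int) -> dict:
    # Budget 0 disables thinking; any budget above 1 also asks for the thought parts back.
    thinking_budget = max_reasoning_tokens if enable_reasoner else 0
    config = ThinkingConfig(
        thinking_budget=thinking_budget,
        include_thoughts=(thinking_budget > 1),
    )
    return config.model_dump()


print(make_thinking_config(enable_reasoner=True, max_reasoning_tokens=1024))
# e.g. {'include_thoughts': True, 'thinking_budget': 1024}
```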
@@ -395,13 +442,15 @@ class GoogleVertexClient(LLMClientBase):
         # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
         # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
         # To patch this, if we have multiple parts we can take the last one
-        if len(parts) > 1:
+        if len(parts) > 1 and not llm_config.enable_reasoner:
             logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+            # only truncate if reasoning is off
             parts = [parts[-1]]

         # TODO support parts / multimodal
         # TODO support parallel tool calling natively
         # TODO Alternative here is to throw away everything else except for the first part
+        openai_response_message = None
         for response_message in parts:
             # Convert the actual message style to OpenAI style
             if response_message.function_call:
@@ -410,8 +459,10 @@ class GoogleVertexClient(LLMClientBase):
                 function_args = function_call.args
                 assert isinstance(function_args, dict), function_args

-                #
+                # TODO this is kind of funky - really, we should be passing 'native_content' as a kwarg to fork behavior
+                inner_thoughts = response_message.text
                 if llm_config.put_inner_thoughts_in_kwargs:
+                    # NOTE: this also involves stripping the inner monologue out of the function
                     from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

                     assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
@@ -420,25 +471,44 @@ class GoogleVertexClient(LLMClientBase):
                     inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
-
+                    pass
+                    # inner_thoughts = None
+                    # inner_thoughts = response_message.text

                 # Google AI API doesn't generate tool call IDs
-
-
-
-
-
-
-
-                            function=FunctionCall(
-                                name=function_name,
-                                arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                            ),
-                        )
-                    ],
+                tool_call = ToolCall(
+                    id=get_tool_call_id(),
+                    type="function",
+                    function=FunctionCall(
+                        name=function_name,
+                        arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                    ),
                 )

+                if openai_response_message is None:
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[tool_call],
+                    )
+                else:
+                    openai_response_message.content = inner_thoughts
+                    if openai_response_message.tool_calls is None:
+                        openai_response_message.tool_calls = []
+                    openai_response_message.tool_calls.append(tool_call)
+                if response_message.thought_signature:
+                    thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+                    openai_response_message.reasoning_content_signature = thought_signature
+
             else:
+                if response_message.thought:
+                    if openai_response_message is None:
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            reasoning_content=response_message.text,
+                        )
+                    else:
+                        openai_response_message.reasoning_content = response_message.text
                 try:
                     # Structured output tool call
                     function_call = json_loads(response_message.text)
@@ -459,20 +529,25 @@ class GoogleVertexClient(LLMClientBase):
                     inner_thoughts = None

                     # Google AI API doesn't generate tool call IDs
-
-
-
-
-
-
-
-                                function=FunctionCall(
-                                    name=function_name,
-                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                                ),
-                            )
-                        ],
+                    tool_call = ToolCall(
+                        id=get_tool_call_id(),
+                        type="function",
+                        function=FunctionCall(
+                            name=function_name,
+                            arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                        ),
                     )
+                    if openai_response_message is None:
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                            tool_calls=[tool_call],
+                        )
+                    else:
+                        openai_response_message.content = inner_thoughts
+                        if openai_response_message.tool_calls is None:
+                            openai_response_message.tool_calls = []
+                        openai_response_message.tool_calls.append(tool_call)

                 except json.decoder.JSONDecodeError:
                     if candidate.finish_reason == "MAX_TOKENS":
@@ -481,10 +556,16 @@ class GoogleVertexClient(LLMClientBase):
                     inner_thoughts = response_message.text

                     # Google AI API doesn't generate tool call IDs
-                    openai_response_message
-
-
-
+                    if openai_response_message is None:
+                        openai_response_message = Message(
+                            role="assistant",  # NOTE: "model" -> "assistant"
+                            content=inner_thoughts,
+                        )
+                    else:
+                        openai_response_message.content = inner_thoughts
+                    if response_message.thought_signature:
+                        thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8")
+                        openai_response_message.reasoning_content_signature = thought_signature

         # Google AI API uses different finish reason strings than OpenAI
         # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
letta/llm_api/groq_client.py
CHANGED
@@ -8,6 +8,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.settings import model_settings
@@ -23,12 +24,14 @@ class GroqClient(OpenAIClient):
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
-        data = super().build_request_data(messages, llm_config, tools, force_tool_call)
+        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)

         # Groq validation - these fields are not supported and will cause 400 errors
         # https://console.groq.com/docs/openai
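Bedrock, DeepSeek, and Groq pick up the same signature extension: `agent_type` now leads `build_request_data` and `requires_subsequent_tool_call` trails it, with both forwarded positionally to the parent class. Any other subclass that overrides this method follows the same shape; a sketch with a hypothetical provider class:

```python
from typing import List, Optional

from letta.llm_api.openai_client import OpenAIClient
from letta.schemas.enums import AgentType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage


class MyProviderClient(OpenAIClient):  # hypothetical subclass, for illustration only
    def build_request_data(
        self,
        agent_type: AgentType,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
    ) -> dict:
        # Forward the new arguments in the same order the built-in subclasses do.
        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
        # ...provider-specific tweaks to `data` would go here...
        return data
```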
letta/llm_api/llm_api_tools.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 import random
 import time
 from typing import List, Optional, Union
@@ -174,11 +175,17 @@ def create(

         actor = UserManager().get_user_or_default(user_id=user_id)
         api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
-    elif model_settings.openai_api_key is None:
-        # the openai python client requires a dummy API key
-        api_key = "DUMMY_API_KEY"
     else:
-        api_key = model_settings.openai_api_key
+        # Prefer OpenRouter key when targeting OpenRouter
+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            api_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+        if not is_openrouter or not api_key:
+            api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+        # the openai python client requires some API key string
+        api_key = api_key or "DUMMY_API_KEY"

     if function_call is None and functions is not None and len(functions) > 0:
         # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice