letta-nightly 0.11.7.dev20251007104119__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +899 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +126 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +123 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +1 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +504 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +86 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +126 -85
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +85 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +301 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/llm_client_base.py
CHANGED
```diff
@@ -9,7 +9,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.errors import LLMError
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import AgentType, ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
@@ -40,6 +40,7 @@ class LLMClientBase:
     @trace_method
     def send_llm_request(
         self,
+        agent_type: AgentType,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
@@ -52,7 +53,7 @@
         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, llm_config, tools, force_tool_call)
+        request_data = self.build_request_data(agent_type, messages, llm_config, tools, force_tool_call)

         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -108,6 +109,7 @@

     async def send_llm_batch_request_async(
         self,
+        agent_type: AgentType,
         agent_messages_mapping: Dict[str, List[Message]],
         agent_tools_mapping: Dict[str, List[dict]],
         agent_llm_config_mapping: Dict[str, LLMConfig],
@@ -120,10 +122,12 @@
     @abstractmethod
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
```
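For orientation, a minimal sketch of how the new `agent_type` parameter threads from `send_llm_request` into a provider client's `build_request_data`. The `EchoClient` class below is hypothetical and only mirrors the signatures added above; it is not part of the package.

```python
from typing import List, Optional


class EchoClient:
    """Hypothetical illustration of the updated LLMClientBase signatures."""

    def send_llm_request(self, agent_type, messages, llm_config, tools=None, force_tool_call=None):
        # agent_type now rides along so the provider layer can branch on agent behavior
        return self.build_request_data(agent_type, messages, llm_config, tools, force_tool_call)

    def build_request_data(
        self,
        agent_type,
        messages,
        llm_config,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
    ) -> dict:
        # A real client would translate messages/tools into its provider's wire format here
        return {"model": getattr(llm_config, "model", None), "agent_type": str(agent_type), "tools": tools or []}
```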
letta/llm_api/openai.py
CHANGED
```diff
@@ -40,6 +40,7 @@ from letta.schemas.openai.chat_completion_response import (
     UsageStatistics,
 )
 from letta.schemas.openai.embedding_response import EmbeddingResponse
+from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.utils import get_tool_call_id, smart_urljoin

@@ -82,6 +83,12 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
+    # Add optional OpenRouter headers if hitting OpenRouter
+    if "openrouter.ai" in url:
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title

     logger.debug(f"Sending request to {url}")
     response = None
@@ -139,6 +146,11 @@ async def openai_get_model_list_async(
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
+    if "openrouter.ai" in url:
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title

     logger.debug(f"Sending request to {url}")

@@ -550,7 +562,16 @@ def openai_chat_completions_request_stream(

     data = prepare_openai_payload(chat_completion_request)
     data["stream"] = True
-
+    kwargs = {"api_key": api_key, "base_url": url, "max_retries": 0}
+    if "openrouter.ai" in url:
+        headers = {}
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
+        if headers:
+            kwargs["default_headers"] = headers
+    client = OpenAI(**kwargs)
     try:
         stream = client.chat.completions.create(**data)
         for chunk in stream:
@@ -574,7 +595,16 @@ def openai_chat_completions_request(
     https://platform.openai.com/docs/guides/text-generation?lang=curl
     """
     data = prepare_openai_payload(chat_completion_request)
-
+    kwargs = {"api_key": api_key, "base_url": url, "max_retries": 0}
+    if "openrouter.ai" in url:
+        headers = {}
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
+        if headers:
+            kwargs["default_headers"] = headers
+    client = OpenAI(**kwargs)
     log_event(name="llm_request_sent", attributes=data)
     chat_completion = client.chat.completions.create(**data)
     log_event(name="llm_response_received", attributes=chat_completion.model_dump())
```
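The model-list helpers above attach OpenRouter attribution headers only when the URL points at openrouter.ai. A rough standalone sketch of the same header-building pattern; the referer and title values here are placeholders, not Letta settings:

```python
import requests


def build_headers(url: str, api_key: str | None, referer: str | None = None, title: str | None = None) -> dict:
    headers = {"Content-Type": "application/json"}
    if api_key is not None:
        headers["Authorization"] = f"Bearer {api_key}"
    # OpenRouter-specific attribution headers, only when targeting openrouter.ai
    if "openrouter.ai" in url:
        if referer:
            headers["HTTP-Referer"] = referer
        if title:
            headers["X-Title"] = title
    return headers


# Example usage with placeholder values:
# url = "https://openrouter.ai/api/v1/models"
# resp = requests.get(url, headers=build_headers(url, "sk-or-...", "https://example.com", "My App"))
```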
letta/llm_api/openai_client.py
CHANGED
```diff
@@ -4,10 +4,13 @@ from typing import List, Optional

 import openai
 from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai.types import Reasoning
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.responses import ResponseTextConfigParam
+from openai.types.responses.response_stream_event import ResponseStreamEvent

-from letta.constants import LETTA_MODEL_ENDPOINT
+from letta.constants import LETTA_MODEL_ENDPOINT, REQUEST_HEARTBEAT_PARAM
 from letta.errors import (
     ContextWindowExceededError,
     ErrorCode,
@@ -26,6 +29,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.letta_message_content import MessageContentType
 from letta.schemas.llm_config import LLMConfig
@@ -38,7 +42,15 @@ from letta.schemas.openai.chat_completion_request import (
     ToolFunctionChoice,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import
+from letta.schemas.openai.chat_completion_response import (
+    ChatCompletionResponse,
+    Choice,
+    FunctionCall,
+    Message as ChoiceMessage,
+    ToolCall,
+    UsageStatistics,
+)
+from letta.schemas.openai.responses_request import ResponsesRequest
 from letta.settings import model_settings

 logger = get_logger(__name__)
@@ -117,6 +129,11 @@ def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     return False


+def use_responses_api(llm_config: LLMConfig) -> bool:
+    # TODO can opt in all reasoner models to use the Responses API
+    return is_openai_reasoning_model(llm_config.model)
+
+
 def supports_content_none(llm_config: LLMConfig) -> bool:
     """Certain providers don't support the content None."""
     if "gpt-oss" in llm_config.model:
```
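The newly imported REQUEST_HEARTBEAT_PARAM is used by later hunks to strip that parameter from tool schemas for the v1 agent. A minimal sketch of that stripping on a plain JSON-schema parameter dict; the weather tool and the literal constant value below are assumptions for illustration only:

```python
REQUEST_HEARTBEAT_PARAM = "request_heartbeat"  # assumed value; the real constant lives in letta.constants


def strip_request_heartbeat(parameters: dict) -> dict:
    # Drop the parameter from `properties` and from the `required` list, if present
    if parameters and "properties" in parameters:
        parameters["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
        if "required" in parameters and REQUEST_HEARTBEAT_PARAM in parameters["required"]:
            parameters["required"].remove(REQUEST_HEARTBEAT_PARAM)
    return parameters


tool_params = {
    "type": "object",
    "properties": {"city": {"type": "string"}, "request_heartbeat": {"type": "boolean"}},
    "required": ["city", "request_heartbeat"],
}
print(strip_request_heartbeat(tool_params))  # request_heartbeat removed from both places
```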
```diff
@@ -128,12 +145,32 @@ class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key, _, _ = self.get_byok_overrides(llm_config)

+        # Default to global OpenAI key when no BYOK override
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
-
-        api_key = api_key or "DUMMY_API_KEY"
+
         kwargs = {"api_key": api_key, "base_url": llm_config.model_endpoint}

+        # OpenRouter-specific overrides: use OpenRouter key and optional headers
+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+            if or_key:
+                kwargs["api_key"] = or_key
+            # Attach optional headers if provided
+            headers = {}
+            if model_settings.openrouter_referer:
+                headers["HTTP-Referer"] = model_settings.openrouter_referer
+            if model_settings.openrouter_title:
+                headers["X-Title"] = model_settings.openrouter_title
+            if headers:
+                kwargs["default_headers"] = headers
+
+        # The OpenAI client requires some API key value
+        kwargs["api_key"] = kwargs.get("api_key") or "DUMMY_API_KEY"
+
         return kwargs

     def _prepare_client_kwargs_embedding(self, embedding_config: EmbeddingConfig) -> dict:
@@ -148,10 +185,25 @@ class OpenAIClient(LLMClientBase):

         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
-        # supposedly the openai python client requires a dummy API key
-        api_key = api_key or "DUMMY_API_KEY"
         kwargs = {"api_key": api_key, "base_url": llm_config.model_endpoint}

+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+            if or_key:
+                kwargs["api_key"] = or_key
+            headers = {}
+            if model_settings.openrouter_referer:
+                headers["HTTP-Referer"] = model_settings.openrouter_referer
+            if model_settings.openrouter_title:
+                headers["X-Title"] = model_settings.openrouter_title
+            if headers:
+                kwargs["default_headers"] = headers
+
+        kwargs["api_key"] = kwargs.get("api_key") or "DUMMY_API_KEY"
+
         return kwargs

     def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
```
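A condensed sketch of the client construction this enables: when the endpoint is OpenRouter, the key and optional attribution headers are passed to the OpenAI SDK client via default_headers, so they ride along on every request. The key, referer, and title values below are placeholders:

```python
from openai import OpenAI

kwargs = {"api_key": "sk-or-placeholder", "base_url": "https://openrouter.ai/api/v1"}
headers = {"HTTP-Referer": "https://example.com", "X-Title": "Example App"}  # placeholder attribution
if headers:
    kwargs["default_headers"] = headers  # sent on every request made by this client
client = OpenAI(**kwargs)
# client.chat.completions.create(...) now carries the attribution headers automatically
```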
```diff
@@ -160,17 +212,182 @@
     def supports_structured_output(self, llm_config: LLMConfig) -> bool:
         return supports_structured_output(llm_config)

+    @trace_method
+    def build_request_data_responses(
+        self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
+        messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+        tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
+        force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for the OpenAI Responses API.
+        """
+        if llm_config.put_inner_thoughts_in_kwargs:
+            raise ValueError("Inner thoughts in kwargs are not supported for the OpenAI Responses API")
+
+        openai_messages_list = PydanticMessage.to_openai_responses_dicts_from_list(messages)
+        # Add multi-modal support for Responses API by rewriting user messages
+        # into input_text/input_image parts.
+        openai_messages_list = fill_image_content_in_responses_input(openai_messages_list, messages)
+
+        if llm_config.model:
+            model = llm_config.model
+        else:
+            logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+            model = None
+
+        # Default to auto, unless there's a forced tool call coming from above or requires_subsequent_tool_call is True
+        tool_choice = None
+        if tools:  # only set tool_choice if tools exist
+            if force_tool_call is not None:
+                tool_choice = {"type": "function", "name": force_tool_call}
+            elif requires_subsequent_tool_call:
+                tool_choice = "required"
+            else:
+                tool_choice = "auto"
+
+        # Convert the tools from the ChatCompletions style to the Responses style
+        if tools:
+            # Get proper typing
+            typed_tools: List[OpenAITool] = [OpenAITool(type="function", function=f) for f in tools]
+
+            # Strip request heartbeat
+            # TODO relax this?
+            if agent_type == AgentType.letta_v1_agent:
+                new_tools = []
+                for tool in typed_tools:
+                    # Remove request_heartbeat from the properties if it exists
+                    if tool.function.parameters and "properties" in tool.function.parameters:
+                        tool.function.parameters["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                        # Also remove from required list if present
+                        if "required" in tool.function.parameters and REQUEST_HEARTBEAT_PARAM in tool.function.parameters["required"]:
+                            tool.function.parameters["required"].remove(REQUEST_HEARTBEAT_PARAM)
+                    new_tools.append(tool.model_copy(deep=True))
+                typed_tools = new_tools
+
+            # Convert to strict mode
+            if supports_structured_output(llm_config):
+                for tool in typed_tools:
+                    try:
+                        structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                        tool.function = FunctionSchema(**structured_output_version)
+                    except ValueError as e:
+                        logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

+                # Finally convert to a Responses-friendly dict
+                responses_tools = [
+                    {
+                        "type": "function",
+                        "name": t.function.name,
+                        "description": t.function.description,
+                        "parameters": t.function.parameters,
+                        "strict": True,
+                    }
+                    for t in typed_tools
+                ]
+
+            else:
+                # Finally convert to a Responses-friendly dict
+                responses_tools = [
+                    {
+                        "type": "function",
+                        "name": t.function.name,
+                        "description": t.function.description,
+                        "parameters": t.function.parameters,
+                        # "strict": True,
+                    }
+                    for t in typed_tools
+                ]
+        else:
+            responses_tools = None
+
+        # Prepare the request payload
+        data = ResponsesRequest(
+            # Responses specific
+            store=False,
+            include=["reasoning.encrypted_content"],
+            # More or less generic to ChatCompletions API
+            model=model,
+            input=openai_messages_list,
+            tools=responses_tools,
+            tool_choice=tool_choice,
+            max_output_tokens=llm_config.max_tokens,
+            temperature=llm_config.temperature if supports_temperature_param(model) else None,
+            parallel_tool_calls=False,
+        )
+
+        # Add verbosity control for GPT-5 models
+        if supports_verbosity_control(model) and llm_config.verbosity:
+            # data.verbosity = llm_config.verbosity
+            # https://cookbook.openai.com/examples/gpt-5/gpt-5_new_params_and_tools
+            data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
+
+        # Add reasoning effort control for reasoning models
+        if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
+            # data.reasoning_effort = llm_config.reasoning_effort
+            data.reasoning = Reasoning(
+                effort=llm_config.reasoning_effort,
+                # NOTE: hardcoding summary level, could put in llm_config?
+                summary="detailed",
+            )
+
+        # TODO I don't see this in Responses?
+        # Add frequency penalty
+        # if llm_config.frequency_penalty is not None:
+        #     data.frequency_penalty = llm_config.frequency_penalty
+
+        # Add parallel tool calling
+        if tools and supports_parallel_tool_calling(model):
+            data.parallel_tool_calls = False
+
+        # always set user id for openai requests
+        if self.actor:
+            data.user = self.actor.id
+
+        if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
+            if not self.actor:
+                # override user id for inference.letta.com
+                import uuid
+
+                data.user = str(uuid.UUID(int=0))
+
+            data.model = "memgpt-openai"
+
+        request_data = data.model_dump(exclude_unset=True)
+        # print("responses request data", request_data)
+        return request_data
+
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for the OpenAI API.
         """
+        # Shortcut for GPT-5 to use Responses API, but only for letta_v1_agent
+        if use_responses_api(llm_config) and agent_type == AgentType.letta_v1_agent:
+            return self.build_request_data_responses(
+                agent_type=agent_type,
+                messages=messages,
+                llm_config=llm_config,
+                tools=tools,
+                force_tool_call=force_tool_call,
+                requires_subsequent_tool_call=requires_subsequent_tool_call,
+            )
+
+        if agent_type == AgentType.letta_v1_agent:
+            # Safety hard override in case it got set somewhere by accident
+            llm_config.put_inner_thoughts_in_kwargs = False
+
         if tools and llm_config.put_inner_thoughts_in_kwargs:
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
```
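For comparison, the same function tool expressed in the two wire formats the hunk above converts between: ChatCompletions nests the schema under a "function" key, while the Responses API hoists name, description, and parameters to the top level and can set "strict". The tool itself is illustrative only:

```python
chat_completions_tool = {
    "type": "function",
    "function": {
        "name": "lookup_city_weather",
        "description": "Look up the weather for a city.",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]},
    },
}

responses_tool = {
    "type": "function",
    "name": "lookup_city_weather",
    "description": "Look up the weather for a city.",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]},
    "strict": True,  # only set when structured output is supported
}
```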
```diff
@@ -208,15 +425,16 @@
         # TODO: This vllm checking is very brittle and is a patch at most
         tool_choice = None
         if tools:  # only set tool_choice if tools exist
-            if
+            if force_tool_call is not None:
+                tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
+            elif requires_subsequent_tool_call:
+                tool_choice = "required"
+            elif self.requires_auto_tool_choice(llm_config) or agent_type == AgentType.letta_v1_agent:
                 tool_choice = "auto"
             else:
                 # only set if tools is non-Null
                 tool_choice = "required"

-        if force_tool_call is not None:
-            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
-
         if not supports_content_none(llm_config):
             for message in openai_message_list:
                 if message.content is None:
@@ -260,6 +478,21 @@

             data.model = "memgpt-openai"

+        # For some reason, request heartbeats are still leaking into here...
+        # So strip them manually for v3
+        if agent_type == AgentType.letta_v1_agent:
+            new_tools = []
+            if data.tools:
+                for tool in data.tools:
+                    # Remove request_heartbeat from the properties if it exists
+                    if tool.function.parameters and "properties" in tool.function.parameters:
+                        tool.function.parameters["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                        # Also remove from required list if present
+                        if "required" in tool.function.parameters and REQUEST_HEARTBEAT_PARAM in tool.function.parameters["required"]:
+                            tool.function.parameters["required"].remove(REQUEST_HEARTBEAT_PARAM)
+                    new_tools.append(tool.model_copy(deep=True))
+            data.tools = new_tools
+
         if data.tools is not None and len(data.tools) > 0:
             # Convert to structured output style (which has 'strict' and no optionals)
             for tool in data.tools:
@@ -270,6 +503,14 @@
                 except ValueError as e:
                     logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
         request_data = data.model_dump(exclude_unset=True)
+
+        # If Ollama
+        # if llm_config.handle.startswith("ollama/") and llm_config.enable_reasoner:
+        # Sadly, reasoning via the OpenAI proxy on Ollama only works for Harmony/gpt-oss
+        # Ollama's OpenAI layer simply looks for the presence of 'reasoining' or 'reasoning_effort'
+        # If set, then in the backend "medium" thinking is turned on
+        # request_data["reasoning_effort"] = "medium"
+
         return request_data

     @trace_method
```
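The rewritten branch above establishes a precedence for tool_choice: a forced tool call wins, then a required subsequent tool call, then "auto" when the provider or the v1 agent allows it, otherwise "required". A standalone sketch of that ladder, with `auto_ok` standing in for the provider/agent checks (not a function from the package):

```python
def pick_tool_choice(tools, force_tool_call=None, requires_subsequent_tool_call=False, auto_ok=False):
    # Mirrors the precedence in the diff: force > required > auto > required-by-default
    if not tools:
        return None
    if force_tool_call is not None:
        return {"type": "function", "function": {"name": force_tool_call}}
    if requires_subsequent_tool_call:
        return "required"
    if auto_ok:
        return "auto"
    return "required"
```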
```diff
@@ -278,8 +519,13 @@
         Performs underlying synchronous request to OpenAI API and returns raw response dict.
         """
         client = OpenAI(**self._prepare_client_kwargs(llm_config))
-
-
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            resp = client.responses.create(**request_data)
+            return resp.model_dump()
+        else:
+            response: ChatCompletion = client.chat.completions.create(**request_data)
+            return response.model_dump()

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -288,8 +534,13 @@
         """
         kwargs = await self._prepare_client_kwargs_async(llm_config)
         client = AsyncOpenAI(**kwargs)
-
-
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            resp = await client.responses.create(**request_data)
+            return resp.model_dump()
+        else:
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+            return response.model_dump()

     def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
         return is_openai_reasoning_model(llm_config.model)
```
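Both request paths route on the shape of the already-built payload rather than on a separate flag; a sketch of that predicate:

```python
def is_responses_payload(request_data: dict) -> bool:
    # Responses API payloads carry an "input" list; Chat Completions payloads carry "messages"
    return "input" in request_data and "messages" not in request_data
```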
```diff
@@ -305,6 +556,84 @@
         Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
         Handles potential extraction of inner thoughts if they were added via kwargs.
         """
+        if "object" in response_data and response_data["object"] == "response":
+            # Map Responses API shape to Chat Completions shape
+            # See example payload in tests/integration_test_send_message_v2.py
+            model = response_data.get("model")
+
+            # Extract usage
+            usage = response_data.get("usage", {}) or {}
+            prompt_tokens = usage.get("input_tokens") or 0
+            completion_tokens = usage.get("output_tokens") or 0
+            total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens)
+
+            # Extract assistant message text from the outputs list
+            outputs = response_data.get("output") or []
+            assistant_text_parts = []
+            reasoning_summary_parts = None
+            reasoning_content_signature = None
+            tool_calls = None
+            finish_reason = "stop" if (response_data.get("status") == "completed") else None
+
+            # Optionally capture reasoning presence
+            found_reasoning = False
+            for out in outputs:
+                out_type = (out or {}).get("type")
+                if out_type == "message":
+                    content_list = (out or {}).get("content") or []
+                    for part in content_list:
+                        if (part or {}).get("type") == "output_text":
+                            text_val = (part or {}).get("text")
+                            if text_val:
+                                assistant_text_parts.append(text_val)
+                elif out_type == "reasoning":
+                    found_reasoning = True
+                    reasoning_summary_parts = [part.get("text") for part in out.get("summary")]
+                    reasoning_content_signature = out.get("encrypted_content")
+                elif out_type == "function_call":
+                    tool_calls = [
+                        ToolCall(
+                            id=out.get("call_id"),
+                            type="function",
+                            function=FunctionCall(
+                                name=out.get("name"),
+                                arguments=out.get("arguments"),
+                            ),
+                        )
+                    ]
+
+            assistant_text = "\n".join(assistant_text_parts) if assistant_text_parts else None
+
+            # Build ChatCompletionResponse-compatible structure
+            # Imports for these Pydantic models are already present in this module
+            choice = Choice(
+                index=0,
+                finish_reason=finish_reason,
+                message=ChoiceMessage(
+                    role="assistant",
+                    content=assistant_text or "",
+                    reasoning_content="\n".join(reasoning_summary_parts) if reasoning_summary_parts else None,
+                    reasoning_content_signature=reasoning_content_signature if reasoning_summary_parts else None,
+                    redacted_reasoning_content=None,
+                    omitted_reasoning_content=False,
+                    tool_calls=tool_calls,
+                ),
+            )
+
+            chat_completion_response = ChatCompletionResponse(
+                id=response_data.get("id", ""),
+                choices=[choice],
+                created=int(response_data.get("created_at") or 0),
+                model=model or (llm_config.model if hasattr(llm_config, "model") else None),
+                usage=UsageStatistics(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                ),
+            )
+
+            return chat_completion_response
+
         # OpenAI's response structure directly maps to ChatCompletionResponse
         # We just need to instantiate the Pydantic model for validation and type safety.
         chat_completion_response = ChatCompletionResponse(**response_data)
```
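A trimmed example of the Responses-style payload the converter above consumes, showing the three output item types it inspects; all values are made up:

```python
example_responses_payload = {
    "id": "resp_123",
    "object": "response",
    "status": "completed",
    "created_at": 1728000000,
    "model": "o4-mini",
    "output": [
        {"type": "reasoning", "summary": [{"type": "summary_text", "text": "Weighing the options..."}], "encrypted_content": "..."},
        {"type": "message", "content": [{"type": "output_text", "text": "Here is the answer."}]},
        {"type": "function_call", "call_id": "call_1", "name": "send_message", "arguments": "{\"message\": \"hi\"}"},
    ],
    "usage": {"input_tokens": 120, "output_tokens": 45, "total_tokens": 165},
}
# The converter maps this into a single ChatCompletion-style choice: assistant text from the
# "message" item, reasoning_content from the "reasoning" summary, tool_calls from "function_call".
```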
```diff
@@ -322,15 +651,36 @@
         return chat_completion_response

     @trace_method
-    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk | ResponseStreamEvent]:
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         kwargs = await self._prepare_client_kwargs_async(llm_config)
         client = AsyncOpenAI(**kwargs)
-
-
-
+
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
+                **request_data,
+                stream=True,
+                # stream_options={"include_usage": True},
+            )
+        else:
+            response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+                **request_data,
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+        return response_stream
+
+    @trace_method
+    async def stream_async_responses(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ResponseStreamEvent]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        kwargs = await self._prepare_client_kwargs_async(llm_config)
+        client = AsyncOpenAI(**kwargs)
+        response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(**request_data, stream=True)
         return response_stream

     @trace_method
```
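A minimal consumption sketch for the stream path, assuming a client instance with the method above; depending on which branch was taken, items are ChatCompletionChunk or ResponseStreamEvent objects:

```python
async def consume(client_impl, request_data: dict, llm_config) -> None:
    # Hypothetical caller; iterates whatever stream type the payload shape selected
    stream = await client_impl.stream_async(request_data, llm_config)
    async for event in stream:
        print(type(event).__name__)
```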
```diff
@@ -539,3 +889,58 @@ def fill_image_content_in_messages(openai_message_list: List[dict], pydantic_mes
         new_message_list.append({"role": "user", "content": message_content})

     return new_message_list
+
+
+def fill_image_content_in_responses_input(openai_message_list: List[dict], pydantic_message_list: List[PydanticMessage]) -> List[dict]:
+    """
+    Rewrite user messages in the Responses API input to embed multi-modal parts inside
+    the message's content array (not as top-level items).
+
+    Expected structure for Responses API input messages:
+      { "type": "message", "role": "user", "content": [
+          {"type": "input_text", "text": "..."},
+          {"type": "input_image", "image_url": {"url": "data:<mime>;base64,<data>", "detail": "auto"}}
+      ] }
+
+    Non-user items are left unchanged.
+    """
+    user_msgs = [m for m in pydantic_message_list if getattr(m, "role", None) == "user"]
+    user_idx = 0
+
+    rewritten: List[dict] = []
+    for item in openai_message_list:
+        if isinstance(item, dict) and item.get("role") == "user":
+            if user_idx >= len(user_msgs):
+                rewritten.append(item)
+                continue
+
+            pm = user_msgs[user_idx]
+            user_idx += 1
+
+            # Only rewrite if the pydantic message actually contains multiple parts or images
+            if not isinstance(pm.content, list) or (len(pm.content) == 1 and pm.content[0].type == MessageContentType.text):
+                rewritten.append(item)
+                continue
+
+            parts: List[dict] = []
+            for content in pm.content:
+                if content.type == MessageContentType.text:
+                    parts.append({"type": "input_text", "text": content.text})
+                elif content.type == MessageContentType.image:
+                    # For Responses API, image_url is a string and detail is required
+                    data_url = f"data:{content.source.media_type};base64,{content.source.data}"
+                    parts.append(
+                        {"type": "input_image", "image_url": data_url, "detail": getattr(content.source, "detail", None) or "auto"}
+                    )
+                else:
+                    # Skip unsupported content types for Responses input
+                    continue
+
+            # Update message content to include multi-modal parts (EasyInputMessageParam style)
+            new_item = dict(item)
+            new_item["content"] = parts
+            rewritten.append(new_item)
+        else:
+            rewritten.append(item)
+
+    return rewritten
```
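Roughly, the rewrite above turns a mixed text-plus-image user turn into an item of the following shape (base64 payload elided; values illustrative):

```python
rewritten_user_item = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "What is in this picture?"},
        {"type": "input_image", "image_url": "data:image/png;base64,...", "detail": "auto"},
    ],
}
```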