letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl

This diff shows the content differences between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (145)
  1. letta/adapters/letta_llm_adapter.py +1 -0
  2. letta/adapters/letta_llm_request_adapter.py +0 -1
  3. letta/adapters/letta_llm_stream_adapter.py +7 -2
  4. letta/adapters/simple_llm_request_adapter.py +88 -0
  5. letta/adapters/simple_llm_stream_adapter.py +192 -0
  6. letta/agents/agent_loop.py +6 -0
  7. letta/agents/ephemeral_summary_agent.py +2 -1
  8. letta/agents/helpers.py +142 -6
  9. letta/agents/letta_agent.py +13 -33
  10. letta/agents/letta_agent_batch.py +2 -4
  11. letta/agents/letta_agent_v2.py +87 -77
  12. letta/agents/letta_agent_v3.py +899 -0
  13. letta/agents/voice_agent.py +2 -6
  14. letta/constants.py +8 -4
  15. letta/errors.py +40 -0
  16. letta/functions/function_sets/base.py +84 -4
  17. letta/functions/function_sets/multi_agent.py +0 -3
  18. letta/functions/schema_generator.py +113 -71
  19. letta/groups/dynamic_multi_agent.py +3 -2
  20. letta/groups/helpers.py +1 -2
  21. letta/groups/round_robin_multi_agent.py +3 -2
  22. letta/groups/sleeptime_multi_agent.py +3 -2
  23. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  24. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  25. letta/groups/supervisor_multi_agent.py +84 -80
  26. letta/helpers/converters.py +3 -0
  27. letta/helpers/message_helper.py +4 -0
  28. letta/helpers/tool_rule_solver.py +92 -5
  29. letta/interfaces/anthropic_streaming_interface.py +409 -0
  30. letta/interfaces/gemini_streaming_interface.py +296 -0
  31. letta/interfaces/openai_streaming_interface.py +752 -1
  32. letta/llm_api/anthropic_client.py +126 -16
  33. letta/llm_api/bedrock_client.py +4 -2
  34. letta/llm_api/deepseek_client.py +4 -1
  35. letta/llm_api/google_vertex_client.py +123 -42
  36. letta/llm_api/groq_client.py +4 -1
  37. letta/llm_api/llm_api_tools.py +11 -4
  38. letta/llm_api/llm_client_base.py +6 -2
  39. letta/llm_api/openai.py +32 -2
  40. letta/llm_api/openai_client.py +423 -18
  41. letta/llm_api/xai_client.py +4 -1
  42. letta/main.py +9 -5
  43. letta/memory.py +1 -0
  44. letta/orm/__init__.py +1 -1
  45. letta/orm/agent.py +10 -0
  46. letta/orm/block.py +7 -16
  47. letta/orm/blocks_agents.py +8 -2
  48. letta/orm/files_agents.py +2 -0
  49. letta/orm/job.py +7 -5
  50. letta/orm/mcp_oauth.py +1 -0
  51. letta/orm/message.py +21 -6
  52. letta/orm/organization.py +2 -0
  53. letta/orm/provider.py +6 -2
  54. letta/orm/run.py +71 -0
  55. letta/orm/sandbox_config.py +7 -1
  56. letta/orm/sqlalchemy_base.py +0 -306
  57. letta/orm/step.py +6 -5
  58. letta/orm/step_metrics.py +5 -5
  59. letta/otel/tracing.py +28 -3
  60. letta/plugins/defaults.py +4 -4
  61. letta/prompts/system_prompts/__init__.py +2 -0
  62. letta/prompts/system_prompts/letta_v1.py +25 -0
  63. letta/schemas/agent.py +3 -2
  64. letta/schemas/agent_file.py +9 -3
  65. letta/schemas/block.py +23 -10
  66. letta/schemas/enums.py +21 -2
  67. letta/schemas/job.py +17 -4
  68. letta/schemas/letta_message_content.py +71 -2
  69. letta/schemas/letta_stop_reason.py +5 -5
  70. letta/schemas/llm_config.py +53 -3
  71. letta/schemas/memory.py +1 -1
  72. letta/schemas/message.py +504 -117
  73. letta/schemas/openai/responses_request.py +64 -0
  74. letta/schemas/providers/__init__.py +2 -0
  75. letta/schemas/providers/anthropic.py +16 -0
  76. letta/schemas/providers/ollama.py +115 -33
  77. letta/schemas/providers/openrouter.py +52 -0
  78. letta/schemas/providers/vllm.py +2 -1
  79. letta/schemas/run.py +48 -42
  80. letta/schemas/step.py +2 -2
  81. letta/schemas/step_metrics.py +1 -1
  82. letta/schemas/tool.py +15 -107
  83. letta/schemas/tool_rule.py +88 -5
  84. letta/serialize_schemas/marshmallow_agent.py +1 -0
  85. letta/server/db.py +86 -408
  86. letta/server/rest_api/app.py +61 -10
  87. letta/server/rest_api/dependencies.py +14 -0
  88. letta/server/rest_api/redis_stream_manager.py +19 -8
  89. letta/server/rest_api/routers/v1/agents.py +364 -292
  90. letta/server/rest_api/routers/v1/blocks.py +14 -20
  91. letta/server/rest_api/routers/v1/identities.py +45 -110
  92. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  93. letta/server/rest_api/routers/v1/jobs.py +23 -6
  94. letta/server/rest_api/routers/v1/messages.py +1 -1
  95. letta/server/rest_api/routers/v1/runs.py +126 -85
  96. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  97. letta/server/rest_api/routers/v1/tools.py +281 -594
  98. letta/server/rest_api/routers/v1/voice.py +1 -1
  99. letta/server/rest_api/streaming_response.py +29 -29
  100. letta/server/rest_api/utils.py +122 -64
  101. letta/server/server.py +160 -887
  102. letta/services/agent_manager.py +236 -919
  103. letta/services/agent_serialization_manager.py +16 -0
  104. letta/services/archive_manager.py +0 -100
  105. letta/services/block_manager.py +211 -168
  106. letta/services/file_manager.py +1 -1
  107. letta/services/files_agents_manager.py +24 -33
  108. letta/services/group_manager.py +0 -142
  109. letta/services/helpers/agent_manager_helper.py +7 -2
  110. letta/services/helpers/run_manager_helper.py +85 -0
  111. letta/services/job_manager.py +96 -411
  112. letta/services/lettuce/__init__.py +6 -0
  113. letta/services/lettuce/lettuce_client_base.py +86 -0
  114. letta/services/mcp_manager.py +38 -6
  115. letta/services/message_manager.py +165 -362
  116. letta/services/organization_manager.py +0 -36
  117. letta/services/passage_manager.py +0 -345
  118. letta/services/provider_manager.py +0 -80
  119. letta/services/run_manager.py +301 -0
  120. letta/services/sandbox_config_manager.py +0 -234
  121. letta/services/step_manager.py +62 -39
  122. letta/services/summarizer/summarizer.py +9 -7
  123. letta/services/telemetry_manager.py +0 -16
  124. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  125. letta/services/tool_executor/core_tool_executor.py +397 -2
  126. letta/services/tool_executor/files_tool_executor.py +3 -3
  127. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  128. letta/services/tool_executor/tool_execution_manager.py +6 -8
  129. letta/services/tool_executor/tool_executor_base.py +3 -3
  130. letta/services/tool_manager.py +85 -339
  131. letta/services/tool_sandbox/base.py +24 -13
  132. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  133. letta/services/tool_schema_generator.py +123 -0
  134. letta/services/user_manager.py +0 -99
  135. letta/settings.py +20 -4
  136. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
  137. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
  138. letta/agents/temporal/activities/__init__.py +0 -4
  139. letta/agents/temporal/activities/example_activity.py +0 -7
  140. letta/agents/temporal/activities/prepare_messages.py +0 -10
  141. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  142. letta/agents/temporal/types.py +0 -25
  143. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
  144. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
  145. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/llm_client_base.py CHANGED
@@ -9,7 +9,7 @@ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from letta.errors import LLMError
 from letta.otel.tracing import log_event, trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import AgentType, ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
@@ -40,6 +40,7 @@ class LLMClientBase:
     @trace_method
     def send_llm_request(
         self,
+        agent_type: AgentType,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
@@ -52,7 +53,7 @@
         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, llm_config, tools, force_tool_call)
+        request_data = self.build_request_data(agent_type, messages, llm_config, tools, force_tool_call)
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -108,6 +109,7 @@
 
     async def send_llm_batch_request_async(
         self,
+        agent_type: AgentType,
         agent_messages_mapping: Dict[str, List[Message]],
         agent_tools_mapping: Dict[str, List[dict]],
         agent_llm_config_mapping: Dict[str, LLMConfig],
@@ -120,10 +122,12 @@
     @abstractmethod
     def build_request_data(
         self,
+        agent_type: AgentType,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
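The practical upshot of the base-class changes: every call into a client now carries the calling agent's type, and build_request_data grows a requires_subsequent_tool_call flag. A minimal sketch of a call site under the new signature; the client instance, messages, and config are placeholders, not taken from the diff:

```python
# Hypothetical call site illustrating the new send_llm_request signature;
# only AgentType.letta_v1_agent and the parameter names come from the diff.
from letta.schemas.enums import AgentType

def send_with_agent_type(client, messages, llm_config):
    # agent_type is now threaded through send_llm_request into build_request_data
    return client.send_llm_request(
        agent_type=AgentType.letta_v1_agent,
        messages=messages,
        llm_config=llm_config,
        tools=None,
    )
```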
letta/llm_api/openai.py CHANGED
@@ -40,6 +40,7 @@ from letta.schemas.openai.chat_completion_response import (
     UsageStatistics,
 )
 from letta.schemas.openai.embedding_response import EmbeddingResponse
+from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.utils import get_tool_call_id, smart_urljoin
 
@@ -82,6 +83,12 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
+    # Add optional OpenRouter headers if hitting OpenRouter
+    if "openrouter.ai" in url:
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
 
     logger.debug(f"Sending request to {url}")
     response = None
@@ -139,6 +146,11 @@ async def openai_get_model_list_async(
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"
+    if "openrouter.ai" in url:
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
 
     logger.debug(f"Sending request to {url}")
 
@@ -550,7 +562,16 @@ def openai_chat_completions_request_stream(
 
     data = prepare_openai_payload(chat_completion_request)
    data["stream"] = True
-    client = OpenAI(api_key=api_key, base_url=url, max_retries=0)
+    kwargs = {"api_key": api_key, "base_url": url, "max_retries": 0}
+    if "openrouter.ai" in url:
+        headers = {}
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
+        if headers:
+            kwargs["default_headers"] = headers
+    client = OpenAI(**kwargs)
     try:
         stream = client.chat.completions.create(**data)
         for chunk in stream:
@@ -574,7 +595,16 @@ def openai_chat_completions_request(
     https://platform.openai.com/docs/guides/text-generation?lang=curl
     """
     data = prepare_openai_payload(chat_completion_request)
-    client = OpenAI(api_key=api_key, base_url=url, max_retries=0)
+    kwargs = {"api_key": api_key, "base_url": url, "max_retries": 0}
+    if "openrouter.ai" in url:
+        headers = {}
+        if model_settings.openrouter_referer:
+            headers["HTTP-Referer"] = model_settings.openrouter_referer
+        if model_settings.openrouter_title:
+            headers["X-Title"] = model_settings.openrouter_title
+        if headers:
+            kwargs["default_headers"] = headers
+    client = OpenAI(**kwargs)
     log_event(name="llm_request_sent", attributes=data)
     chat_completion = client.chat.completions.create(**data)
     log_event(name="llm_response_received", attributes=chat_completion.model_dump())
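The same OpenRouter header block is inlined at all four touched call sites above. A sketch of the shared pattern, pulled out as a hypothetical helper (not present in the diff) to show the logic in one place:

```python
# Hypothetical helper restating the repeated pattern above; the diff itself
# inlines this logic at each call site rather than sharing a function.
from letta.settings import model_settings

def _openrouter_headers(url: str) -> dict:
    headers = {}
    if "openrouter.ai" in url:
        if model_settings.openrouter_referer:
            headers["HTTP-Referer"] = model_settings.openrouter_referer
        if model_settings.openrouter_title:
            headers["X-Title"] = model_settings.openrouter_title
    return headers
```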
letta/llm_api/openai_client.py CHANGED
@@ -4,10 +4,13 @@ from typing import List, Optional
 
 import openai
 from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai.types import Reasoning
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.responses import ResponseTextConfigParam
+from openai.types.responses.response_stream_event import ResponseStreamEvent
 
-from letta.constants import LETTA_MODEL_ENDPOINT
+from letta.constants import LETTA_MODEL_ENDPOINT, REQUEST_HEARTBEAT_PARAM
 from letta.errors import (
     ContextWindowExceededError,
     ErrorCode,
@@ -26,6 +29,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.schemas.agent import AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.letta_message_content import MessageContentType
 from letta.schemas.llm_config import LLMConfig
@@ -38,7 +42,15 @@ from letta.schemas.openai.chat_completion_request import (
     ToolFunctionChoice,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import (
+    ChatCompletionResponse,
+    Choice,
+    FunctionCall,
+    Message as ChoiceMessage,
+    ToolCall,
+    UsageStatistics,
+)
+from letta.schemas.openai.responses_request import ResponsesRequest
 from letta.settings import model_settings
 
 logger = get_logger(__name__)
@@ -117,6 +129,11 @@ def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     return False
 
 
+def use_responses_api(llm_config: LLMConfig) -> bool:
+    # TODO can opt in all reasoner models to use the Responses API
+    return is_openai_reasoning_model(llm_config.model)
+
+
 def supports_content_none(llm_config: LLMConfig) -> bool:
     """Certain providers don't support the content None."""
     if "gpt-oss" in llm_config.model:
@@ -128,12 +145,32 @@ class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key, _, _ = self.get_byok_overrides(llm_config)
 
+        # Default to global OpenAI key when no BYOK override
        if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
-        # supposedly the openai python client requires a dummy API key
-        api_key = api_key or "DUMMY_API_KEY"
+
         kwargs = {"api_key": api_key, "base_url": llm_config.model_endpoint}
 
+        # OpenRouter-specific overrides: use OpenRouter key and optional headers
+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+            if or_key:
+                kwargs["api_key"] = or_key
+            # Attach optional headers if provided
+            headers = {}
+            if model_settings.openrouter_referer:
+                headers["HTTP-Referer"] = model_settings.openrouter_referer
+            if model_settings.openrouter_title:
+                headers["X-Title"] = model_settings.openrouter_title
+            if headers:
+                kwargs["default_headers"] = headers
+
+        # The OpenAI client requires some API key value
+        kwargs["api_key"] = kwargs.get("api_key") or "DUMMY_API_KEY"
+
         return kwargs
 
     def _prepare_client_kwargs_embedding(self, embedding_config: EmbeddingConfig) -> dict:
@@ -148,10 +185,25 @@
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
-        # supposedly the openai python client requires a dummy API key
-        api_key = api_key or "DUMMY_API_KEY"
         kwargs = {"api_key": api_key, "base_url": llm_config.model_endpoint}
 
+        is_openrouter = (llm_config.model_endpoint and "openrouter.ai" in llm_config.model_endpoint) or (
+            llm_config.provider_name == "openrouter"
+        )
+        if is_openrouter:
+            or_key = model_settings.openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+            if or_key:
+                kwargs["api_key"] = or_key
+            headers = {}
+            if model_settings.openrouter_referer:
+                headers["HTTP-Referer"] = model_settings.openrouter_referer
+            if model_settings.openrouter_title:
+                headers["X-Title"] = model_settings.openrouter_title
+            if headers:
+                kwargs["default_headers"] = headers
+
+        kwargs["api_key"] = kwargs.get("api_key") or "DUMMY_API_KEY"
+
         return kwargs
 
     def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
@@ -160,17 +212,182 @@
     def supports_structured_output(self, llm_config: LLMConfig) -> bool:
         return supports_structured_output(llm_config)
 
+    @trace_method
+    def build_request_data_responses(
+        self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
+        messages: List[PydanticMessage],
+        llm_config: LLMConfig,
+        tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
+        force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for the OpenAI Responses API.
+        """
+        if llm_config.put_inner_thoughts_in_kwargs:
+            raise ValueError("Inner thoughts in kwargs are not supported for the OpenAI Responses API")
+
+        openai_messages_list = PydanticMessage.to_openai_responses_dicts_from_list(messages)
+        # Add multi-modal support for Responses API by rewriting user messages
+        # into input_text/input_image parts.
+        openai_messages_list = fill_image_content_in_responses_input(openai_messages_list, messages)
+
+        if llm_config.model:
+            model = llm_config.model
+        else:
+            logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+            model = None
+
+        # Default to auto, unless there's a forced tool call coming from above or requires_subsequent_tool_call is True
+        tool_choice = None
+        if tools:  # only set tool_choice if tools exist
+            if force_tool_call is not None:
+                tool_choice = {"type": "function", "name": force_tool_call}
+            elif requires_subsequent_tool_call:
+                tool_choice = "required"
+            else:
+                tool_choice = "auto"
+
+        # Convert the tools from the ChatCompletions style to the Responses style
+        if tools:
+            # Get proper typing
+            typed_tools: List[OpenAITool] = [OpenAITool(type="function", function=f) for f in tools]
+
+            # Strip request heartbeat
+            # TODO relax this?
+            if agent_type == AgentType.letta_v1_agent:
+                new_tools = []
+                for tool in typed_tools:
+                    # Remove request_heartbeat from the properties if it exists
+                    if tool.function.parameters and "properties" in tool.function.parameters:
+                        tool.function.parameters["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                        # Also remove from required list if present
+                        if "required" in tool.function.parameters and REQUEST_HEARTBEAT_PARAM in tool.function.parameters["required"]:
+                            tool.function.parameters["required"].remove(REQUEST_HEARTBEAT_PARAM)
+                    new_tools.append(tool.model_copy(deep=True))
+                typed_tools = new_tools
+
+            # Convert to strict mode
+            if supports_structured_output(llm_config):
+                for tool in typed_tools:
+                    try:
+                        structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                        tool.function = FunctionSchema(**structured_output_version)
+                    except ValueError as e:
+                        logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+
+                # Finally convert to a Responses-friendly dict
+                responses_tools = [
+                    {
+                        "type": "function",
+                        "name": t.function.name,
+                        "description": t.function.description,
+                        "parameters": t.function.parameters,
+                        "strict": True,
+                    }
+                    for t in typed_tools
+                ]
+
+            else:
+                # Finally convert to a Responses-friendly dict
+                responses_tools = [
+                    {
+                        "type": "function",
+                        "name": t.function.name,
+                        "description": t.function.description,
+                        "parameters": t.function.parameters,
+                        # "strict": True,
+                    }
+                    for t in typed_tools
+                ]
+        else:
+            responses_tools = None
+
+        # Prepare the request payload
+        data = ResponsesRequest(
+            # Responses specific
+            store=False,
+            include=["reasoning.encrypted_content"],
+            # More or less generic to ChatCompletions API
+            model=model,
+            input=openai_messages_list,
+            tools=responses_tools,
+            tool_choice=tool_choice,
+            max_output_tokens=llm_config.max_tokens,
+            temperature=llm_config.temperature if supports_temperature_param(model) else None,
+            parallel_tool_calls=False,
+        )
+
+        # Add verbosity control for GPT-5 models
+        if supports_verbosity_control(model) and llm_config.verbosity:
+            # data.verbosity = llm_config.verbosity
+            # https://cookbook.openai.com/examples/gpt-5/gpt-5_new_params_and_tools
+            data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
+
+        # Add reasoning effort control for reasoning models
+        if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
+            # data.reasoning_effort = llm_config.reasoning_effort
+            data.reasoning = Reasoning(
+                effort=llm_config.reasoning_effort,
+                # NOTE: hardcoding summary level, could put in llm_config?
+                summary="detailed",
+            )
+
+        # TODO I don't see this in Responses?
+        # Add frequency penalty
+        # if llm_config.frequency_penalty is not None:
+        #     data.frequency_penalty = llm_config.frequency_penalty
+
+        # Add parallel tool calling
+        if tools and supports_parallel_tool_calling(model):
+            data.parallel_tool_calls = False
+
+        # always set user id for openai requests
+        if self.actor:
+            data.user = self.actor.id
+
+        if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
+            if not self.actor:
+                # override user id for inference.letta.com
+                import uuid
+
+                data.user = str(uuid.UUID(int=0))
+
+            data.model = "memgpt-openai"
+
+        request_data = data.model_dump(exclude_unset=True)
+        # print("responses request data", request_data)
+        return request_data
+
     @trace_method
     def build_request_data(
         self,
+        agent_type: AgentType,  # if react, use native content + strip heartbeats
         messages: List[PydanticMessage],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
         force_tool_call: Optional[str] = None,
+        requires_subsequent_tool_call: bool = False,
     ) -> dict:
         """
         Constructs a request object in the expected data format for the OpenAI API.
         """
+        # Shortcut for GPT-5 to use Responses API, but only for letta_v1_agent
+        if use_responses_api(llm_config) and agent_type == AgentType.letta_v1_agent:
+            return self.build_request_data_responses(
+                agent_type=agent_type,
+                messages=messages,
+                llm_config=llm_config,
+                tools=tools,
+                force_tool_call=force_tool_call,
+                requires_subsequent_tool_call=requires_subsequent_tool_call,
+            )
+
+        if agent_type == AgentType.letta_v1_agent:
+            # Safety hard override in case it got set somewhere by accident
+            llm_config.put_inner_thoughts_in_kwargs = False
+
         if tools and llm_config.put_inner_thoughts_in_kwargs:
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
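For reference, the tool conversion above flattens the Chat Completions wrapper one level: the Responses API takes name, description, and parameters at the top of the tool dict. A minimal sketch with placeholder tool fields (only the shape change comes from the diff):

```python
# Placeholder Chat Completions tool; only the shape matters here.
chat_completions_tool = {
    "type": "function",
    "function": {
        "name": "search",
        "description": "Find things",
        "parameters": {"type": "object", "properties": {}},
    },
}

# Responses-style equivalent produced by the conversion above.
responses_tool = {
    "type": "function",
    "name": chat_completions_tool["function"]["name"],
    "description": chat_completions_tool["function"]["description"],
    "parameters": chat_completions_tool["function"]["parameters"],
    "strict": True,  # only set when supports_structured_output(llm_config) is true
}
```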
@@ -208,15 +425,16 @@
         # TODO: This vllm checking is very brittle and is a patch at most
         tool_choice = None
         if tools:  # only set tool_choice if tools exist
-            if self.requires_auto_tool_choice(llm_config):
+            if force_tool_call is not None:
+                tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
+            elif requires_subsequent_tool_call:
+                tool_choice = "required"
+            elif self.requires_auto_tool_choice(llm_config) or agent_type == AgentType.letta_v1_agent:
                 tool_choice = "auto"
             else:
                 # only set if tools is non-Null
                 tool_choice = "required"
 
-        if force_tool_call is not None:
-            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
-
         if not supports_content_none(llm_config):
             for message in openai_message_list:
                 if message.content is None:
@@ -260,6 +478,21 @@
 
             data.model = "memgpt-openai"
 
+        # For some reason, request heartbeats are still leaking into here...
+        # So strip them manually for v3
+        if agent_type == AgentType.letta_v1_agent:
+            new_tools = []
+            if data.tools:
+                for tool in data.tools:
+                    # Remove request_heartbeat from the properties if it exists
+                    if tool.function.parameters and "properties" in tool.function.parameters:
+                        tool.function.parameters["properties"].pop(REQUEST_HEARTBEAT_PARAM, None)
+                        # Also remove from required list if present
+                        if "required" in tool.function.parameters and REQUEST_HEARTBEAT_PARAM in tool.function.parameters["required"]:
+                            tool.function.parameters["required"].remove(REQUEST_HEARTBEAT_PARAM)
+                    new_tools.append(tool.model_copy(deep=True))
+            data.tools = new_tools
+
         if data.tools is not None and len(data.tools) > 0:
             # Convert to structured output style (which has 'strict' and no optionals)
             for tool in data.tools:
@@ -270,6 +503,14 @@
                     except ValueError as e:
                         logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
         request_data = data.model_dump(exclude_unset=True)
+
+        # If Ollama
+        # if llm_config.handle.startswith("ollama/") and llm_config.enable_reasoner:
+        # Sadly, reasoning via the OpenAI proxy on Ollama only works for Harmony/gpt-oss
+        # Ollama's OpenAI layer simply looks for the presence of 'reasoining' or 'reasoning_effort'
+        # If set, then in the backend "medium" thinking is turned on
+        # request_data["reasoning_effort"] = "medium"
+
         return request_data
 
     @trace_method
@@ -278,8 +519,13 @@
         Performs underlying synchronous request to OpenAI API and returns raw response dict.
         """
         client = OpenAI(**self._prepare_client_kwargs(llm_config))
-        response: ChatCompletion = client.chat.completions.create(**request_data)
-        return response.model_dump()
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            resp = client.responses.create(**request_data)
+            return resp.model_dump()
+        else:
+            response: ChatCompletion = client.chat.completions.create(**request_data)
+            return response.model_dump()
 
     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -288,8 +534,13 @@
         """
         kwargs = await self._prepare_client_kwargs_async(llm_config)
         client = AsyncOpenAI(**kwargs)
-        response: ChatCompletion = await client.chat.completions.create(**request_data)
-        return response.model_dump()
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            resp = await client.responses.create(**request_data)
+            return resp.model_dump()
+        else:
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+            return response.model_dump()
 
     def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
         return is_openai_reasoning_model(llm_config.model)
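Note that routing never consults llm_config at this layer; it keys off the payload shape that build_request_data produced. A one-line predicate capturing the check inlined in request, request_async, and stream_async (the helper name is illustrative, not from the diff):

```python
# Illustrative predicate; the diff inlines this check at each call site.
def is_responses_payload(request_data: dict) -> bool:
    # Responses API payloads carry "input"; Chat Completions payloads carry "messages".
    return "input" in request_data and "messages" not in request_data
```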
@@ -305,6 +556,84 @@
         Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
         Handles potential extraction of inner thoughts if they were added via kwargs.
         """
+        if "object" in response_data and response_data["object"] == "response":
+            # Map Responses API shape to Chat Completions shape
+            # See example payload in tests/integration_test_send_message_v2.py
+            model = response_data.get("model")
+
+            # Extract usage
+            usage = response_data.get("usage", {}) or {}
+            prompt_tokens = usage.get("input_tokens") or 0
+            completion_tokens = usage.get("output_tokens") or 0
+            total_tokens = usage.get("total_tokens") or (prompt_tokens + completion_tokens)
+
+            # Extract assistant message text from the outputs list
+            outputs = response_data.get("output") or []
+            assistant_text_parts = []
+            reasoning_summary_parts = None
+            reasoning_content_signature = None
+            tool_calls = None
+            finish_reason = "stop" if (response_data.get("status") == "completed") else None
+
+            # Optionally capture reasoning presence
+            found_reasoning = False
+            for out in outputs:
+                out_type = (out or {}).get("type")
+                if out_type == "message":
+                    content_list = (out or {}).get("content") or []
+                    for part in content_list:
+                        if (part or {}).get("type") == "output_text":
+                            text_val = (part or {}).get("text")
+                            if text_val:
+                                assistant_text_parts.append(text_val)
+                elif out_type == "reasoning":
+                    found_reasoning = True
+                    reasoning_summary_parts = [part.get("text") for part in out.get("summary")]
+                    reasoning_content_signature = out.get("encrypted_content")
+                elif out_type == "function_call":
+                    tool_calls = [
+                        ToolCall(
+                            id=out.get("call_id"),
+                            type="function",
+                            function=FunctionCall(
+                                name=out.get("name"),
+                                arguments=out.get("arguments"),
+                            ),
+                        )
+                    ]
+
+            assistant_text = "\n".join(assistant_text_parts) if assistant_text_parts else None
+
+            # Build ChatCompletionResponse-compatible structure
+            # Imports for these Pydantic models are already present in this module
+            choice = Choice(
+                index=0,
+                finish_reason=finish_reason,
+                message=ChoiceMessage(
+                    role="assistant",
+                    content=assistant_text or "",
+                    reasoning_content="\n".join(reasoning_summary_parts) if reasoning_summary_parts else None,
+                    reasoning_content_signature=reasoning_content_signature if reasoning_summary_parts else None,
+                    redacted_reasoning_content=None,
+                    omitted_reasoning_content=False,
+                    tool_calls=tool_calls,
+                ),
+            )
+
+            chat_completion_response = ChatCompletionResponse(
+                id=response_data.get("id", ""),
+                choices=[choice],
+                created=int(response_data.get("created_at") or 0),
+                model=model or (llm_config.model if hasattr(llm_config, "model") else None),
+                usage=UsageStatistics(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                ),
+            )
+
+            return chat_completion_response
+
         # OpenAI's response structure directly maps to ChatCompletionResponse
         # We just need to instantiate the Pydantic model for validation and type safety.
         chat_completion_response = ChatCompletionResponse(**response_data)
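To make the mapping concrete, here is a hypothetical minimal Responses payload trimmed to the keys convert_response reads; the id, model, and text values are placeholders:

```python
# Hypothetical Responses API payload; field names mirror those read above.
example_response = {
    "object": "response",
    "id": "resp_123",  # placeholder
    "model": "gpt-5",  # placeholder
    "status": "completed",  # mapped to finish_reason="stop"
    "created_at": 0,
    "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
    "output": [
        {"type": "reasoning", "summary": [{"text": "step one"}], "encrypted_content": None},
        {"type": "message", "content": [{"type": "output_text", "text": "Hello"}]},
    ],
}
# convert_response would surface "Hello" as the assistant content and
# "step one" as reasoning_content on a single Choice at index 0.
```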
@@ -322,15 +651,36 @@
         return chat_completion_response
 
     @trace_method
-    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk | ResponseStreamEvent]:
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         kwargs = await self._prepare_client_kwargs_async(llm_config)
         client = AsyncOpenAI(**kwargs)
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
-            **request_data, stream=True, stream_options={"include_usage": True}
-        )
+
+        # Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
+        if "input" in request_data and "messages" not in request_data:
+            response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(
+                **request_data,
+                stream=True,
+                # stream_options={"include_usage": True},
+            )
+        else:
+            response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+                **request_data,
+                stream=True,
+                stream_options={"include_usage": True},
+            )
+        return response_stream
+
+    @trace_method
+    async def stream_async_responses(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ResponseStreamEvent]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        kwargs = await self._prepare_client_kwargs_async(llm_config)
+        client = AsyncOpenAI(**kwargs)
+        response_stream: AsyncStream[ResponseStreamEvent] = await client.responses.create(**request_data, stream=True)
         return response_stream
 
     @trace_method
@@ -539,3 +889,58 @@ def fill_image_content_in_messages(openai_message_list: List[dict], pydantic_mes
         new_message_list.append({"role": "user", "content": message_content})
 
     return new_message_list
+
+
+def fill_image_content_in_responses_input(openai_message_list: List[dict], pydantic_message_list: List[PydanticMessage]) -> List[dict]:
+    """
+    Rewrite user messages in the Responses API input to embed multi-modal parts inside
+    the message's content array (not as top-level items).
+
+    Expected structure for Responses API input messages:
+        { "type": "message", "role": "user", "content": [
+            {"type": "input_text", "text": "..."},
+            {"type": "input_image", "image_url": {"url": "data:<mime>;base64,<data>", "detail": "auto"}}
+        ] }
+
+    Non-user items are left unchanged.
+    """
+    user_msgs = [m for m in pydantic_message_list if getattr(m, "role", None) == "user"]
+    user_idx = 0
+
+    rewritten: List[dict] = []
+    for item in openai_message_list:
+        if isinstance(item, dict) and item.get("role") == "user":
+            if user_idx >= len(user_msgs):
+                rewritten.append(item)
+                continue
+
+            pm = user_msgs[user_idx]
+            user_idx += 1
+
+            # Only rewrite if the pydantic message actually contains multiple parts or images
+            if not isinstance(pm.content, list) or (len(pm.content) == 1 and pm.content[0].type == MessageContentType.text):
+                rewritten.append(item)
+                continue
+
+            parts: List[dict] = []
+            for content in pm.content:
+                if content.type == MessageContentType.text:
+                    parts.append({"type": "input_text", "text": content.text})
+                elif content.type == MessageContentType.image:
+                    # For Responses API, image_url is a string and detail is required
+                    data_url = f"data:{content.source.media_type};base64,{content.source.data}"
+                    parts.append(
+                        {"type": "input_image", "image_url": data_url, "detail": getattr(content.source, "detail", None) or "auto"}
+                    )
+                else:
+                    # Skip unsupported content types for Responses input
+                    continue
+
+            # Update message content to include multi-modal parts (EasyInputMessageParam style)
+            new_item = dict(item)
+            new_item["content"] = parts
+            rewritten.append(new_item)
+        else:
+            rewritten.append(item)
+
+    return rewritten
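A hypothetical before/after for a single user message with one text part and one image part, matching the rewrite performed by fill_image_content_in_responses_input (the text and data URL are placeholders):

```python
# Input item as produced upstream for the Responses API (placeholder text).
before = {"type": "message", "role": "user", "content": "look at this"}

# After the rewrite, content becomes a list of input_text/input_image parts.
after = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "look at this"},
        {"type": "input_image", "image_url": "data:image/png;base64,...", "detail": "auto"},  # truncated placeholder
    ],
}
```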