letta-nightly 0.12.1.dev20251024104217__py3-none-any.whl → 0.13.0.dev20251025104015__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159)
  1. letta/__init__.py +2 -3
  2. letta/adapters/letta_llm_adapter.py +1 -0
  3. letta/adapters/simple_llm_request_adapter.py +8 -5
  4. letta/adapters/simple_llm_stream_adapter.py +22 -6
  5. letta/agents/agent_loop.py +10 -3
  6. letta/agents/base_agent.py +4 -1
  7. letta/agents/helpers.py +41 -9
  8. letta/agents/letta_agent.py +11 -10
  9. letta/agents/letta_agent_v2.py +47 -37
  10. letta/agents/letta_agent_v3.py +395 -300
  11. letta/agents/voice_agent.py +8 -6
  12. letta/agents/voice_sleeptime_agent.py +3 -3
  13. letta/constants.py +30 -7
  14. letta/errors.py +20 -0
  15. letta/functions/function_sets/base.py +55 -3
  16. letta/functions/mcp_client/types.py +33 -57
  17. letta/functions/schema_generator.py +135 -23
  18. letta/groups/sleeptime_multi_agent_v3.py +6 -11
  19. letta/groups/sleeptime_multi_agent_v4.py +227 -0
  20. letta/helpers/converters.py +78 -4
  21. letta/helpers/crypto_utils.py +6 -2
  22. letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +9 -11
  23. letta/interfaces/anthropic_streaming_interface.py +3 -4
  24. letta/interfaces/gemini_streaming_interface.py +4 -6
  25. letta/interfaces/openai_streaming_interface.py +63 -28
  26. letta/llm_api/anthropic_client.py +7 -4
  27. letta/llm_api/deepseek_client.py +6 -4
  28. letta/llm_api/google_ai_client.py +3 -12
  29. letta/llm_api/google_vertex_client.py +1 -1
  30. letta/llm_api/helpers.py +90 -61
  31. letta/llm_api/llm_api_tools.py +4 -1
  32. letta/llm_api/openai.py +12 -12
  33. letta/llm_api/openai_client.py +53 -16
  34. letta/local_llm/constants.py +4 -3
  35. letta/local_llm/json_parser.py +5 -2
  36. letta/local_llm/utils.py +2 -3
  37. letta/log.py +171 -7
  38. letta/orm/agent.py +43 -9
  39. letta/orm/archive.py +4 -0
  40. letta/orm/custom_columns.py +15 -0
  41. letta/orm/identity.py +11 -11
  42. letta/orm/mcp_server.py +9 -0
  43. letta/orm/message.py +6 -1
  44. letta/orm/run_metrics.py +7 -2
  45. letta/orm/sqlalchemy_base.py +2 -2
  46. letta/orm/tool.py +3 -0
  47. letta/otel/tracing.py +2 -0
  48. letta/prompts/prompt_generator.py +7 -2
  49. letta/schemas/agent.py +41 -10
  50. letta/schemas/agent_file.py +3 -0
  51. letta/schemas/archive.py +4 -2
  52. letta/schemas/block.py +2 -1
  53. letta/schemas/enums.py +36 -3
  54. letta/schemas/file.py +3 -3
  55. letta/schemas/folder.py +2 -1
  56. letta/schemas/group.py +2 -1
  57. letta/schemas/identity.py +18 -9
  58. letta/schemas/job.py +3 -1
  59. letta/schemas/letta_message.py +71 -12
  60. letta/schemas/letta_request.py +7 -3
  61. letta/schemas/letta_stop_reason.py +0 -25
  62. letta/schemas/llm_config.py +8 -2
  63. letta/schemas/mcp.py +80 -83
  64. letta/schemas/mcp_server.py +349 -0
  65. letta/schemas/memory.py +20 -8
  66. letta/schemas/message.py +212 -67
  67. letta/schemas/providers/anthropic.py +13 -6
  68. letta/schemas/providers/azure.py +6 -4
  69. letta/schemas/providers/base.py +8 -4
  70. letta/schemas/providers/bedrock.py +6 -2
  71. letta/schemas/providers/cerebras.py +7 -3
  72. letta/schemas/providers/deepseek.py +2 -1
  73. letta/schemas/providers/google_gemini.py +15 -6
  74. letta/schemas/providers/groq.py +2 -1
  75. letta/schemas/providers/lmstudio.py +9 -6
  76. letta/schemas/providers/mistral.py +2 -1
  77. letta/schemas/providers/openai.py +7 -2
  78. letta/schemas/providers/together.py +9 -3
  79. letta/schemas/providers/xai.py +7 -3
  80. letta/schemas/run.py +7 -2
  81. letta/schemas/run_metrics.py +2 -1
  82. letta/schemas/sandbox_config.py +2 -2
  83. letta/schemas/secret.py +3 -158
  84. letta/schemas/source.py +2 -2
  85. letta/schemas/step.py +2 -2
  86. letta/schemas/tool.py +24 -1
  87. letta/schemas/usage.py +0 -1
  88. letta/server/rest_api/app.py +123 -7
  89. letta/server/rest_api/dependencies.py +3 -0
  90. letta/server/rest_api/interface.py +7 -4
  91. letta/server/rest_api/redis_stream_manager.py +16 -1
  92. letta/server/rest_api/routers/v1/__init__.py +7 -0
  93. letta/server/rest_api/routers/v1/agents.py +332 -322
  94. letta/server/rest_api/routers/v1/archives.py +127 -40
  95. letta/server/rest_api/routers/v1/blocks.py +54 -6
  96. letta/server/rest_api/routers/v1/chat_completions.py +146 -0
  97. letta/server/rest_api/routers/v1/folders.py +27 -35
  98. letta/server/rest_api/routers/v1/groups.py +23 -35
  99. letta/server/rest_api/routers/v1/identities.py +24 -10
  100. letta/server/rest_api/routers/v1/internal_runs.py +107 -0
  101. letta/server/rest_api/routers/v1/internal_templates.py +162 -179
  102. letta/server/rest_api/routers/v1/jobs.py +15 -27
  103. letta/server/rest_api/routers/v1/mcp_servers.py +309 -0
  104. letta/server/rest_api/routers/v1/messages.py +23 -34
  105. letta/server/rest_api/routers/v1/organizations.py +6 -27
  106. letta/server/rest_api/routers/v1/providers.py +35 -62
  107. letta/server/rest_api/routers/v1/runs.py +30 -43
  108. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -4
  109. letta/server/rest_api/routers/v1/sources.py +26 -42
  110. letta/server/rest_api/routers/v1/steps.py +16 -29
  111. letta/server/rest_api/routers/v1/tools.py +17 -13
  112. letta/server/rest_api/routers/v1/users.py +5 -17
  113. letta/server/rest_api/routers/v1/voice.py +18 -27
  114. letta/server/rest_api/streaming_response.py +5 -2
  115. letta/server/rest_api/utils.py +187 -25
  116. letta/server/server.py +27 -22
  117. letta/server/ws_api/server.py +5 -4
  118. letta/services/agent_manager.py +148 -26
  119. letta/services/agent_serialization_manager.py +6 -1
  120. letta/services/archive_manager.py +168 -15
  121. letta/services/block_manager.py +14 -4
  122. letta/services/file_manager.py +33 -29
  123. letta/services/group_manager.py +10 -0
  124. letta/services/helpers/agent_manager_helper.py +65 -11
  125. letta/services/identity_manager.py +105 -4
  126. letta/services/job_manager.py +11 -1
  127. letta/services/mcp/base_client.py +2 -2
  128. letta/services/mcp/oauth_utils.py +33 -8
  129. letta/services/mcp_manager.py +174 -78
  130. letta/services/mcp_server_manager.py +1331 -0
  131. letta/services/message_manager.py +109 -4
  132. letta/services/organization_manager.py +4 -4
  133. letta/services/passage_manager.py +9 -25
  134. letta/services/provider_manager.py +91 -15
  135. letta/services/run_manager.py +72 -15
  136. letta/services/sandbox_config_manager.py +45 -3
  137. letta/services/source_manager.py +15 -8
  138. letta/services/step_manager.py +24 -1
  139. letta/services/streaming_service.py +581 -0
  140. letta/services/summarizer/summarizer.py +1 -1
  141. letta/services/tool_executor/core_tool_executor.py +111 -0
  142. letta/services/tool_executor/files_tool_executor.py +5 -3
  143. letta/services/tool_executor/sandbox_tool_executor.py +2 -2
  144. letta/services/tool_executor/tool_execution_manager.py +1 -1
  145. letta/services/tool_manager.py +10 -3
  146. letta/services/tool_sandbox/base.py +61 -1
  147. letta/services/tool_sandbox/local_sandbox.py +1 -3
  148. letta/services/user_manager.py +2 -2
  149. letta/settings.py +49 -5
  150. letta/system.py +14 -5
  151. letta/utils.py +73 -1
  152. letta/validators.py +105 -0
  153. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/METADATA +4 -2
  154. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/RECORD +157 -151
  155. letta/schemas/letta_ping.py +0 -28
  156. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  157. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/WHEEL +0 -0
  158. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/entry_points.txt +0 -0
  159. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/licenses/LICENSE +0 -0
letta/server/rest_api/routers/v1/voice.py CHANGED
@@ -1,14 +1,9 @@
 from typing import TYPE_CHECKING, Any, Dict
 
-import openai
 from fastapi import APIRouter, Body, Depends
-from fastapi.responses import StreamingResponse
 
-from letta.agents.voice_agent import VoiceAgent
 from letta.log import get_logger
 from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
-from letta.server.rest_api.utils import get_user_message_from_chat_completions_request
-from letta.settings import model_settings
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -23,11 +18,16 @@ logger = get_logger(__name__)
     "/{agent_id}/chat/completions",
     response_model=None,
     operation_id="create_voice_chat_completions",
+    deprecated=True,
     responses={
         200: {
             "description": "Successful response",
             "content": {"text/event-stream": {}},
-        }
+        },
+        410: {
+            "description": "Endpoint deprecated",
+            "content": {"application/json": {"example": {"detail": "This endpoint has been deprecated"}}},
+        },
     },
 )
 async def create_voice_chat_completions(
@@ -36,28 +36,19 @@ async def create_voice_chat_completions(
     server: "SyncServer" = Depends(get_letta_server),
     headers: HeaderParams = Depends(get_headers),
 ):
-    actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
+    """
+    DEPRECATED: This voice-beta endpoint has been deprecated.
 
-    # Create OpenAI async client
-    client = openai.AsyncClient(
-        api_key=model_settings.openai_api_key,
-        max_retries=0,
-        http_client=server.httpx_client,
-    )
+    The voice functionality has been integrated into the main chat completions endpoint.
+    Please use the standard /v1/agents/{agent_id}/messages endpoint instead.
 
-    # Instantiate our LowLatencyAgent
-    agent = VoiceAgent(
-        agent_id=agent_id,
-        openai_client=client,
-        message_manager=server.message_manager,
-        agent_manager=server.agent_manager,
-        block_manager=server.block_manager,
-        run_manager=server.run_manager,
-        passage_manager=server.passage_manager,
-        actor=actor,
-    )
+    This endpoint will be removed in a future version.
+    """
+    from fastapi import HTTPException
+
+    logger.warning(f"Deprecated voice-beta endpoint called for agent {agent_id}")
 
-    # Return the streaming generator
-    return StreamingResponse(
-        agent.step_stream(input_messages=get_user_message_from_chat_completions_request(completion_request)), media_type="text/event-stream"
+    raise HTTPException(
+        status_code=410,
+        detail="The /voice-beta endpoint has been deprecated and is no longer available.",
     )
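
Note: since the voice-beta route now answers with HTTP 410, callers should move to the standard agent messages endpoint named in the deprecation docstring. A minimal migration sketch; the base URL, agent id, and request body shape are assumptions, so check the Letta API reference for the exact schema:

    import httpx

    AGENT_ID = "agent-00000000-0000-0000-0000-000000000000"  # placeholder

    # Instead of the deprecated voice chat completions route (now 410),
    # send user input to the standard agent messages endpoint.
    resp = httpx.post(
        f"http://localhost:8283/v1/agents/{AGENT_ID}/messages",  # assumed local server URL
        json={"messages": [{"role": "user", "content": "Hello!"}]},  # assumed body shape
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json())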
letta/server/rest_api/streaming_response.py CHANGED
@@ -5,6 +5,8 @@
 import asyncio
 import json
 from collections.abc import AsyncIterator
+from datetime import datetime, timezone
+from uuid import uuid4
 
 import anyio
 from fastapi import HTTPException
@@ -14,7 +16,7 @@ from starlette.types import Send
 from letta.errors import LettaUnexpectedStreamCancellationError, PendingApprovalError
 from letta.log import get_logger
 from letta.schemas.enums import RunStatus
-from letta.schemas.letta_ping import LettaPing
+from letta.schemas.letta_message import LettaPing
 from letta.schemas.user import User
 from letta.server.rest_api.utils import capture_sentry_exception
 from letta.services.run_manager import RunManager
@@ -34,6 +36,7 @@ class RunCancelledException(Exception):
 
 async def add_keepalive_to_stream(
     stream_generator: AsyncIterator[str | bytes],
+    run_id: str,
     keepalive_interval: float = 30.0,
 ) -> AsyncIterator[str | bytes]:
     """
@@ -83,7 +86,7 @@
                 # No data received within keepalive interval
                 if not stream_exhausted:
                     # Send keepalive ping in the same format as [DONE]
-                    yield f"data: {LettaPing().model_dump_json()}\n\n"
+                    yield f"data: {LettaPing(id=f'ping-{uuid4()}', date=datetime.now(timezone.utc), run_id=run_id).model_dump_json()}\n\n"
                 else:
                     # Stream is done but queue might be processing
                     # Check if there's anything left
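
Note: add_keepalive_to_stream now takes a mandatory run_id so every keepalive ping is attributed to its run. A minimal usage sketch, assuming this is the helper in letta/server/rest_api/streaming_response.py; the stub stream and run id are placeholders:

    import asyncio

    from letta.server.rest_api.streaming_response import add_keepalive_to_stream

    async def demo():
        async def stub_stream():
            # placeholder SSE chunks standing in for a real agent stream
            yield 'data: {"message_type": "assistant_message"}\n\n'
            await asyncio.sleep(0.1)
            yield "data: [DONE]\n\n"

        # run_id is now required; idle gaps produce LettaPing events tagged with it
        wrapped = add_keepalive_to_stream(stub_stream(), run_id="run-placeholder", keepalive_interval=30.0)
        async for chunk in wrapped:
            print(chunk, end="")

    asyncio.run(demo())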
letta/server/rest_api/utils.py CHANGED
@@ -3,7 +3,7 @@ import json
 import os
 import uuid
 from enum import Enum
-from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
+from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
 
 from fastapi import Header, HTTPException
 from openai.types.chat import ChatCompletionMessageParam
@@ -27,6 +27,7 @@ from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import tracer
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import ToolReturn as LettaToolReturn
 from letta.schemas.letta_message_content import (
     OmittedReasoningContent,
     ReasoningContent,
@@ -120,7 +121,7 @@ async def sse_async_generator(
             err_msg = f"Expected LettaUsageStatistics, got {type(usage)}"
             logger.error(err_msg)
             raise ValueError(err_msg)
-        yield sse_formatter(usage.model_dump(exclude={"steps_messages"}))
+        yield sse_formatter(usage.model_dump())
 
     except ContextWindowExceededError as e:
         capture_sentry_exception(e)
@@ -168,7 +169,23 @@ def create_input_messages(input_messages: List[MessageCreate], agent_id: str, ti
     return messages
 
 
-def create_approval_response_message_from_input(agent_state: AgentState, input_message: ApprovalCreate) -> List[Message]:
+def create_approval_response_message_from_input(
+    agent_state: AgentState, input_message: ApprovalCreate, run_id: Optional[str] = None
+) -> List[Message]:
+    def maybe_convert_tool_return_message(maybe_tool_return: LettaToolReturn):
+        if isinstance(maybe_tool_return, LettaToolReturn):
+            packaged_function_response = package_function_response(
+                maybe_tool_return.status == "success", maybe_tool_return.tool_return, agent_state.timezone
+            )
+            return ToolReturn(
+                tool_call_id=maybe_tool_return.tool_call_id,
+                status=maybe_tool_return.status,
+                func_response=packaged_function_response,
+                stdout=maybe_tool_return.stdout,
+                stderr=maybe_tool_return.stderr,
+            )
+        return maybe_tool_return
+
     return [
         Message(
             role=MessageRole.approval,
@@ -177,6 +194,8 @@ def create_approval_response_message_from_input(agent_state: AgentState, input_m
             approval_request_id=input_message.approval_request_id,
             approve=input_message.approve,
             denial_reason=input_message.reason,
+            approvals=[maybe_convert_tool_return_message(approval) for approval in input_message.approvals],
+            run_id=run_id,
         )
     ]
 
@@ -184,45 +203,77 @@ def create_approval_response_message_from_input(agent_state: AgentState, input_m
 def create_approval_request_message_from_llm_response(
     agent_id: str,
     model: str,
-    function_name: str,
-    function_arguments: Dict,
-    tool_call_id: str,
-    actor: User,
-    continue_stepping: bool = False,
+    requested_tool_calls: List[OpenAIToolCall],
+    allowed_tool_calls: List[OpenAIToolCall] = [],
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
     step_id: str | None = None,
     run_id: str = None,
-    append_request_heartbeat: bool = True,
 ) -> Message:
+    messages = []
+    if allowed_tool_calls:
+        oai_tool_calls = [
+            OpenAIToolCall(
+                id=tool_call.id,
+                function=OpenAIFunction(
+                    name=tool_call.function.name,
+                    arguments=tool_call.function.arguments,
+                ),
+                type="function",
+            )
+            for tool_call in allowed_tool_calls
+        ]
+        tool_message = Message(
+            role=MessageRole.assistant,
+            content=reasoning_content if reasoning_content else [],
+            agent_id=agent_id,
+            model=model,
+            tool_calls=oai_tool_calls,
+            tool_call_id=allowed_tool_calls[0].id,
+            created_at=get_utc_time(),
+            step_id=step_id,
+            run_id=run_id,
+        )
+        if pre_computed_assistant_message_id:
+            tool_message.id = pre_computed_assistant_message_id
+        messages.append(tool_message)
     # Construct the tool call with the assistant's message
-    # Optionally set request_heartbeat in tool args (v2 behavior only)
-    if append_request_heartbeat:
-        function_arguments[REQUEST_HEARTBEAT_PARAM] = continue_stepping
-    tool_call = OpenAIToolCall(
-        id=tool_call_id,
-        function=OpenAIFunction(
-            name=function_name,
-            arguments=json.dumps(function_arguments),
-        ),
-        type="function",
-    )
+    oai_tool_calls = [
+        OpenAIToolCall(
+            id=tool_call.id,
+            function=OpenAIFunction(
+                name=tool_call.function.name,
+                arguments=tool_call.function.arguments,
+            ),
+            type="function",
+        )
+        for tool_call in requested_tool_calls
+    ]
     # TODO: Use ToolCallContent instead of tool_calls
     # TODO: This helps preserve ordering
     approval_message = Message(
         role=MessageRole.approval,
-        content=reasoning_content if reasoning_content else [],
+        content=reasoning_content if reasoning_content and not allowed_tool_calls else [],
         agent_id=agent_id,
         model=model,
-        tool_calls=[tool_call],
-        tool_call_id=tool_call_id,
+        tool_calls=oai_tool_calls,
+        tool_call_id=oai_tool_calls[0].id,
         created_at=get_utc_time(),
         step_id=step_id,
         run_id=run_id,
     )
     if pre_computed_assistant_message_id:
-        approval_message.id = pre_computed_assistant_message_id
-    return approval_message
+        approval_message.id = decrement_message_uuid(pre_computed_assistant_message_id)
+    messages.append(approval_message)
+    return messages
+
+
+def decrement_message_uuid(message_id: str):
+    message_uuid = uuid.UUID(message_id.split("-", maxsplit=1)[1])
+    uuid_as_int = message_uuid.int
+    decremented_int = uuid_as_int - 1
+    decremented_uuid = uuid.UUID(int=decremented_int)
+    return "message-" + str(decremented_uuid)
 
 
 def create_letta_messages_from_llm_response(
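
Note: decrement_message_uuid derives a second, adjacent id from the pre-computed assistant message id, so the approval message and the allowed-tool-call message keep distinct but ordered ids. A quick illustration of the arithmetic, with placeholder ids:

    import uuid

    def decrement_message_uuid(message_id: str):
        # same logic as the helper above: subtract 1 from the UUID's integer value
        message_uuid = uuid.UUID(message_id.split("-", maxsplit=1)[1])
        return "message-" + str(uuid.UUID(int=message_uuid.int - 1))

    print(decrement_message_uuid("message-00000000-0000-0000-0000-000000000002"))
    # -> message-00000000-0000-0000-0000-000000000001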
@@ -361,6 +412,117 @@ def create_letta_messages_from_llm_response(
     return messages
 
 
+def create_parallel_tool_messages_from_llm_response(
+    agent_id: str,
+    model: str,
+    tool_call_specs: List[Dict[str, Any]],  # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
+    tool_execution_results: List[ToolExecutionResult],
+    function_responses: List[Optional[str]],
+    timezone: str,
+    run_id: Optional[str] = None,
+    step_id: Optional[str] = None,
+    reasoning_content: Optional[
+        List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent | SummarizedReasoningContent]]
+    ] = None,
+    pre_computed_assistant_message_id: Optional[str] = None,
+    llm_batch_item_id: Optional[str] = None,
+    is_approval_response: bool = False,
+    tool_returns: List[ToolReturn] = [],
+) -> List[Message]:
+    """
+    Build two messages representing a parallel tool-call step:
+    - One assistant message with ALL tool_calls populated (tool_call_id left empty)
+    - One tool message with ALL tool_returns populated (tool_call_id left empty)
+
+    Notes:
+    - Consumers should read tool_calls/tool_returns arrays for per-call details.
+    - The tool message's content includes only the first call's packaged response for
+      backward-compatibility with legacy renderers. UIs should prefer tool_returns.
+    - When invoked for an approval response, the assistant message is omitted (the approval
+      tool call was previously surfaced).
+    """
+
+    # Construct OpenAI-style tool_calls for the assistant message
+    openai_tool_calls: List[OpenAIToolCall] = []
+    for spec in tool_call_specs:
+        name = spec.get("name")
+        args = spec.get("arguments", {})
+        call_id = spec.get("id") or str(uuid.uuid4())
+        # Ensure the spec carries the resolved id so returns/content can reference it
+        if not spec.get("id"):
+            spec["id"] = call_id
+        openai_tool_calls.append(
+            OpenAIToolCall(
+                id=call_id,
+                function=OpenAIFunction(name=name, arguments=json.dumps(args)),
+                type="function",
+            )
+        )
+
+    messages: List[Message] = []
+
+    if not is_approval_response:
+        # Assistant message with all tool_calls (no single tool_call_id)
+        # Safeguard against empty text messages
+        content: List[
+            Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
+        ] = []
+        if reasoning_content:
+            for content_part in reasoning_content:
+                if isinstance(content_part, TextContent) and content_part.text == "":
+                    continue
+                content.append(content_part)
+
+        assistant_message = Message(
+            role=MessageRole.assistant,
+            content=content,
+            agent_id=agent_id,
+            model=model,
+            tool_calls=openai_tool_calls,
+            tool_call_id=None,
+            created_at=get_utc_time(),
+            batch_item_id=llm_batch_item_id,
+            run_id=run_id,
+        )
+        if step_id:
+            assistant_message.step_id = step_id
+        if pre_computed_assistant_message_id:
+            assistant_message.id = pre_computed_assistant_message_id
+        messages.append(assistant_message)
+
+    content: List[TextContent] = []
+    for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
+        packaged = package_function_response(exec_result.success_flag, response, timezone)
+        content.append(TextContent(text=packaged))
+        tool_returns.append(
+            ToolReturn(
+                tool_call_id=spec.get("id"),
+                status=exec_result.status,
+                stdout=exec_result.stdout,
+                stderr=exec_result.stderr,
+                func_response=packaged,
+            )
+        )
+
+    tool_message = Message(
+        role=MessageRole.tool,
+        content=content,
+        agent_id=agent_id,
+        model=model,
+        tool_calls=[],
+        tool_call_id=tool_returns[0].tool_call_id,  # For legacy reasons, set to first one
+        created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
+        tool_returns=tool_returns,
+        run_id=run_id,
+    )
+    if step_id:
+        tool_message.step_id = step_id
+
+    messages.append(tool_message)
+    return messages
+
+
 def create_heartbeat_system_message(
     agent_id: str,
     model: str,
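
Note: tool_call_specs is a plain list of dicts, as the inline comment in the new signature describes. A hedged sketch of the expected input shape; tool names and values are placeholders, and tool_execution_results and function_responses must line up index-by-index with the specs:

    # Hypothetical input for create_parallel_tool_messages_from_llm_response.
    tool_call_specs = [
        {"name": "web_search", "arguments": {"query": "weather in SF"}, "id": "call_1"},
        {"name": "archival_memory_search", "arguments": {"query": "preferences"}},  # no id -> uuid4 assigned
    ]
    # For each spec i:
    #   tool_execution_results[i].success_flag/.status/.stdout/.stderr feed the ToolReturn,
    #   function_responses[i] is the raw string packaged into the tool message content.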
letta/server/server.py CHANGED
@@ -2,7 +2,6 @@ import asyncio
 import json
 import os
 import traceback
-import warnings
 from abc import abstractmethod
 from datetime import datetime
 from pathlib import Path
@@ -94,6 +93,7 @@ from letta.services.mcp.base_client import AsyncBaseMCPClient
 from letta.services.mcp.sse_client import MCP_CONFIG_TOPLEVEL_KEY, AsyncSSEMCPClient
 from letta.services.mcp.stdio_client import AsyncStdioMCPClient
 from letta.services.mcp_manager import MCPManager
+from letta.services.mcp_server_manager import MCPServerManager
 from letta.services.message_manager import MessageManager
 from letta.services.organization_manager import OrganizationManager
 from letta.services.passage_manager import PassageManager
@@ -154,6 +154,7 @@ class SyncServer(object):
         self.user_manager = UserManager()
         self.tool_manager = ToolManager()
         self.mcp_manager = MCPManager()
+        self.mcp_server_manager = MCPServerManager()
         self.block_manager = BlockManager()
         self.source_manager = SourceManager()
         self.sandbox_config_manager = SandboxConfigManager()
@@ -482,8 +483,21 @@
         request: UpdateAgent,
         actor: User,
     ) -> AgentState:
-        if request.model is not None:
-            request.llm_config = await self.get_llm_config_from_handle_async(handle=request.model, actor=actor)
+        # Build llm_config from convenience fields if llm_config is not provided
+        if request.llm_config is None and (
+            request.model is not None or request.context_window_limit is not None or request.max_tokens is not None
+        ):
+            if request.model is None:
+                agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+                request.model = agent.llm_config.handle
+            config_params = {
+                "handle": request.model,
+                "context_window_limit": request.context_window_limit,
+                "max_tokens": request.max_tokens,
+            }
+            log_event(name="start get_cached_llm_config", attributes=config_params)
+            request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
+            log_event(name="end get_cached_llm_config", attributes=config_params)
 
         if request.embedding is not None:
             request.embedding_config = await self.get_embedding_config_from_handle_async(handle=request.embedding, actor=actor)
@@ -761,8 +775,6 @@
 
         # TODO: move this into a thread
         source = await self.source_manager.get_source_by_id(source_id=source_id)
-        if source is None:
-            raise NoResultFound(f"Source {source_id} does not exist")
         connector = DirectoryConnector(input_files=[file_path])
         num_passages, num_documents = await self.load_data(user_id=source.created_by_id, source_name=source.name, connector=connector)
 
@@ -925,11 +937,10 @@
                 async with asyncio.timeout(constants.GET_PROVIDERS_TIMEOUT_SECONDS):
                     return await provider.list_llm_models_async()
             except asyncio.TimeoutError:
-                warnings.warn(f"Timeout while listing LLM models for provider {provider}")
+                logger.warning(f"Timeout while listing LLM models for provider {provider}")
                 return []
             except Exception as e:
-                traceback.print_exc()
-                warnings.warn(f"Error while listing LLM models for provider {provider}: {e}")
+                logger.exception(f"Error while listing LLM models for provider {provider}: {e}")
                 return []
 
         # Execute all provider model listing tasks concurrently
@@ -968,10 +979,7 @@
                 # All providers now have list_embedding_models_async
                 return await provider.list_embedding_models_async()
             except Exception as e:
-                import traceback
-
-                traceback.print_exc()
-                warnings.warn(f"An error occurred while listing embedding models for provider {provider}: {e}")
+                logger.exception(f"An error occurred while listing embedding models for provider {provider}: {e}")
                 return []
 
         # Execute all provider model listing tasks concurrently
@@ -1140,9 +1148,9 @@
         #             llm_config = LLMConfig(**config_data)
         #             llm_models.append(llm_config)
         #         except (json.JSONDecodeError, ValueError) as e:
-        #             warnings.warn(f"Error parsing LLM config file {filename}: {e}")
+        #             logger.warning(f"Error parsing LLM config file {filename}: {e}")
         #     except Exception as e:
-        #         warnings.warn(f"Error reading LLM configs directory: {e}")
+        #         logger.warning(f"Error reading LLM configs directory: {e}")
         return llm_models
 
     def get_local_embedding_configs(self):
@@ -1160,9 +1168,9 @@
         #             embedding_config = EmbeddingConfig(**config_data)
         #             embedding_models.append(embedding_config)
        #         except (json.JSONDecodeError, ValueError) as e:
-        #             warnings.warn(f"Error parsing embedding config file {filename}: {e}")
+        #             logger.warning(f"Error parsing embedding config file {filename}: {e}")
         #     except Exception as e:
-        #         warnings.warn(f"Error reading embedding configs directory: {e}")
+        #         logger.warning(f"Error reading embedding configs directory: {e}")
         return embedding_models
 
     def add_llm_model(self, request: LLMConfig) -> LLMConfig:
@@ -1501,7 +1509,7 @@
         # supports_token_streaming = ["openai", "anthropic", "xai", "deepseek"]
         supports_token_streaming = ["openai", "anthropic", "deepseek"]  # TODO re-enable xAI once streaming is patched
         if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
-            warnings.warn(
+            logger.warning(
                 f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
             )
             stream_tokens = False
@@ -1603,10 +1611,7 @@
         except HTTPException:
             raise
         except Exception as e:
-            print(e)
-            import traceback
-
-            traceback.print_exc()
+            logger.exception(f"Error sending message to agent: {e}")
             raise HTTPException(status_code=500, detail=f"{e}")
 
     @trace_method
@@ -1636,7 +1641,7 @@
         llm_config = letta_multi_agent.agent_state.llm_config
         supports_token_streaming = ["openai", "anthropic", "deepseek"]
         if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
-            warnings.warn(
+            logger.warning(
                 f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
            )
            stream_tokens = False
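
Note: the update-agent path now resolves model, context_window_limit, and max_tokens into an llm_config only when llm_config is not supplied directly. A hedged sketch of a request body that exercises this; the handle and limits are placeholders, and the authoritative field list is the UpdateAgent schema in letta/schemas/agent.py:

    # Assumed body for the agent update endpoint; the server turns these convenience
    # fields into request.llm_config via get_cached_llm_config_async.
    update_payload = {
        "model": "openai/gpt-4o-mini",    # provider/model handle (placeholder)
        "context_window_limit": 32000,    # optional override
        "max_tokens": 1024,               # optional override
        # "llm_config": {...}             # if set, the convenience fields are not used to rebuild it
    }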
letta/server/ws_api/server.py CHANGED
@@ -6,10 +6,13 @@ import traceback
 import websockets
 
 import letta.server.ws_api.protocol as protocol
+from letta.log import get_logger
 from letta.server.constants import WS_DEFAULT_PORT
 from letta.server.server import SyncServer
 from letta.server.ws_api.interface import SyncWebSocketInterface
 
+logger = get_logger(__name__)
+
 
 class WebSocketServer:
     def __init__(self, host="localhost", port=WS_DEFAULT_PORT):
@@ -68,8 +71,7 @@
                         await websocket.send(protocol.server_command_response("OK: Agent initialized"))
                     except Exception as e:
                         self.agent = None
-                        print(f"[server] self.create_new_agent failed with:\n{e}")
-                        print(f"{traceback.format_exc()}")
+                        logger.exception(f"[server] self.create_new_agent failed with: {e}")
                         await websocket.send(protocol.server_command_response(f"Error: Failed to init agent - {str(e)}"))
 
                 else:
@@ -88,8 +90,7 @@
                     # self.run_step(user_message)
                     self.server.user_message(user_id="NULL", agent_id=data["agent_id"], message=user_message)
                 except Exception as e:
-                    print(f"[server] self.server.user_message failed with:\n{e}")
-                    print(f"{traceback.format_exc()}")
+                    logger.exception(f"[server] self.server.user_message failed with: {e}")
                     await websocket.send(protocol.server_agent_response_error(f"server.user_message failed with: {e}"))
                 await asyncio.sleep(1)  # pause before sending the terminating message, w/o this messages may be missed
                 await websocket.send(protocol.server_agent_response_end())
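
Note: across this release, ad-hoc print/traceback/warnings calls are replaced by the structured logger from letta.log. A minimal sketch of the idiom the diffs converge on:

    from letta.log import get_logger

    logger = get_logger(__name__)

    def do_work():
        try:
            raise RuntimeError("boom")  # placeholder failure
        except Exception as e:
            # logger.exception records the message plus the full traceback,
            # replacing the old print(e) / traceback.print_exc() pairs.
            logger.exception(f"do_work failed with: {e}")

    do_work()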