letta-nightly 0.12.1.dev20251024104217__py3-none-any.whl → 0.13.0.dev20251024223017__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly has been flagged as potentially problematic.
- letta/__init__.py +2 -3
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/simple_llm_request_adapter.py +8 -5
- letta/adapters/simple_llm_stream_adapter.py +22 -6
- letta/agents/agent_loop.py +10 -3
- letta/agents/base_agent.py +4 -1
- letta/agents/helpers.py +41 -9
- letta/agents/letta_agent.py +11 -10
- letta/agents/letta_agent_v2.py +47 -37
- letta/agents/letta_agent_v3.py +395 -300
- letta/agents/voice_agent.py +8 -6
- letta/agents/voice_sleeptime_agent.py +3 -3
- letta/constants.py +30 -7
- letta/errors.py +20 -0
- letta/functions/function_sets/base.py +55 -3
- letta/functions/mcp_client/types.py +33 -57
- letta/functions/schema_generator.py +135 -23
- letta/groups/sleeptime_multi_agent_v3.py +6 -11
- letta/groups/sleeptime_multi_agent_v4.py +227 -0
- letta/helpers/converters.py +78 -4
- letta/helpers/crypto_utils.py +6 -2
- letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +9 -11
- letta/interfaces/anthropic_streaming_interface.py +3 -4
- letta/interfaces/gemini_streaming_interface.py +4 -6
- letta/interfaces/openai_streaming_interface.py +63 -28
- letta/llm_api/anthropic_client.py +7 -4
- letta/llm_api/deepseek_client.py +6 -4
- letta/llm_api/google_ai_client.py +3 -12
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/helpers.py +90 -61
- letta/llm_api/llm_api_tools.py +4 -1
- letta/llm_api/openai.py +12 -12
- letta/llm_api/openai_client.py +53 -16
- letta/local_llm/constants.py +4 -3
- letta/local_llm/json_parser.py +5 -2
- letta/local_llm/utils.py +2 -3
- letta/log.py +171 -7
- letta/orm/agent.py +43 -9
- letta/orm/archive.py +4 -0
- letta/orm/custom_columns.py +15 -0
- letta/orm/identity.py +11 -11
- letta/orm/mcp_server.py +9 -0
- letta/orm/message.py +6 -1
- letta/orm/run_metrics.py +7 -2
- letta/orm/sqlalchemy_base.py +2 -2
- letta/orm/tool.py +3 -0
- letta/otel/tracing.py +2 -0
- letta/prompts/prompt_generator.py +7 -2
- letta/schemas/agent.py +41 -10
- letta/schemas/agent_file.py +3 -0
- letta/schemas/archive.py +4 -2
- letta/schemas/block.py +2 -1
- letta/schemas/enums.py +36 -3
- letta/schemas/file.py +3 -3
- letta/schemas/folder.py +2 -1
- letta/schemas/group.py +2 -1
- letta/schemas/identity.py +18 -9
- letta/schemas/job.py +3 -1
- letta/schemas/letta_message.py +71 -12
- letta/schemas/letta_request.py +7 -3
- letta/schemas/letta_stop_reason.py +0 -25
- letta/schemas/llm_config.py +8 -2
- letta/schemas/mcp.py +80 -83
- letta/schemas/mcp_server.py +349 -0
- letta/schemas/memory.py +20 -8
- letta/schemas/message.py +212 -67
- letta/schemas/providers/anthropic.py +13 -6
- letta/schemas/providers/azure.py +6 -4
- letta/schemas/providers/base.py +8 -4
- letta/schemas/providers/bedrock.py +6 -2
- letta/schemas/providers/cerebras.py +7 -3
- letta/schemas/providers/deepseek.py +2 -1
- letta/schemas/providers/google_gemini.py +15 -6
- letta/schemas/providers/groq.py +2 -1
- letta/schemas/providers/lmstudio.py +9 -6
- letta/schemas/providers/mistral.py +2 -1
- letta/schemas/providers/openai.py +7 -2
- letta/schemas/providers/together.py +9 -3
- letta/schemas/providers/xai.py +7 -3
- letta/schemas/run.py +7 -2
- letta/schemas/run_metrics.py +2 -1
- letta/schemas/sandbox_config.py +2 -2
- letta/schemas/secret.py +3 -158
- letta/schemas/source.py +2 -2
- letta/schemas/step.py +2 -2
- letta/schemas/tool.py +24 -1
- letta/schemas/usage.py +0 -1
- letta/server/rest_api/app.py +123 -7
- letta/server/rest_api/dependencies.py +3 -0
- letta/server/rest_api/interface.py +7 -4
- letta/server/rest_api/redis_stream_manager.py +16 -1
- letta/server/rest_api/routers/v1/__init__.py +7 -0
- letta/server/rest_api/routers/v1/agents.py +332 -322
- letta/server/rest_api/routers/v1/archives.py +127 -40
- letta/server/rest_api/routers/v1/blocks.py +54 -6
- letta/server/rest_api/routers/v1/chat_completions.py +146 -0
- letta/server/rest_api/routers/v1/folders.py +27 -35
- letta/server/rest_api/routers/v1/groups.py +23 -35
- letta/server/rest_api/routers/v1/identities.py +24 -10
- letta/server/rest_api/routers/v1/internal_runs.py +107 -0
- letta/server/rest_api/routers/v1/internal_templates.py +162 -179
- letta/server/rest_api/routers/v1/jobs.py +15 -27
- letta/server/rest_api/routers/v1/mcp_servers.py +309 -0
- letta/server/rest_api/routers/v1/messages.py +23 -34
- letta/server/rest_api/routers/v1/organizations.py +6 -27
- letta/server/rest_api/routers/v1/providers.py +35 -62
- letta/server/rest_api/routers/v1/runs.py +30 -43
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -4
- letta/server/rest_api/routers/v1/sources.py +26 -42
- letta/server/rest_api/routers/v1/steps.py +16 -29
- letta/server/rest_api/routers/v1/tools.py +17 -13
- letta/server/rest_api/routers/v1/users.py +5 -17
- letta/server/rest_api/routers/v1/voice.py +18 -27
- letta/server/rest_api/streaming_response.py +5 -2
- letta/server/rest_api/utils.py +187 -25
- letta/server/server.py +27 -22
- letta/server/ws_api/server.py +5 -4
- letta/services/agent_manager.py +148 -26
- letta/services/agent_serialization_manager.py +6 -1
- letta/services/archive_manager.py +168 -15
- letta/services/block_manager.py +14 -4
- letta/services/file_manager.py +33 -29
- letta/services/group_manager.py +10 -0
- letta/services/helpers/agent_manager_helper.py +65 -11
- letta/services/identity_manager.py +105 -4
- letta/services/job_manager.py +11 -1
- letta/services/mcp/base_client.py +2 -2
- letta/services/mcp/oauth_utils.py +33 -8
- letta/services/mcp_manager.py +174 -78
- letta/services/mcp_server_manager.py +1331 -0
- letta/services/message_manager.py +109 -4
- letta/services/organization_manager.py +4 -4
- letta/services/passage_manager.py +9 -25
- letta/services/provider_manager.py +91 -15
- letta/services/run_manager.py +72 -15
- letta/services/sandbox_config_manager.py +45 -3
- letta/services/source_manager.py +15 -8
- letta/services/step_manager.py +24 -1
- letta/services/streaming_service.py +581 -0
- letta/services/summarizer/summarizer.py +1 -1
- letta/services/tool_executor/core_tool_executor.py +111 -0
- letta/services/tool_executor/files_tool_executor.py +5 -3
- letta/services/tool_executor/sandbox_tool_executor.py +2 -2
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_manager.py +10 -3
- letta/services/tool_sandbox/base.py +61 -1
- letta/services/tool_sandbox/local_sandbox.py +1 -3
- letta/services/user_manager.py +2 -2
- letta/settings.py +49 -5
- letta/system.py +14 -5
- letta/utils.py +73 -1
- letta/validators.py +105 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251024223017.dist-info}/METADATA +4 -2
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251024223017.dist-info}/RECORD +157 -151
- letta/schemas/letta_ping.py +0 -28
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251024223017.dist-info}/WHEEL +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251024223017.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251024223017.dist-info}/licenses/LICENSE +0 -0
letta/server/rest_api/routers/v1/voice.py CHANGED
@@ -1,14 +1,9 @@
 from typing import TYPE_CHECKING, Any, Dict
 
-import openai
 from fastapi import APIRouter, Body, Depends
-from fastapi.responses import StreamingResponse
 
-from letta.agents.voice_agent import VoiceAgent
 from letta.log import get_logger
 from letta.server.rest_api.dependencies import HeaderParams, get_headers, get_letta_server
-from letta.server.rest_api.utils import get_user_message_from_chat_completions_request
-from letta.settings import model_settings
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -23,11 +18,16 @@ logger = get_logger(__name__)
     "/{agent_id}/chat/completions",
     response_model=None,
     operation_id="create_voice_chat_completions",
+    deprecated=True,
     responses={
         200: {
             "description": "Successful response",
             "content": {"text/event-stream": {}},
-        }
+        },
+        410: {
+            "description": "Endpoint deprecated",
+            "content": {"application/json": {"example": {"detail": "This endpoint has been deprecated"}}},
+        },
     },
 )
 async def create_voice_chat_completions(
@@ -36,28 +36,19 @@ async def create_voice_chat_completions(
     server: "SyncServer" = Depends(get_letta_server),
     headers: HeaderParams = Depends(get_headers),
 ):
-
+    """
+    DEPRECATED: This voice-beta endpoint has been deprecated.
 
-
-
-        api_key=model_settings.openai_api_key,
-        max_retries=0,
-        http_client=server.httpx_client,
-    )
+    The voice functionality has been integrated into the main chat completions endpoint.
+    Please use the standard /v1/agents/{agent_id}/messages endpoint instead.
 
-
-
-
-
-
-        agent_manager=server.agent_manager,
-        block_manager=server.block_manager,
-        run_manager=server.run_manager,
-        passage_manager=server.passage_manager,
-        actor=actor,
-    )
+    This endpoint will be removed in a future version.
+    """
+    from fastapi import HTTPException
+
+    logger.warning(f"Deprecated voice-beta endpoint called for agent {agent_id}")
 
-
-
-
+    raise HTTPException(
+        status_code=410,
+        detail="The /voice-beta endpoint has been deprecated and is no longer available.",
     )
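The replacement path is spelled out in the new docstring above: clients should move from the deprecated voice chat-completions route to the standard agent messages endpoint. A minimal, hypothetical client-side sketch of that migration (the payload shape, base URL, and agent id below are illustrative assumptions, not taken from this diff):

import httpx

def send_agent_message(base_url: str, agent_id: str, text: str) -> dict:
    # POST to the endpoint the deprecation notice points at: /v1/agents/{agent_id}/messages
    response = httpx.post(
        f"{base_url}/v1/agents/{agent_id}/messages",
        json={"messages": [{"role": "user", "content": text}]},  # assumed request body shape
        timeout=60.0,
    )
    response.raise_for_status()
    return response.json()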
letta/server/rest_api/streaming_response.py CHANGED
@@ -5,6 +5,8 @@
 import asyncio
 import json
 from collections.abc import AsyncIterator
+from datetime import datetime, timezone
+from uuid import uuid4
 
 import anyio
 from fastapi import HTTPException
@@ -14,7 +16,7 @@ from starlette.types import Send
 from letta.errors import LettaUnexpectedStreamCancellationError, PendingApprovalError
 from letta.log import get_logger
 from letta.schemas.enums import RunStatus
-from letta.schemas.
+from letta.schemas.letta_message import LettaPing
 from letta.schemas.user import User
 from letta.server.rest_api.utils import capture_sentry_exception
 from letta.services.run_manager import RunManager
@@ -34,6 +36,7 @@ class RunCancelledException(Exception):
 
 async def add_keepalive_to_stream(
     stream_generator: AsyncIterator[str | bytes],
+    run_id: str,
     keepalive_interval: float = 30.0,
 ) -> AsyncIterator[str | bytes]:
     """
@@ -83,7 +86,7 @@
                 # No data received within keepalive interval
                 if not stream_exhausted:
                     # Send keepalive ping in the same format as [DONE]
-                    yield f"data: {LettaPing().model_dump_json()}\n\n"
+                    yield f"data: {LettaPing(id=f'ping-{uuid4()}', date=datetime.now(timezone.utc), run_id=run_id).model_dump_json()}\n\n"
                 else:
                     # Stream is done but queue might be processing
                     # Check if there's anything left
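Because add_keepalive_to_stream now requires a run_id and stamps it (along with an id and timestamp) onto every LettaPing, callers must thread the run id through when wrapping a stream. A minimal sketch of the new call shape, with a stand-in generator and a made-up run id (not code from this package):

import asyncio
from typing import AsyncIterator

from letta.server.rest_api.streaming_response import add_keepalive_to_stream

async def demo_stream() -> AsyncIterator[str]:
    # Stand-in for the real SSE generator produced by the agent loop
    yield 'data: {"message_type": "assistant_message"}\n\n'
    await asyncio.sleep(45)  # long enough gap that the wrapper should emit a keepalive ping
    yield "data: [DONE]\n\n"

async def main() -> None:
    # run_id is now a required argument; each ping carries id, date, and this run_id
    async for chunk in add_keepalive_to_stream(demo_stream(), run_id="run-123", keepalive_interval=30.0):
        print(chunk, end="")

# asyncio.run(main())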
letta/server/rest_api/utils.py CHANGED
@@ -3,7 +3,7 @@ import json
 import os
 import uuid
 from enum import Enum
-from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
+from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Union, cast
 
 from fastapi import Header, HTTPException
 from openai.types.chat import ChatCompletionMessageParam
@@ -27,6 +27,7 @@ from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import tracer
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import ToolReturn as LettaToolReturn
 from letta.schemas.letta_message_content import (
     OmittedReasoningContent,
     ReasoningContent,
@@ -120,7 +121,7 @@ async def sse_async_generator(
                 err_msg = f"Expected LettaUsageStatistics, got {type(usage)}"
                 logger.error(err_msg)
                 raise ValueError(err_msg)
-            yield sse_formatter(usage.model_dump(
+            yield sse_formatter(usage.model_dump())
 
         except ContextWindowExceededError as e:
             capture_sentry_exception(e)
@@ -168,7 +169,23 @@ def create_input_messages(input_messages: List[MessageCreate], agent_id: str, ti
     return messages
 
 
-def create_approval_response_message_from_input(
+def create_approval_response_message_from_input(
+    agent_state: AgentState, input_message: ApprovalCreate, run_id: Optional[str] = None
+) -> List[Message]:
+    def maybe_convert_tool_return_message(maybe_tool_return: LettaToolReturn):
+        if isinstance(maybe_tool_return, LettaToolReturn):
+            packaged_function_response = package_function_response(
+                maybe_tool_return.status == "success", maybe_tool_return.tool_return, agent_state.timezone
+            )
+            return ToolReturn(
+                tool_call_id=maybe_tool_return.tool_call_id,
+                status=maybe_tool_return.status,
+                func_response=packaged_function_response,
+                stdout=maybe_tool_return.stdout,
+                stderr=maybe_tool_return.stderr,
+            )
+        return maybe_tool_return
+
     return [
         Message(
             role=MessageRole.approval,
@@ -177,6 +194,8 @@ def create_approval_response_message_from_input(agent_state: AgentState, input_m
             approval_request_id=input_message.approval_request_id,
             approve=input_message.approve,
             denial_reason=input_message.reason,
+            approvals=[maybe_convert_tool_return_message(approval) for approval in input_message.approvals],
+            run_id=run_id,
         )
     ]
 
@@ -184,45 +203,77 @@
 def create_approval_request_message_from_llm_response(
     agent_id: str,
     model: str,
-
-
-    tool_call_id: str,
-    actor: User,
-    continue_stepping: bool = False,
+    requested_tool_calls: List[OpenAIToolCall],
+    allowed_tool_calls: List[OpenAIToolCall] = [],
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
     step_id: str | None = None,
     run_id: str = None,
-    append_request_heartbeat: bool = True,
 ) -> Message:
+    messages = []
+    if allowed_tool_calls:
+        oai_tool_calls = [
+            OpenAIToolCall(
+                id=tool_call.id,
+                function=OpenAIFunction(
+                    name=tool_call.function.name,
+                    arguments=tool_call.function.arguments,
+                ),
+                type="function",
+            )
+            for tool_call in allowed_tool_calls
+        ]
+        tool_message = Message(
+            role=MessageRole.assistant,
+            content=reasoning_content if reasoning_content else [],
+            agent_id=agent_id,
+            model=model,
+            tool_calls=oai_tool_calls,
+            tool_call_id=allowed_tool_calls[0].id,
+            created_at=get_utc_time(),
+            step_id=step_id,
+            run_id=run_id,
+        )
+        if pre_computed_assistant_message_id:
+            tool_message.id = pre_computed_assistant_message_id
+        messages.append(tool_message)
     # Construct the tool call with the assistant's message
-
-
-
-
-
-
-
-
-    )
-
-
+    oai_tool_calls = [
+        OpenAIToolCall(
+            id=tool_call.id,
+            function=OpenAIFunction(
+                name=tool_call.function.name,
+                arguments=tool_call.function.arguments,
+            ),
+            type="function",
+        )
+        for tool_call in requested_tool_calls
+    ]
     # TODO: Use ToolCallContent instead of tool_calls
     # TODO: This helps preserve ordering
     approval_message = Message(
         role=MessageRole.approval,
-        content=reasoning_content if reasoning_content else [],
+        content=reasoning_content if reasoning_content and not allowed_tool_calls else [],
         agent_id=agent_id,
         model=model,
-        tool_calls=
-        tool_call_id=
+        tool_calls=oai_tool_calls,
+        tool_call_id=oai_tool_calls[0].id,
        created_at=get_utc_time(),
         step_id=step_id,
         run_id=run_id,
     )
     if pre_computed_assistant_message_id:
-        approval_message.id = pre_computed_assistant_message_id
-
+        approval_message.id = decrement_message_uuid(pre_computed_assistant_message_id)
+    messages.append(approval_message)
+    return messages
+
+
+def decrement_message_uuid(message_id: str):
+    message_uuid = uuid.UUID(message_id.split("-", maxsplit=1)[1])
+    uuid_as_int = message_uuid.int
+    decremented_int = uuid_as_int - 1
+    decremented_uuid = uuid.UUID(int=decremented_int)
+    return "message-" + str(decremented_uuid)
 
 
 def create_letta_messages_from_llm_response(
@@ -361,6 +412,117 @@ def create_letta_messages_from_llm_response(
     return messages
 
 
+def create_parallel_tool_messages_from_llm_response(
+    agent_id: str,
+    model: str,
+    tool_call_specs: List[Dict[str, Any]],  # List of tool call specs: {"name": str, "arguments": Dict, "id": Optional[str]}
+    tool_execution_results: List[ToolExecutionResult],
+    function_responses: List[Optional[str]],
+    timezone: str,
+    run_id: Optional[str] = None,
+    step_id: Optional[str] = None,
+    reasoning_content: Optional[
+        List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent | SummarizedReasoningContent]]
+    ] = None,
+    pre_computed_assistant_message_id: Optional[str] = None,
+    llm_batch_item_id: Optional[str] = None,
+    is_approval_response: bool = False,
+    tool_returns: List[ToolReturn] = [],
+) -> List[Message]:
+    """
+    Build two messages representing a parallel tool-call step:
+    - One assistant message with ALL tool_calls populated (tool_call_id left empty)
+    - One tool message with ALL tool_returns populated (tool_call_id left empty)
+
+    Notes:
+    - Consumers should read tool_calls/tool_returns arrays for per-call details.
+    - The tool message's content includes only the first call's packaged response for
+      backward-compatibility with legacy renderers. UIs should prefer tool_returns.
+    - When invoked for an approval response, the assistant message is omitted (the approval
+      tool call was previously surfaced).
+    """
+
+    # Construct OpenAI-style tool_calls for the assistant message
+    openai_tool_calls: List[OpenAIToolCall] = []
+    for spec in tool_call_specs:
+        name = spec.get("name")
+        args = spec.get("arguments", {})
+        call_id = spec.get("id") or str(uuid.uuid4())
+        # Ensure the spec carries the resolved id so returns/content can reference it
+        if not spec.get("id"):
+            spec["id"] = call_id
+        openai_tool_calls.append(
+            OpenAIToolCall(
+                id=call_id,
+                function=OpenAIFunction(name=name, arguments=json.dumps(args)),
+                type="function",
+            )
+        )
+
+    messages: List[Message] = []
+
+    if not is_approval_response:
+        # Assistant message with all tool_calls (no single tool_call_id)
+        # Safeguard against empty text messages
+        content: List[
+            Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent, SummarizedReasoningContent]
+        ] = []
+        if reasoning_content:
+            for content_part in reasoning_content:
+                if isinstance(content_part, TextContent) and content_part.text == "":
+                    continue
+                content.append(content_part)
+
+        assistant_message = Message(
+            role=MessageRole.assistant,
+            content=content,
+            agent_id=agent_id,
+            model=model,
+            tool_calls=openai_tool_calls,
+            tool_call_id=None,
+            created_at=get_utc_time(),
+            batch_item_id=llm_batch_item_id,
+            run_id=run_id,
+        )
+        if step_id:
+            assistant_message.step_id = step_id
+        if pre_computed_assistant_message_id:
+            assistant_message.id = pre_computed_assistant_message_id
+        messages.append(assistant_message)
+
+    content: List[TextContent] = []
+    for spec, exec_result, response in zip(tool_call_specs, tool_execution_results, function_responses):
+        packaged = package_function_response(exec_result.success_flag, response, timezone)
+        content.append(TextContent(text=packaged))
+        tool_returns.append(
+            ToolReturn(
+                tool_call_id=spec.get("id"),
+                status=exec_result.status,
+                stdout=exec_result.stdout,
+                stderr=exec_result.stderr,
+                func_response=packaged,
+            )
+        )
+
+    tool_message = Message(
+        role=MessageRole.tool,
+        content=content,
+        agent_id=agent_id,
+        model=model,
+        tool_calls=[],
+        tool_call_id=tool_returns[0].tool_call_id,  # For legacy reasons, set to first one
+        created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
+        tool_returns=tool_returns,
+        run_id=run_id,
+    )
+    if step_id:
+        tool_message.step_id = step_id
+
+    messages.append(tool_message)
+    return messages
+
+
 def create_heartbeat_system_message(
     agent_id: str,
     model: str,
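The new decrement_message_uuid helper added in utils.py above gives the approval message a distinct id directly adjacent to the pre-computed assistant message id, by treating the UUID part of a "message-<uuid>" id as a 128-bit integer and subtracting one. A standalone sketch of that arithmetic with a made-up id:

import uuid

def decrement_message_uuid(message_id: str) -> str:
    # Same arithmetic as the helper added in this diff: strip the "message-" prefix,
    # parse the remainder as a UUID, subtract one from its integer value, re-prefix.
    message_uuid = uuid.UUID(message_id.split("-", maxsplit=1)[1])
    return "message-" + str(uuid.UUID(int=message_uuid.int - 1))

print(decrement_message_uuid("message-00000000-0000-0000-0000-00000000000a"))
# -> message-00000000-0000-0000-0000-000000000009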
letta/server/server.py CHANGED
@@ -2,7 +2,6 @@ import asyncio
 import json
 import os
 import traceback
-import warnings
 from abc import abstractmethod
 from datetime import datetime
 from pathlib import Path
@@ -94,6 +93,7 @@ from letta.services.mcp.base_client import AsyncBaseMCPClient
 from letta.services.mcp.sse_client import MCP_CONFIG_TOPLEVEL_KEY, AsyncSSEMCPClient
 from letta.services.mcp.stdio_client import AsyncStdioMCPClient
 from letta.services.mcp_manager import MCPManager
+from letta.services.mcp_server_manager import MCPServerManager
 from letta.services.message_manager import MessageManager
 from letta.services.organization_manager import OrganizationManager
 from letta.services.passage_manager import PassageManager
@@ -154,6 +154,7 @@ class SyncServer(object):
         self.user_manager = UserManager()
         self.tool_manager = ToolManager()
         self.mcp_manager = MCPManager()
+        self.mcp_server_manager = MCPServerManager()
         self.block_manager = BlockManager()
         self.source_manager = SourceManager()
         self.sandbox_config_manager = SandboxConfigManager()
@@ -482,8 +483,21 @@ class SyncServer(object):
         request: UpdateAgent,
         actor: User,
     ) -> AgentState:
-        if
-
+        # Build llm_config from convenience fields if llm_config is not provided
+        if request.llm_config is None and (
+            request.model is not None or request.context_window_limit is not None or request.max_tokens is not None
+        ):
+            if request.model is None:
+                agent = await self.agent_manager.get_agent_by_id_async(agent_id=agent_id, actor=actor)
+                request.model = agent.llm_config.handle
+            config_params = {
+                "handle": request.model,
+                "context_window_limit": request.context_window_limit,
+                "max_tokens": request.max_tokens,
+            }
+            log_event(name="start get_cached_llm_config", attributes=config_params)
+            request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
+            log_event(name="end get_cached_llm_config", attributes=config_params)
 
         if request.embedding is not None:
             request.embedding_config = await self.get_embedding_config_from_handle_async(handle=request.embedding, actor=actor)
@@ -761,8 +775,6 @@
 
         # TODO: move this into a thread
         source = await self.source_manager.get_source_by_id(source_id=source_id)
-        if source is None:
-            raise NoResultFound(f"Source {source_id} does not exist")
         connector = DirectoryConnector(input_files=[file_path])
         num_passages, num_documents = await self.load_data(user_id=source.created_by_id, source_name=source.name, connector=connector)
 
@@ -925,11 +937,10 @@
                 async with asyncio.timeout(constants.GET_PROVIDERS_TIMEOUT_SECONDS):
                     return await provider.list_llm_models_async()
             except asyncio.TimeoutError:
-
+                logger.warning(f"Timeout while listing LLM models for provider {provider}")
                 return []
            except Exception as e:
-
-                warnings.warn(f"Error while listing LLM models for provider {provider}: {e}")
+                logger.exception(f"Error while listing LLM models for provider {provider}: {e}")
                 return []
 
         # Execute all provider model listing tasks concurrently
@@ -968,10 +979,7 @@
                 # All providers now have list_embedding_models_async
                 return await provider.list_embedding_models_async()
             except Exception as e:
-
-
-                traceback.print_exc()
-                warnings.warn(f"An error occurred while listing embedding models for provider {provider}: {e}")
+                logger.exception(f"An error occurred while listing embedding models for provider {provider}: {e}")
                 return []
 
         # Execute all provider model listing tasks concurrently
@@ -1140,9 +1148,9 @@
         # llm_config = LLMConfig(**config_data)
         # llm_models.append(llm_config)
         # except (json.JSONDecodeError, ValueError) as e:
-        #
+        # logger.warning(f"Error parsing LLM config file {filename}: {e}")
         # except Exception as e:
-        #
+        # logger.warning(f"Error reading LLM configs directory: {e}")
         return llm_models
 
     def get_local_embedding_configs(self):
@@ -1160,9 +1168,9 @@
         # embedding_config = EmbeddingConfig(**config_data)
         # embedding_models.append(embedding_config)
         # except (json.JSONDecodeError, ValueError) as e:
-        #
+        # logger.warning(f"Error parsing embedding config file {filename}: {e}")
         # except Exception as e:
-        #
+        # logger.warning(f"Error reading embedding configs directory: {e}")
         return embedding_models
 
     def add_llm_model(self, request: LLMConfig) -> LLMConfig:
@@ -1501,7 +1509,7 @@
         # supports_token_streaming = ["openai", "anthropic", "xai", "deepseek"]
        supports_token_streaming = ["openai", "anthropic", "deepseek"]  # TODO re-enable xAI once streaming is patched
         if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
-
+            logger.warning(
                 f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
             )
             stream_tokens = False
@@ -1603,10 +1611,7 @@
         except HTTPException:
             raise
         except Exception as e:
-
-            import traceback
-
-            traceback.print_exc()
+            logger.exception(f"Error sending message to agent: {e}")
             raise HTTPException(status_code=500, detail=f"{e}")
 
     @trace_method
@@ -1636,7 +1641,7 @@
         llm_config = letta_multi_agent.agent_state.llm_config
         supports_token_streaming = ["openai", "anthropic", "deepseek"]
         if stream_tokens and (llm_config.model_endpoint_type not in supports_token_streaming):
-
+            logger.warning(
                 f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
             )
             stream_tokens = False
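The update_agent change in server.py above lets callers send just the convenience fields (model, context_window_limit, max_tokens) and have the server resolve a full llm_config, falling back to the agent's current model handle when model is omitted. A hypothetical request sketch (the endpoint path, port, and model handle are assumptions for illustration, not taken from this diff):

import httpx

# Update only the convenience fields; the server builds llm_config via get_cached_llm_config_async.
response = httpx.patch(
    "http://localhost:8283/v1/agents/agent-123",   # assumed update-agent route and agent id
    json={
        "model": "openai/gpt-4o-mini",             # placeholder model handle
        "context_window_limit": 16000,
        "max_tokens": 1024,
    },
)
response.raise_for_status()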
letta/server/ws_api/server.py CHANGED
@@ -6,10 +6,13 @@ import traceback
 import websockets
 
 import letta.server.ws_api.protocol as protocol
+from letta.log import get_logger
 from letta.server.constants import WS_DEFAULT_PORT
 from letta.server.server import SyncServer
 from letta.server.ws_api.interface import SyncWebSocketInterface
 
+logger = get_logger(__name__)
+
 
 class WebSocketServer:
     def __init__(self, host="localhost", port=WS_DEFAULT_PORT):
@@ -68,8 +71,7 @@
                         await websocket.send(protocol.server_command_response("OK: Agent initialized"))
                     except Exception as e:
                         self.agent = None
-
-                        print(f"{traceback.format_exc()}")
+                        logger.exception(f"[server] self.create_new_agent failed with: {e}")
                         await websocket.send(protocol.server_command_response(f"Error: Failed to init agent - {str(e)}"))
 
                 else:
@@ -88,8 +90,7 @@
                     # self.run_step(user_message)
                     self.server.user_message(user_id="NULL", agent_id=data["agent_id"], message=user_message)
                 except Exception as e:
-
-                    print(f"{traceback.format_exc()}")
+                    logger.exception(f"[server] self.server.user_message failed with: {e}")
                     await websocket.send(protocol.server_agent_response_error(f"server.user_message failed with: {e}"))
                     await asyncio.sleep(1)  # pause before sending the terminating message, w/o this messages may be missed
                     await websocket.send(protocol.server_agent_response_end())