letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +9 -3
- letta/agents/base_agent.py +2 -2
- letta/agents/letta_agent.py +56 -45
- letta/agents/voice_agent.py +2 -2
- letta/data_sources/redis_client.py +146 -1
- letta/errors.py +4 -0
- letta/functions/function_sets/files.py +2 -2
- letta/functions/mcp_client/types.py +30 -6
- letta/functions/schema_generator.py +46 -1
- letta/functions/schema_validator.py +17 -2
- letta/functions/types.py +1 -1
- letta/helpers/tool_execution_helper.py +0 -2
- letta/llm_api/anthropic_client.py +27 -5
- letta/llm_api/deepseek_client.py +97 -0
- letta/llm_api/groq_client.py +79 -0
- letta/llm_api/helpers.py +0 -1
- letta/llm_api/llm_api_tools.py +2 -113
- letta/llm_api/llm_client.py +21 -0
- letta/llm_api/llm_client_base.py +11 -9
- letta/llm_api/openai_client.py +3 -0
- letta/llm_api/xai_client.py +85 -0
- letta/prompts/prompt_generator.py +190 -0
- letta/schemas/agent_file.py +17 -2
- letta/schemas/file.py +24 -1
- letta/schemas/job.py +2 -0
- letta/schemas/letta_message.py +2 -0
- letta/schemas/letta_request.py +22 -0
- letta/schemas/message.py +10 -1
- letta/schemas/providers/bedrock.py +1 -0
- letta/server/rest_api/redis_stream_manager.py +300 -0
- letta/server/rest_api/routers/v1/agents.py +129 -7
- letta/server/rest_api/routers/v1/folders.py +15 -5
- letta/server/rest_api/routers/v1/runs.py +101 -11
- letta/server/rest_api/routers/v1/sources.py +21 -53
- letta/server/rest_api/routers/v1/telemetry.py +14 -4
- letta/server/rest_api/routers/v1/tools.py +2 -2
- letta/server/rest_api/streaming_response.py +3 -24
- letta/server/server.py +0 -1
- letta/services/agent_manager.py +2 -2
- letta/services/agent_serialization_manager.py +129 -32
- letta/services/file_manager.py +111 -6
- letta/services/file_processor/file_processor.py +5 -2
- letta/services/files_agents_manager.py +60 -0
- letta/services/helpers/agent_manager_helper.py +4 -205
- letta/services/helpers/tool_parser_helper.py +6 -3
- letta/services/mcp/base_client.py +7 -1
- letta/services/mcp/sse_client.py +7 -2
- letta/services/mcp/stdio_client.py +5 -0
- letta/services/mcp/streamable_http_client.py +11 -2
- letta/services/mcp_manager.py +31 -30
- letta/services/source_manager.py +26 -1
- letta/services/summarizer/summarizer.py +21 -10
- letta/services/tool_executor/files_tool_executor.py +13 -9
- letta/services/tool_executor/mcp_tool_executor.py +3 -0
- letta/services/tool_executor/tool_execution_manager.py +13 -0
- letta/services/tool_manager.py +43 -20
- letta/settings.py +1 -0
- letta/utils.py +37 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
- letta/functions/mcp_client/__init__.py +0 -0
- letta/functions/mcp_client/base_client.py +0 -156
- letta/functions/mcp_client/sse_client.py +0 -51
- letta/functions/mcp_client/stdio_client.py +0 -109
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
@@ -21,7 +21,7 @@ from letta.constants import (
     STRUCTURED_OUTPUT_MODELS,
 )
 from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_local_time
 from letta.llm_api.llm_client import LLMClient
 from letta.orm.agent import Agent as AgentModel
 from letta.orm.agents_tags import AgentsTags
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
 from letta.orm.sqlite_functions import adapt_array
 from letta.otel.tracing import trace_method
 from letta.prompts import gpt_system
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState, AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
     return system


-# TODO: This code is kind of wonky and deserves a rewrite
-def compile_memory_metadata_block(
-    memory_edit_timestamp: datetime,
-    timezone: str,
-    previous_message_count: int = 0,
-    archival_memory_size: Optional[int] = 0,
-) -> str:
-    """
-    Generate a memory metadata block for the agent's system prompt.
-
-    This creates a structured metadata section that informs the agent about
-    the current state of its memory systems, including timing information
-    and memory counts. This helps the agent understand what information
-    is available through its tools.
-
-    Args:
-        memory_edit_timestamp: When memory blocks were last modified
-        timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
-        previous_message_count: Number of messages in recall memory (conversation history)
-        archival_memory_size: Number of items in archival memory (long-term storage)
-
-    Returns:
-        A formatted string containing the memory metadata block with XML-style tags
-
-    Example Output:
-        <memory_metadata>
-        - The current time is: 2024-01-15 10:30 AM PST
-        - Memory blocks were last modified: 2024-01-15 09:00 AM PST
-        - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
-        - 156 total memories you created are stored in archival memory (use tools to access them)
-        </memory_metadata>
-    """
-    # Put the timestamp in the local timezone (mimicking get_local_time())
-    timestamp_str = format_datetime(memory_edit_timestamp, timezone)
-
-    # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
-    metadata_lines = [
-        "<memory_metadata>",
-        f"- The current time is: {get_local_time_fast(timezone)}",
-        f"- Memory blocks were last modified: {timestamp_str}",
-        f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
-    ]
-
-    # Only include archival memory line if there are archival memories
-    if archival_memory_size is not None and archival_memory_size > 0:
-        metadata_lines.append(
-            f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
-        )
-
-    metadata_lines.append("</memory_metadata>")
-    memory_metadata_block = "\n".join(metadata_lines)
-    return memory_metadata_block
-
-
 class PreserveMapping(dict):
     """Used to preserve (do not modify) undefined variables in the system prompt"""

@@ -331,7 +278,7 @@ def compile_system_message(
             raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
         else:
             # TODO should this all put into the memory.__repr__ function?
-            memory_metadata_string = compile_memory_metadata_block(
+            memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
                 memory_edit_timestamp=in_context_memory_last_edit,
                 previous_message_count=previous_message_count,
                 archival_memory_size=archival_memory_size,
@@ -372,154 +319,6 @@ def compile_system_message(
     return formatted_prompt


-@trace_method
-def get_system_message_from_compiled_memory(
-    system_prompt: str,
-    memory_with_sources: str,
-    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
-    timezone: str,
-    user_defined_variables: Optional[dict] = None,
-    append_icm_if_missing: bool = True,
-    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-    previous_message_count: int = 0,
-    archival_memory_size: int = 0,
-) -> str:
-    """Prepare the final/full system message that will be fed into the LLM API
-
-    The base system message may be templated, in which case we need to render the variables.
-
-    The following are reserved variables:
-    - CORE_MEMORY: the in-context memory of the LLM
-    """
-    if user_defined_variables is not None:
-        # TODO eventually support the user defining their own variables to inject
-        raise NotImplementedError
-    else:
-        variables = {}
-
-    # Add the protected memory variable
-    if IN_CONTEXT_MEMORY_KEYWORD in variables:
-        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-    else:
-        # TODO should this all put into the memory.__repr__ function?
-        memory_metadata_string = compile_memory_metadata_block(
-            memory_edit_timestamp=in_context_memory_last_edit,
-            previous_message_count=previous_message_count,
-            archival_memory_size=archival_memory_size,
-            timezone=timezone,
-        )
-
-        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-        # Add to the variables list to inject
-        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-    if template_format == "f-string":
-        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-        # Catch the special case where the system prompt is unformatted
-        if append_icm_if_missing:
-            if memory_variable_string not in system_prompt:
-                # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                system_prompt += "\n\n" + memory_variable_string
-
-        # render the variables using the built-in templater
-        try:
-            if user_defined_variables:
-                formatted_prompt = safe_format(system_prompt, variables)
-            else:
-                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-        except Exception as e:
-            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-    else:
-        # TODO support for mustache and jinja2
-        raise NotImplementedError(template_format)
-
-    return formatted_prompt
-
-
-@trace_method
-async def compile_system_message_async(
-    system_prompt: str,
-    in_context_memory: Memory,
-    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
-    timezone: str,
-    user_defined_variables: Optional[dict] = None,
-    append_icm_if_missing: bool = True,
-    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-    previous_message_count: int = 0,
-    archival_memory_size: int = 0,
-    tool_rules_solver: Optional[ToolRulesSolver] = None,
-    sources: Optional[List] = None,
-    max_files_open: Optional[int] = None,
-) -> str:
-    """Prepare the final/full system message that will be fed into the LLM API
-
-    The base system message may be templated, in which case we need to render the variables.
-
-    The following are reserved variables:
-    - CORE_MEMORY: the in-context memory of the LLM
-    """
-
-    # Add tool rule constraints if available
-    tool_constraint_block = None
-    if tool_rules_solver is not None:
-        tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
-
-    if user_defined_variables is not None:
-        # TODO eventually support the user defining their own variables to inject
-        raise NotImplementedError
-    else:
-        variables = {}
-
-    # Add the protected memory variable
-    if IN_CONTEXT_MEMORY_KEYWORD in variables:
-        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-    else:
-        # TODO should this all put into the memory.__repr__ function?
-        memory_metadata_string = compile_memory_metadata_block(
-            memory_edit_timestamp=in_context_memory_last_edit,
-            previous_message_count=previous_message_count,
-            archival_memory_size=archival_memory_size,
-            timezone=timezone,
-        )
-
-        memory_with_sources = await in_context_memory.compile_in_thread_async(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
-        )
-        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-        # Add to the variables list to inject
-        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-    if template_format == "f-string":
-        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-        # Catch the special case where the system prompt is unformatted
-        if append_icm_if_missing:
-            if memory_variable_string not in system_prompt:
-                # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                system_prompt += "\n\n" + memory_variable_string
-
-        # render the variables using the built-in templater
-        try:
-            if user_defined_variables:
-                formatted_prompt = safe_format(system_prompt, variables)
-            else:
-                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-        except Exception as e:
-            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-    else:
-        # TODO support for mustache and jinja2
-        raise NotImplementedError(template_format)
-
-    return formatted_prompt
-
-
 @trace_method
 def initialize_message_sequence(
     agent_state: AgentState,
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
     if memory_edit_timestamp is None:
         memory_edit_timestamp = get_local_time()

-    full_system_message = await compile_system_message_async(
+    full_system_message = await PromptGenerator.compile_system_message_async(
         system_prompt=agent_state.system,
         in_context_memory=agent_state.memory,
         in_context_memory_last_edit=memory_edit_timestamp,
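The module-level prompt helpers removed above now live on PromptGenerator (letta/prompts/prompt_generator.py, +190). For orientation, a minimal sketch of the new call path, assuming letta 0.11.5 is installed; the keyword arguments mirror the call site shown in the hunks above and the values are purely illustrative:

from datetime import datetime, timezone

from letta.prompts.prompt_generator import PromptGenerator

# Build the <memory_metadata> block the same way compile_system_message now does.
metadata_block = PromptGenerator.compile_memory_metadata_block(
    memory_edit_timestamp=datetime.now(timezone.utc),  # when memory blocks were last edited
    previous_message_count=42,     # messages available in recall memory
    archival_memory_size=156,      # items available in archival memory
    timezone="America/Los_Angeles",
)
print(metadata_block)  # "<memory_metadata> ... </memory_metadata>"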
@@ -70,13 +70,16 @@ def runtime_override_tool_json_schema(
     tool_list: list[JsonDict],
     response_format: ResponseFormatUnion | None,
     request_heartbeat: bool = True,
+    terminal_tools: set[str] | None = None,
 ) -> list[JsonDict]:
     """Override the tool JSON schemas at runtime if certain conditions are met.

     Cases:
         1. We will inject `send_message` tool calls with `response_format` if provided
-        2. Tools will have an additional `request_heartbeat` parameter added.
+        2. Tools will have an additional `request_heartbeat` parameter added (except for terminal tools).
     """
+    if terminal_tools is None:
+        terminal_tools = set()
     for tool_json in tool_list:
         if tool_json["name"] == SEND_MESSAGE_TOOL_NAME and response_format and response_format.type != ResponseFormatType.text:
             if response_format.type == ResponseFormatType.json_schema:
@@ -89,8 +92,8 @@ def runtime_override_tool_json_schema(
                     "properties": {},
                 }
         if request_heartbeat:
-            #
-            if tool_json["name"]
+            # Only add request_heartbeat to non-terminal tools
+            if tool_json["name"] not in terminal_tools:
                 tool_json["parameters"]["properties"][REQUEST_HEARTBEAT_PARAM] = {
                     "type": "boolean",
                     "description": REQUEST_HEARTBEAT_DESCRIPTION,
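In isolation, the new terminal_tools behavior looks roughly like the following standalone sketch; the constant and the tool schemas here are stand-ins rather than letta's own definitions:

REQUEST_HEARTBEAT_PARAM = "request_heartbeat"  # stand-in for the letta constant

def add_heartbeat_param(tool_list: list[dict], terminal_tools: set[str] | None = None) -> list[dict]:
    # Terminal tools end the agent loop, so they never receive the heartbeat parameter.
    terminal_tools = terminal_tools or set()
    for tool_json in tool_list:
        if tool_json["name"] not in terminal_tools:
            tool_json["parameters"]["properties"][REQUEST_HEARTBEAT_PARAM] = {
                "type": "boolean",
                "description": "Request another agent step after this tool call.",
            }
    return tool_list

tools = [
    {"name": "send_message", "parameters": {"type": "object", "properties": {}}},
    {"name": "archival_memory_search", "parameters": {"type": "object", "properties": {}}},
]
add_heartbeat_param(tools, terminal_tools={"send_message"})
# archival_memory_search gains request_heartbeat; send_message (terminal) is left untouched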
@@ -14,9 +14,15 @@ logger = get_logger(__name__)
 
 # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
 class AsyncBaseMCPClient:
-
+    # HTTP headers
+    AGENT_ID_HEADER = "X-Agent-Id"
+
+    def __init__(
+        self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
+    ):
         self.server_config = server_config
         self.oauth_provider = oauth_provider
+        self.agent_id = agent_id
         self.exit_stack = AsyncExitStack()
         self.session: Optional[ClientSession] = None
         self.initialized = False
letta/services/mcp/sse_client.py
CHANGED
@@ -16,8 +16,10 @@ logger = get_logger(__name__)
 
 # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
 class AsyncSSEMCPClient(AsyncBaseMCPClient):
-    def __init__(
-
+    def __init__(
+        self, server_config: SSEServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
+    ):
+        super().__init__(server_config, oauth_provider, agent_id)
 
     async def _initialize_connection(self, server_config: SSEServerConfig) -> None:
         headers = {}
@@ -27,6 +29,9 @@ class AsyncSSEMCPClient(AsyncBaseMCPClient):
         if server_config.auth_header and server_config.auth_token:
             headers[server_config.auth_header] = server_config.auth_token

+        if self.agent_id:
+            headers[self.AGENT_ID_HEADER] = self.agent_id
+
         # Use OAuth provider if available, otherwise use regular headers
         if self.oauth_provider:
             sse_cm = sse_client(url=server_config.server_url, headers=headers if headers else None, auth=self.oauth_provider)
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from mcp import ClientSession, StdioServerParameters
 from mcp.client.stdio import stdio_client

@@ -10,6 +12,9 @@ logger = get_logger(__name__)
 
 # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
 class AsyncStdioMCPClient(AsyncBaseMCPClient):
+    def __init__(self, server_config: StdioServerConfig, oauth_provider=None, agent_id: Optional[str] = None):
+        super().__init__(server_config, oauth_provider, agent_id)
+
     async def _initialize_connection(self, server_config: StdioServerConfig) -> None:
         args = [arg.split() for arg in server_config.args]
         # flatten
@@ -12,8 +12,13 @@ logger = get_logger(__name__)
 
 
 class AsyncStreamableHTTPMCPClient(AsyncBaseMCPClient):
-    def __init__(
-
+    def __init__(
+        self,
+        server_config: StreamableHTTPServerConfig,
+        oauth_provider: Optional[OAuthClientProvider] = None,
+        agent_id: Optional[str] = None,
+    ):
+        super().__init__(server_config, oauth_provider, agent_id)
 
     async def _initialize_connection(self, server_config: BaseServerConfig) -> None:
         if not isinstance(server_config, StreamableHTTPServerConfig):
@@ -28,6 +33,10 @@ class AsyncStreamableHTTPMCPClient(AsyncBaseMCPClient):
         if server_config.auth_header and server_config.auth_token:
             headers[server_config.auth_header] = server_config.auth_token

+        # Add agent ID header if provided
+        if self.agent_id:
+            headers[self.AGENT_ID_HEADER] = self.agent_id
+
         # Use OAuth provider if available, otherwise use regular headers
         if self.oauth_provider:
             streamable_http_cm = streamablehttp_client(
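The SSE and streamable-HTTP clients now attach the agent ID with the same header-assembly pattern. A self-contained sketch of that pattern follows; the header name matches the AGENT_ID_HEADER constant introduced above, everything else is illustrative:

from typing import Optional

AGENT_ID_HEADER = "X-Agent-Id"

def build_mcp_headers(auth_header: Optional[str], auth_token: Optional[str], agent_id: Optional[str]) -> dict[str, str]:
    # Auth header first (if configured), then the agent identity header.
    headers: dict[str, str] = {}
    if auth_header and auth_token:
        headers[auth_header] = auth_token
    if agent_id:
        headers[AGENT_ID_HEADER] = agent_id
    return headers

print(build_mcp_headers("Authorization", "Bearer abc123", "agent-1234"))
# {'Authorization': 'Bearer abc123', 'X-Agent-Id': 'agent-1234'}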
letta/services/mcp_manager.py
CHANGED
@@ -41,6 +41,7 @@ from letta.services.mcp.sse_client import MCP_CONFIG_TOPLEVEL_KEY, AsyncSSEMCPCl
 from letta.services.mcp.stdio_client import AsyncStdioMCPClient
 from letta.services.mcp.streamable_http_client import AsyncStreamableHTTPMCPClient
 from letta.services.tool_manager import ToolManager
+from letta.settings import tool_settings
 from letta.utils import enforce_types, printd

 logger = get_logger(__name__)
@@ -55,19 +56,18 @@ class MCPManager:
         self.cached_mcp_servers = {}  # maps id -> async connection

     @enforce_types
-    async def list_mcp_server_tools(self, mcp_server_name: str, actor: PydanticUser) -> List[MCPTool]:
+    async def list_mcp_server_tools(self, mcp_server_name: str, actor: PydanticUser, agent_id: Optional[str] = None) -> List[MCPTool]:
         """Get a list of all tools for a specific MCP server."""
         mcp_client = None
         try:
             mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
             mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
             server_config = mcp_config.to_config()
-            mcp_client = await self.get_mcp_client(server_config, actor)
+            mcp_client = await self.get_mcp_client(server_config, actor, agent_id=agent_id)
             await mcp_client.connect_to_server()

             # list tools
             tools = await mcp_client.list_tools()
-
             # Add health information to each tool
             for tool in tools:
                 if tool.inputSchema:
@@ -92,33 +92,34 @@ class MCPManager:
         tool_args: Optional[Dict[str, Any]],
         environment_variables: Dict[str, str],
         actor: PydanticUser,
+        agent_id: Optional[str] = None,
     ) -> Tuple[str, bool]:
         """Call a specific tool from a specific MCP server."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        mcp_client = await self.get_mcp_client(server_config, actor)
-        await mcp_client.connect_to_server()
-
-        # call tool
-        result, success = await mcp_client.execute_tool(tool_name, tool_args)
-        logger.info(f"MCP Result: {result}, Success: {success}")
-        # TODO: change to pydantic tool
+        mcp_client = None
+        try:
+            if not tool_settings.mcp_read_from_config:
+                # read from DB
+                mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
+                mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
+                server_config = mcp_config.to_config(environment_variables)
+            else:
+                # read from config file
+                mcp_config = self.read_mcp_config()
+                if mcp_server_name not in mcp_config:
+                    raise ValueError(f"MCP server {mcp_server_name} not found in config.")
+                server_config = mcp_config[mcp_server_name]

-
+            mcp_client = await self.get_mcp_client(server_config, actor, agent_id=agent_id)
+            await mcp_client.connect_to_server()

-
+            # call tool
+            result, success = await mcp_client.execute_tool(tool_name, tool_args)
+            logger.info(f"MCP Result: {result}, Success: {success}")
+            # TODO: change to pydantic tool
+            return result, success
+        finally:
+            if mcp_client:
+                await mcp_client.cleanup()

     @enforce_types
     async def add_tool_from_mcp_server(self, mcp_server_name: str, mcp_tool_name: str, actor: PydanticUser) -> PydanticTool:
@@ -129,7 +130,6 @@ class MCPManager:
             raise ValueError(f"MCP server '{mcp_server_name}' not found")

         mcp_tools = await self.list_mcp_server_tools(mcp_server_name, actor=actor)
-
         for mcp_tool in mcp_tools:
             # TODO: @jnjpng move health check to tool class
             if mcp_tool.name == mcp_tool_name:
@@ -450,6 +450,7 @@ class MCPManager:
         server_config: Union[SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig],
         actor: PydanticUser,
         oauth_provider: Optional[Any] = None,
+        agent_id: Optional[str] = None,
     ) -> Union[AsyncSSEMCPClient, AsyncStdioMCPClient, AsyncStreamableHTTPMCPClient]:
         """
         Helper function to create the appropriate MCP client based on server configuration.
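Taken together with the list_mcp_server_tools and execute_mcp_server_tool hunks above, the agent ID now threads from the manager entry points down to the MCP client. A hedged sketch of a caller, where the manager and actor are assumed to come from an existing letta server context:

from typing import Optional

async def list_tools_for_agent(mcp_manager, actor, agent_id: Optional[str] = None):
    # agent_id is forwarded to get_mcp_client(), which passes it to the MCP client
    # constructor so the X-Agent-Id header rides along on the server connection.
    return await mcp_manager.list_mcp_server_tools("my-mcp-server", actor=actor, agent_id=agent_id)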
@@ -482,13 +483,13 @@ class MCPManager:
 
         if server_config.type == MCPServerType.SSE:
             server_config = SSEServerConfig(**server_config.model_dump())
-            return AsyncSSEMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+            return AsyncSSEMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
         elif server_config.type == MCPServerType.STDIO:
             server_config = StdioServerConfig(**server_config.model_dump())
-            return AsyncStdioMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+            return AsyncStdioMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
         elif server_config.type == MCPServerType.STREAMABLE_HTTP:
             server_config = StreamableHTTPServerConfig(**server_config.model_dump())
-            return AsyncStreamableHTTPMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+            return AsyncStreamableHTTPMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
         else:
             raise ValueError(f"Unsupported server config type: {type(server_config)}")

letta/services/source_manager.py
CHANGED
@@ -143,7 +143,6 @@ class SourceManager:
                 update_dict[col.name] = excluded[col.name]

             upsert_stmt = stmt.on_conflict_do_update(index_elements=["name", "organization_id"], set_=update_dict)
-
             await session.execute(upsert_stmt)
             await session.commit()

@@ -397,3 +396,29 @@ class SourceManager:
         sources_orm = result.scalars().all()

         return [source.to_pydantic() for source in sources_orm]
+
+    @enforce_types
+    @trace_method
+    async def get_existing_source_names(self, source_names: List[str], actor: PydanticUser) -> set[str]:
+        """
+        Fast batch check to see which source names already exist for the organization.
+
+        Args:
+            source_names: List of source names to check
+            actor: User performing the action
+
+        Returns:
+            Set of source names that already exist
+        """
+        if not source_names:
+            return set()
+
+        async with db_registry.async_session() as session:
+            query = select(SourceModel.name).where(
+                SourceModel.name.in_(source_names), SourceModel.organization_id == actor.organization_id, SourceModel.is_deleted == False
+            )
+
+            result = await session.execute(query)
+            existing_names = result.scalars().all()
+
+            return set(existing_names)
@@ -15,6 +15,8 @@ from letta.schemas.letta_message_content import TextContent
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
+from letta.services.agent_manager import AgentManager
+from letta.services.message_manager import MessageManager
 from letta.services.summarizer.enums import SummarizationMode
 from letta.system import package_summarize_message_no_counts
 from letta.templates.template_helper import render_template
@@ -36,6 +38,10 @@ class Summarizer:
         message_buffer_limit: int = 10,
         message_buffer_min: int = 3,
         partial_evict_summarizer_percentage: float = 0.30,
+        agent_manager: Optional[AgentManager] = None,
+        message_manager: Optional[MessageManager] = None,
+        actor: Optional[User] = None,
+        agent_id: Optional[str] = None,
     ):
         self.mode = mode

@@ -46,6 +52,12 @@ class Summarizer:
         self.summarizer_agent = summarizer_agent
         self.partial_evict_summarizer_percentage = partial_evict_summarizer_percentage

+        # for partial buffer only
+        self.agent_manager = agent_manager
+        self.message_manager = message_manager
+        self.actor = actor
+        self.agent_id = agent_id
+
     @trace_method
     async def summarize(
         self,
@@ -121,9 +133,6 @@ class Summarizer:
             logger.debug("Not forcing summarization, returning in-context messages as is.")
             return all_in_context_messages, False

-        # Very ugly code to pull LLMConfig etc from the SummarizerAgent if we're not using it for anything else
-        assert self.summarizer_agent is not None
-
         # First step: determine how many messages to retain
         total_message_count = len(all_in_context_messages)
         assert self.partial_evict_summarizer_percentage >= 0.0 and self.partial_evict_summarizer_percentage <= 1.0
@@ -147,15 +156,13 @@ class Summarizer:
 
         # Dynamically get the LLMConfig from the summarizer agent
         # Pretty cringe code here that we need the agent for this but we don't use it
-        agent_state = await self.
-            agent_id=self.summarizer_agent.agent_id, actor=self.summarizer_agent.actor
-        )
+        agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)

         # TODO if we do this via the "agent", then we can more easily allow toggling on the memory block version
         summary_message_str = await simple_summary(
             messages=messages_to_summarize,
             llm_config=agent_state.llm_config,
-            actor=self.
+            actor=self.actor,
             include_ack=True,
         )

@@ -185,9 +192,9 @@ class Summarizer:
         )[0]

         # Create the message in the DB
-        await self.
+        await self.message_manager.create_many_messages_async(
             pydantic_msgs=[summary_message_obj],
-            actor=self.
+            actor=self.actor,
         )

         updated_in_context_messages = all_in_context_messages[assistant_message_index:]
@@ -354,7 +361,11 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
     # NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it
     # I'm leaving it commented it out for now for safety but is fine assuming the var here is a copy not a reference
     # llm_config.put_inner_thoughts_in_kwargs = False
-
+    try:
+        response_data = await llm_client.request_async(request_data, llm_config)
+    except Exception as e:
+        # handle LLM error (likely a context window exceeded error)
+        raise llm_client.handle_llm_error(e)
     response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
     if response.choices[0].message.content is None:
         logger.warning("No content returned from summarizer")
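The addition to simple_summary is the usual error-translation wrapper. Sketched generically below; request_async and handle_llm_error are the client methods named in the hunk, while the wrapper function itself is hypothetical:

async def request_with_error_translation(llm_client, request_data, llm_config):
    try:
        return await llm_client.request_async(request_data, llm_config)
    except Exception as e:
        # Most commonly a context-window-exceeded error from the provider;
        # handle_llm_error converts it into the client's own exception type.
        raise llm_client.handle_llm_error(e)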
@@ -151,16 +151,16 @@ class LettaFileToolExecutor(ToolExecutor):
         offset = file_request.offset
         length = file_request.length

-        #
+        # Use 0-indexed offset/length directly for LineChunker
         start, end = None, None
         if offset is not None or length is not None:
-            if offset is not None and offset <
-                raise ValueError(f"Offset for file {file_name} must be >=
+            if offset is not None and offset < 0:
+                raise ValueError(f"Offset for file {file_name} must be >= 0 (0-indexed), got {offset}")
             if length is not None and length < 1:
                 raise ValueError(f"Length for file {file_name} must be >= 1, got {length}")

-            #
-            start =
+            # Use offset directly as it's already 0-indexed
+            start = offset if offset is not None else None
             if start is not None and length is not None:
                 end = start + length
             else:
@@ -193,7 +193,7 @@ class LettaFileToolExecutor(ToolExecutor):
             visible_content=visible_content,
             max_files_open=agent_state.max_files_open,
             start_line=start + 1 if start is not None else None,  # convert to 1-indexed for user display
-            end_line=end if end is not None else None,  # end is already exclusive
+            end_line=end if end is not None else None,  # end is already exclusive, shows as 1-indexed inclusive
         )

         opened_files.append(file_name)
@@ -220,10 +220,14 @@ class LettaFileToolExecutor(ToolExecutor):
         for req in file_requests:
             previous_info = format_previous_range(req.file_name)
             if req.offset is not None and req.length is not None:
-
-
+                # Display as 1-indexed for user readability: (offset+1) to (offset+length)
+                start_line = req.offset + 1
+                end_line = req.offset + req.length
+                file_summaries.append(f"{req.file_name} (lines {start_line}-{end_line}){previous_info}")
             elif req.offset is not None:
-
+                # Display as 1-indexed
+                start_line = req.offset + 1
+                file_summaries.append(f"{req.file_name} (lines {start_line}-end){previous_info}")
             else:
                 file_summaries.append(f"{req.file_name}{previous_info}")

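The convention running through these file-tool hunks: offsets and lengths are 0-indexed and half-open internally, while user-facing summaries show 1-indexed inclusive line ranges. A standalone sketch of that conversion:

def display_range(offset: int | None, length: int | None) -> str:
    # Mirrors the summary formatting above: internal 0-indexed offset/length
    # become a 1-indexed inclusive range for display.
    if offset is not None and length is not None:
        return f"lines {offset + 1}-{offset + length}"
    if offset is not None:
        return f"lines {offset + 1}-end"
    return "whole file"

assert display_range(0, 10) == "lines 1-10"       # first ten lines
assert display_range(99, None) == "lines 100-end"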