letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +9 -3
  3. letta/agents/base_agent.py +2 -2
  4. letta/agents/letta_agent.py +56 -45
  5. letta/agents/voice_agent.py +2 -2
  6. letta/data_sources/redis_client.py +146 -1
  7. letta/errors.py +4 -0
  8. letta/functions/function_sets/files.py +2 -2
  9. letta/functions/mcp_client/types.py +30 -6
  10. letta/functions/schema_generator.py +46 -1
  11. letta/functions/schema_validator.py +17 -2
  12. letta/functions/types.py +1 -1
  13. letta/helpers/tool_execution_helper.py +0 -2
  14. letta/llm_api/anthropic_client.py +27 -5
  15. letta/llm_api/deepseek_client.py +97 -0
  16. letta/llm_api/groq_client.py +79 -0
  17. letta/llm_api/helpers.py +0 -1
  18. letta/llm_api/llm_api_tools.py +2 -113
  19. letta/llm_api/llm_client.py +21 -0
  20. letta/llm_api/llm_client_base.py +11 -9
  21. letta/llm_api/openai_client.py +3 -0
  22. letta/llm_api/xai_client.py +85 -0
  23. letta/prompts/prompt_generator.py +190 -0
  24. letta/schemas/agent_file.py +17 -2
  25. letta/schemas/file.py +24 -1
  26. letta/schemas/job.py +2 -0
  27. letta/schemas/letta_message.py +2 -0
  28. letta/schemas/letta_request.py +22 -0
  29. letta/schemas/message.py +10 -1
  30. letta/schemas/providers/bedrock.py +1 -0
  31. letta/server/rest_api/redis_stream_manager.py +300 -0
  32. letta/server/rest_api/routers/v1/agents.py +129 -7
  33. letta/server/rest_api/routers/v1/folders.py +15 -5
  34. letta/server/rest_api/routers/v1/runs.py +101 -11
  35. letta/server/rest_api/routers/v1/sources.py +21 -53
  36. letta/server/rest_api/routers/v1/telemetry.py +14 -4
  37. letta/server/rest_api/routers/v1/tools.py +2 -2
  38. letta/server/rest_api/streaming_response.py +3 -24
  39. letta/server/server.py +0 -1
  40. letta/services/agent_manager.py +2 -2
  41. letta/services/agent_serialization_manager.py +129 -32
  42. letta/services/file_manager.py +111 -6
  43. letta/services/file_processor/file_processor.py +5 -2
  44. letta/services/files_agents_manager.py +60 -0
  45. letta/services/helpers/agent_manager_helper.py +4 -205
  46. letta/services/helpers/tool_parser_helper.py +6 -3
  47. letta/services/mcp/base_client.py +7 -1
  48. letta/services/mcp/sse_client.py +7 -2
  49. letta/services/mcp/stdio_client.py +5 -0
  50. letta/services/mcp/streamable_http_client.py +11 -2
  51. letta/services/mcp_manager.py +31 -30
  52. letta/services/source_manager.py +26 -1
  53. letta/services/summarizer/summarizer.py +21 -10
  54. letta/services/tool_executor/files_tool_executor.py +13 -9
  55. letta/services/tool_executor/mcp_tool_executor.py +3 -0
  56. letta/services/tool_executor/tool_execution_manager.py +13 -0
  57. letta/services/tool_manager.py +43 -20
  58. letta/settings.py +1 -0
  59. letta/utils.py +37 -0
  60. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
  61. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
  62. letta/functions/mcp_client/__init__.py +0 -0
  63. letta/functions/mcp_client/base_client.py +0 -156
  64. letta/functions/mcp_client/sse_client.py +0 -51
  65. letta/functions/mcp_client/stdio_client.py +0 -109
  66. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
letta/services/helpers/agent_manager_helper.py

@@ -21,7 +21,7 @@ from letta.constants import (
      STRUCTURED_OUTPUT_MODELS,
  )
  from letta.helpers import ToolRulesSolver
- from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
+ from letta.helpers.datetime_helpers import get_local_time
  from letta.llm_api.llm_client import LLMClient
  from letta.orm.agent import Agent as AgentModel
  from letta.orm.agents_tags import AgentsTags
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
  from letta.orm.sqlite_functions import adapt_array
  from letta.otel.tracing import trace_method
  from letta.prompts import gpt_system
+ from letta.prompts.prompt_generator import PromptGenerator
  from letta.schemas.agent import AgentState, AgentType
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import MessageRole
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
      return system


- # TODO: This code is kind of wonky and deserves a rewrite
- def compile_memory_metadata_block(
-     memory_edit_timestamp: datetime,
-     timezone: str,
-     previous_message_count: int = 0,
-     archival_memory_size: Optional[int] = 0,
- ) -> str:
-     """
-     Generate a memory metadata block for the agent's system prompt.
-
-     This creates a structured metadata section that informs the agent about
-     the current state of its memory systems, including timing information
-     and memory counts. This helps the agent understand what information
-     is available through its tools.
-
-     Args:
-         memory_edit_timestamp: When memory blocks were last modified
-         timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
-         previous_message_count: Number of messages in recall memory (conversation history)
-         archival_memory_size: Number of items in archival memory (long-term storage)
-
-     Returns:
-         A formatted string containing the memory metadata block with XML-style tags
-
-     Example Output:
-         <memory_metadata>
-         - The current time is: 2024-01-15 10:30 AM PST
-         - Memory blocks were last modified: 2024-01-15 09:00 AM PST
-         - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
-         - 156 total memories you created are stored in archival memory (use tools to access them)
-         </memory_metadata>
-     """
-     # Put the timestamp in the local timezone (mimicking get_local_time())
-     timestamp_str = format_datetime(memory_edit_timestamp, timezone)
-
-     # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
-     metadata_lines = [
-         "<memory_metadata>",
-         f"- The current time is: {get_local_time_fast(timezone)}",
-         f"- Memory blocks were last modified: {timestamp_str}",
-         f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
-     ]
-
-     # Only include archival memory line if there are archival memories
-     if archival_memory_size is not None and archival_memory_size > 0:
-         metadata_lines.append(
-             f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
-         )
-
-     metadata_lines.append("</memory_metadata>")
-     memory_metadata_block = "\n".join(metadata_lines)
-     return memory_metadata_block
-
-
  class PreserveMapping(dict):
      """Used to preserve (do not modify) undefined variables in the system prompt"""
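Note: the helper deleted above now lives on the new PromptGenerator class in letta/prompts/prompt_generator.py (added in this release). A minimal calling sketch, assuming the keyword arguments are unchanged from the deleted module-level function; the values shown are illustrative only:

    from datetime import datetime, timezone as tz

    from letta.prompts.prompt_generator import PromptGenerator

    # Same kwargs that the updated callers below pass in; the values here are made up.
    block = PromptGenerator.compile_memory_metadata_block(
        memory_edit_timestamp=datetime.now(tz.utc),
        timezone="America/Los_Angeles",
        previous_message_count=42,
        archival_memory_size=156,
    )
    # `block` is the "<memory_metadata> ... </memory_metadata>" section injected into the system prompt.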
 
@@ -331,7 +278,7 @@ def compile_system_message(
          raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
      else:
          # TODO should this all put into the memory.__repr__ function?
-         memory_metadata_string = compile_memory_metadata_block(
+         memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
              memory_edit_timestamp=in_context_memory_last_edit,
              previous_message_count=previous_message_count,
              archival_memory_size=archival_memory_size,
@@ -372,154 +319,6 @@ def compile_system_message(
      return formatted_prompt


- @trace_method
- def get_system_message_from_compiled_memory(
-     system_prompt: str,
-     memory_with_sources: str,
-     in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
-     timezone: str,
-     user_defined_variables: Optional[dict] = None,
-     append_icm_if_missing: bool = True,
-     template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-     previous_message_count: int = 0,
-     archival_memory_size: int = 0,
- ) -> str:
-     """Prepare the final/full system message that will be fed into the LLM API
-
-     The base system message may be templated, in which case we need to render the variables.
-
-     The following are reserved variables:
-       - CORE_MEMORY: the in-context memory of the LLM
-     """
-     if user_defined_variables is not None:
-         # TODO eventually support the user defining their own variables to inject
-         raise NotImplementedError
-     else:
-         variables = {}
-
-     # Add the protected memory variable
-     if IN_CONTEXT_MEMORY_KEYWORD in variables:
-         raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-     else:
-         # TODO should this all put into the memory.__repr__ function?
-         memory_metadata_string = compile_memory_metadata_block(
-             memory_edit_timestamp=in_context_memory_last_edit,
-             previous_message_count=previous_message_count,
-             archival_memory_size=archival_memory_size,
-             timezone=timezone,
-         )
-
-         full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-         # Add to the variables list to inject
-         variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-     if template_format == "f-string":
-         memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-         # Catch the special case where the system prompt is unformatted
-         if append_icm_if_missing:
-             if memory_variable_string not in system_prompt:
-                 # In this case, append it to the end to make sure memory is still injected
-                 # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                 system_prompt += "\n\n" + memory_variable_string
-
-         # render the variables using the built-in templater
-         try:
-             if user_defined_variables:
-                 formatted_prompt = safe_format(system_prompt, variables)
-             else:
-                 formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-         except Exception as e:
-             raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-     else:
-         # TODO support for mustache and jinja2
-         raise NotImplementedError(template_format)
-
-     return formatted_prompt
-
-
- @trace_method
- async def compile_system_message_async(
-     system_prompt: str,
-     in_context_memory: Memory,
-     in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
-     timezone: str,
-     user_defined_variables: Optional[dict] = None,
-     append_icm_if_missing: bool = True,
-     template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-     previous_message_count: int = 0,
-     archival_memory_size: int = 0,
-     tool_rules_solver: Optional[ToolRulesSolver] = None,
-     sources: Optional[List] = None,
-     max_files_open: Optional[int] = None,
- ) -> str:
-     """Prepare the final/full system message that will be fed into the LLM API
-
-     The base system message may be templated, in which case we need to render the variables.
-
-     The following are reserved variables:
-       - CORE_MEMORY: the in-context memory of the LLM
-     """
-
-     # Add tool rule constraints if available
-     tool_constraint_block = None
-     if tool_rules_solver is not None:
-         tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
-
-     if user_defined_variables is not None:
-         # TODO eventually support the user defining their own variables to inject
-         raise NotImplementedError
-     else:
-         variables = {}
-
-     # Add the protected memory variable
-     if IN_CONTEXT_MEMORY_KEYWORD in variables:
-         raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-     else:
-         # TODO should this all put into the memory.__repr__ function?
-         memory_metadata_string = compile_memory_metadata_block(
-             memory_edit_timestamp=in_context_memory_last_edit,
-             previous_message_count=previous_message_count,
-             archival_memory_size=archival_memory_size,
-             timezone=timezone,
-         )
-
-         memory_with_sources = await in_context_memory.compile_in_thread_async(
-             tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
-         )
-         full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-         # Add to the variables list to inject
-         variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-     if template_format == "f-string":
-         memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-         # Catch the special case where the system prompt is unformatted
-         if append_icm_if_missing:
-             if memory_variable_string not in system_prompt:
-                 # In this case, append it to the end to make sure memory is still injected
-                 # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                 system_prompt += "\n\n" + memory_variable_string
-
-         # render the variables using the built-in templater
-         try:
-             if user_defined_variables:
-                 formatted_prompt = safe_format(system_prompt, variables)
-             else:
-                 formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-         except Exception as e:
-             raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-     else:
-         # TODO support for mustache and jinja2
-         raise NotImplementedError(template_format)
-
-     return formatted_prompt
-
-
  @trace_method
  def initialize_message_sequence(
      agent_state: AgentState,
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
      if memory_edit_timestamp is None:
          memory_edit_timestamp = get_local_time()

-     full_system_message = await compile_system_message_async(
+     full_system_message = await PromptGenerator.compile_system_message_async(
          system_prompt=agent_state.system,
          in_context_memory=agent_state.memory,
          in_context_memory_last_edit=memory_edit_timestamp,
letta/services/helpers/tool_parser_helper.py

@@ -70,13 +70,16 @@ def runtime_override_tool_json_schema(
      tool_list: list[JsonDict],
      response_format: ResponseFormatUnion | None,
      request_heartbeat: bool = True,
+     terminal_tools: set[str] | None = None,
  ) -> list[JsonDict]:
      """Override the tool JSON schemas at runtime if certain conditions are met.

      Cases:
      1. We will inject `send_message` tool calls with `response_format` if provided
-     2. Tools will have an additional `request_heartbeat` parameter added.
+     2. Tools will have an additional `request_heartbeat` parameter added (except for terminal tools).
      """
+     if terminal_tools is None:
+         terminal_tools = set()
      for tool_json in tool_list:
          if tool_json["name"] == SEND_MESSAGE_TOOL_NAME and response_format and response_format.type != ResponseFormatType.text:
              if response_format.type == ResponseFormatType.json_schema:
@@ -89,8 +92,8 @@ def runtime_override_tool_json_schema(
                      "properties": {},
                  }
          if request_heartbeat:
-             # TODO (cliandy): see support for tool control loop parameters
-             if tool_json["name"] != SEND_MESSAGE_TOOL_NAME:
+             # Only add request_heartbeat to non-terminal tools
+             if tool_json["name"] not in terminal_tools:
                  tool_json["parameters"]["properties"][REQUEST_HEARTBEAT_PARAM] = {
                      "type": "boolean",
                      "description": REQUEST_HEARTBEAT_DESCRIPTION,
letta/services/mcp/base_client.py

@@ -14,9 +14,15 @@ logger = get_logger(__name__)

  # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
  class AsyncBaseMCPClient:
-     def __init__(self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None):
+     # HTTP headers
+     AGENT_ID_HEADER = "X-Agent-Id"
+
+     def __init__(
+         self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
+     ):
          self.server_config = server_config
          self.oauth_provider = oauth_provider
+         self.agent_id = agent_id
          self.exit_stack = AsyncExitStack()
          self.session: Optional[ClientSession] = None
          self.initialized = False
letta/services/mcp/sse_client.py

@@ -16,8 +16,10 @@ logger = get_logger(__name__)

  # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
  class AsyncSSEMCPClient(AsyncBaseMCPClient):
-     def __init__(self, server_config: SSEServerConfig, oauth_provider: Optional[OAuthClientProvider] = None):
-         super().__init__(server_config, oauth_provider)
+     def __init__(
+         self, server_config: SSEServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
+     ):
+         super().__init__(server_config, oauth_provider, agent_id)

      async def _initialize_connection(self, server_config: SSEServerConfig) -> None:
          headers = {}
@@ -27,6 +29,9 @@ class AsyncSSEMCPClient(AsyncBaseMCPClient):
          if server_config.auth_header and server_config.auth_token:
              headers[server_config.auth_header] = server_config.auth_token

+         if self.agent_id:
+             headers[self.AGENT_ID_HEADER] = self.agent_id
+
          # Use OAuth provider if available, otherwise use regular headers
          if self.oauth_provider:
              sse_cm = sse_client(url=server_config.server_url, headers=headers if headers else None, auth=self.oauth_provider)
letta/services/mcp/stdio_client.py

@@ -1,3 +1,5 @@
+ from typing import Optional
+
  from mcp import ClientSession, StdioServerParameters
  from mcp.client.stdio import stdio_client

@@ -10,6 +12,9 @@ logger = get_logger(__name__)

  # TODO: Get rid of Async prefix on this class name once we deprecate old sync code
  class AsyncStdioMCPClient(AsyncBaseMCPClient):
+     def __init__(self, server_config: StdioServerConfig, oauth_provider=None, agent_id: Optional[str] = None):
+         super().__init__(server_config, oauth_provider, agent_id)
+
      async def _initialize_connection(self, server_config: StdioServerConfig) -> None:
          args = [arg.split() for arg in server_config.args]
          # flatten
letta/services/mcp/streamable_http_client.py

@@ -12,8 +12,13 @@ logger = get_logger(__name__)


  class AsyncStreamableHTTPMCPClient(AsyncBaseMCPClient):
-     def __init__(self, server_config: StreamableHTTPServerConfig, oauth_provider: Optional[OAuthClientProvider] = None):
-         super().__init__(server_config, oauth_provider)
+     def __init__(
+         self,
+         server_config: StreamableHTTPServerConfig,
+         oauth_provider: Optional[OAuthClientProvider] = None,
+         agent_id: Optional[str] = None,
+     ):
+         super().__init__(server_config, oauth_provider, agent_id)

      async def _initialize_connection(self, server_config: BaseServerConfig) -> None:
          if not isinstance(server_config, StreamableHTTPServerConfig):
@@ -28,6 +33,10 @@ class AsyncStreamableHTTPMCPClient(AsyncBaseMCPClient):
          if server_config.auth_header and server_config.auth_token:
              headers[server_config.auth_header] = server_config.auth_token

+         # Add agent ID header if provided
+         if self.agent_id:
+             headers[self.AGENT_ID_HEADER] = self.agent_id
+
          # Use OAuth provider if available, otherwise use regular headers
          if self.oauth_provider:
              streamable_http_cm = streamablehttp_client(
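Note: the base/SSE/stdio/streamable-HTTP client changes mean any MCP client constructed with an agent_id now sends an X-Agent-Id header to the server. A hedged sketch; the import and constructor kwargs come from the hunks above, while server_config and the agent id are placeholders built elsewhere:

    from letta.services.mcp.streamable_http_client import AsyncStreamableHTTPMCPClient

    # `server_config` is assumed to be a StreamableHTTPServerConfig built elsewhere.
    client = AsyncStreamableHTTPMCPClient(
        server_config=server_config,
        oauth_provider=None,
        agent_id="agent-123",  # placeholder id
    )
    await client.connect_to_server()
    # _initialize_connection() adds headers["X-Agent-Id"] = "agent-123"
    # (AGENT_ID_HEADER defined on AsyncBaseMCPClient) before opening the stream.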
letta/services/mcp_manager.py

@@ -41,6 +41,7 @@ from letta.services.mcp.sse_client import MCP_CONFIG_TOPLEVEL_KEY, AsyncSSEMCPCl
  from letta.services.mcp.stdio_client import AsyncStdioMCPClient
  from letta.services.mcp.streamable_http_client import AsyncStreamableHTTPMCPClient
  from letta.services.tool_manager import ToolManager
+ from letta.settings import tool_settings
  from letta.utils import enforce_types, printd

  logger = get_logger(__name__)
@@ -55,19 +56,18 @@ class MCPManager:
          self.cached_mcp_servers = {} # maps id -> async connection

      @enforce_types
-     async def list_mcp_server_tools(self, mcp_server_name: str, actor: PydanticUser) -> List[MCPTool]:
+     async def list_mcp_server_tools(self, mcp_server_name: str, actor: PydanticUser, agent_id: Optional[str] = None) -> List[MCPTool]:
          """Get a list of all tools for a specific MCP server."""
          mcp_client = None
          try:
              mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
              mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
              server_config = mcp_config.to_config()
-             mcp_client = await self.get_mcp_client(server_config, actor)
+             mcp_client = await self.get_mcp_client(server_config, actor, agent_id=agent_id)
              await mcp_client.connect_to_server()

              # list tools
              tools = await mcp_client.list_tools()
-
              # Add health information to each tool
              for tool in tools:
                  if tool.inputSchema:
@@ -92,33 +92,34 @@ class MCPManager:
          tool_args: Optional[Dict[str, Any]],
          environment_variables: Dict[str, str],
          actor: PydanticUser,
+         agent_id: Optional[str] = None,
      ) -> Tuple[str, bool]:
          """Call a specific tool from a specific MCP server."""
-         from letta.settings import tool_settings
-
-         if not tool_settings.mcp_read_from_config:
-             # read from DB
-             mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
-             mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
-             server_config = mcp_config.to_config(environment_variables)
-         else:
-             # read from config file
-             mcp_config = self.read_mcp_config()
-             if mcp_server_name not in mcp_config:
-                 raise ValueError(f"MCP server {mcp_server_name} not found in config.")
-             server_config = mcp_config[mcp_server_name]
-
-         mcp_client = await self.get_mcp_client(server_config, actor)
-         await mcp_client.connect_to_server()
-
-         # call tool
-         result, success = await mcp_client.execute_tool(tool_name, tool_args)
-         logger.info(f"MCP Result: {result}, Success: {success}")
-         # TODO: change to pydantic tool
+         mcp_client = None
+         try:
+             if not tool_settings.mcp_read_from_config:
+                 # read from DB
+                 mcp_server_id = await self.get_mcp_server_id_by_name(mcp_server_name, actor=actor)
+                 mcp_config = await self.get_mcp_server_by_id_async(mcp_server_id, actor=actor)
+                 server_config = mcp_config.to_config(environment_variables)
+             else:
+                 # read from config file
+                 mcp_config = self.read_mcp_config()
+                 if mcp_server_name not in mcp_config:
+                     raise ValueError(f"MCP server {mcp_server_name} not found in config.")
+                 server_config = mcp_config[mcp_server_name]

-         await mcp_client.cleanup()
+             mcp_client = await self.get_mcp_client(server_config, actor, agent_id=agent_id)
+             await mcp_client.connect_to_server()

-         return result, success
+             # call tool
+             result, success = await mcp_client.execute_tool(tool_name, tool_args)
+             logger.info(f"MCP Result: {result}, Success: {success}")
+             # TODO: change to pydantic tool
+             return result, success
+         finally:
+             if mcp_client:
+                 await mcp_client.cleanup()

      @enforce_types
      async def add_tool_from_mcp_server(self, mcp_server_name: str, mcp_tool_name: str, actor: PydanticUser) -> PydanticTool:
@@ -129,7 +130,6 @@ class MCPManager:
              raise ValueError(f"MCP server '{mcp_server_name}' not found")

          mcp_tools = await self.list_mcp_server_tools(mcp_server_name, actor=actor)
-
          for mcp_tool in mcp_tools:
              # TODO: @jnjpng move health check to tool class
              if mcp_tool.name == mcp_tool_name:
@@ -450,6 +450,7 @@ class MCPManager:
          server_config: Union[SSEServerConfig, StdioServerConfig, StreamableHTTPServerConfig],
          actor: PydanticUser,
          oauth_provider: Optional[Any] = None,
+         agent_id: Optional[str] = None,
      ) -> Union[AsyncSSEMCPClient, AsyncStdioMCPClient, AsyncStreamableHTTPMCPClient]:
          """
          Helper function to create the appropriate MCP client based on server configuration.
@@ -482,13 +483,13 @@

          if server_config.type == MCPServerType.SSE:
              server_config = SSEServerConfig(**server_config.model_dump())
-             return AsyncSSEMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+             return AsyncSSEMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
          elif server_config.type == MCPServerType.STDIO:
              server_config = StdioServerConfig(**server_config.model_dump())
-             return AsyncStdioMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+             return AsyncStdioMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
          elif server_config.type == MCPServerType.STREAMABLE_HTTP:
              server_config = StreamableHTTPServerConfig(**server_config.model_dump())
-             return AsyncStreamableHTTPMCPClient(server_config=server_config, oauth_provider=oauth_provider)
+             return AsyncStreamableHTTPMCPClient(server_config=server_config, oauth_provider=oauth_provider, agent_id=agent_id)
          else:
              raise ValueError(f"Unsupported server config type: {type(server_config)}")
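Note: with the manager changes above, the agent id threads from MCPManager through get_mcp_client() into the client (and so into the X-Agent-Id header), and the tool-call path now cleans up the client in a finally block even when the call raises. A hedged usage sketch of the listing path, whose method name is shown in the hunk; the server name, actor, and agent id are placeholders:

    # Sketch only; MCPManager() construction and `actor` come from surrounding server code.
    mcp_manager = MCPManager()

    tools = await mcp_manager.list_mcp_server_tools(
        "example-server",      # placeholder MCP server name
        actor=actor,
        agent_id="agent-123",  # forwarded to get_mcp_client(), then sent as X-Agent-Id
    )
    # The tool-execution method in the same class takes the same agent_id kwarg and
    # guarantees mcp_client.cleanup() via its finally block.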
 
letta/services/source_manager.py

@@ -143,7 +143,6 @@ class SourceManager:
                  update_dict[col.name] = excluded[col.name]

              upsert_stmt = stmt.on_conflict_do_update(index_elements=["name", "organization_id"], set_=update_dict)
-
              await session.execute(upsert_stmt)
              await session.commit()

@@ -397,3 +396,29 @@ class SourceManager:
          sources_orm = result.scalars().all()

          return [source.to_pydantic() for source in sources_orm]
+
+     @enforce_types
+     @trace_method
+     async def get_existing_source_names(self, source_names: List[str], actor: PydanticUser) -> set[str]:
+         """
+         Fast batch check to see which source names already exist for the organization.
+
+         Args:
+             source_names: List of source names to check
+             actor: User performing the action
+
+         Returns:
+             Set of source names that already exist
+         """
+         if not source_names:
+             return set()
+
+         async with db_registry.async_session() as session:
+             query = select(SourceModel.name).where(
+                 SourceModel.name.in_(source_names), SourceModel.organization_id == actor.organization_id, SourceModel.is_deleted == False
+             )
+
+             result = await session.execute(query)
+             existing_names = result.scalars().all()
+
+             return set(existing_names)
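Note: a short sketch of how the new batch check might be used to skip duplicate sources during an import; the caller is hypothetical, only the method signature comes from the hunk above:

    # Hypothetical caller; SourceManager() construction and `actor` are assumptions.
    source_manager = SourceManager()

    requested = ["docs", "wiki", "runbooks"]
    existing = await source_manager.get_existing_source_names(requested, actor=actor)
    to_create = [name for name in requested if name not in existing]
    # One IN (...) query for the whole batch instead of a per-name existence check.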
letta/services/summarizer/summarizer.py

@@ -15,6 +15,8 @@ from letta.schemas.letta_message_content import TextContent
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.user import User
+ from letta.services.agent_manager import AgentManager
+ from letta.services.message_manager import MessageManager
  from letta.services.summarizer.enums import SummarizationMode
  from letta.system import package_summarize_message_no_counts
  from letta.templates.template_helper import render_template
@@ -36,6 +38,10 @@ class Summarizer:
          message_buffer_limit: int = 10,
          message_buffer_min: int = 3,
          partial_evict_summarizer_percentage: float = 0.30,
+         agent_manager: Optional[AgentManager] = None,
+         message_manager: Optional[MessageManager] = None,
+         actor: Optional[User] = None,
+         agent_id: Optional[str] = None,
      ):
          self.mode = mode

@@ -46,6 +52,12 @@ class Summarizer:
          self.summarizer_agent = summarizer_agent
          self.partial_evict_summarizer_percentage = partial_evict_summarizer_percentage

+         # for partial buffer only
+         self.agent_manager = agent_manager
+         self.message_manager = message_manager
+         self.actor = actor
+         self.agent_id = agent_id
+
      @trace_method
      async def summarize(
          self,
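Note: with the new constructor parameters, the partial-evict path no longer has to reach through a summarizer agent for its managers; they can be injected directly. A hedged construction sketch in which the mode member name, manager instances, actor, and agent id are all assumptions for illustration:

    summarizer = Summarizer(
        mode=SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER,  # enum member name assumed
        summarizer_agent=None,
        partial_evict_summarizer_percentage=0.30,
        agent_manager=agent_manager,      # AgentManager instance, assumed available
        message_manager=message_manager,  # MessageManager instance, assumed available
        actor=actor,
        agent_id=agent_id,
    )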
@@ -121,9 +133,6 @@ class Summarizer:
              logger.debug("Not forcing summarization, returning in-context messages as is.")
              return all_in_context_messages, False

-         # Very ugly code to pull LLMConfig etc from the SummarizerAgent if we're not using it for anything else
-         assert self.summarizer_agent is not None
-
          # First step: determine how many messages to retain
          total_message_count = len(all_in_context_messages)
          assert self.partial_evict_summarizer_percentage >= 0.0 and self.partial_evict_summarizer_percentage <= 1.0
@@ -147,15 +156,13 @@ class Summarizer:

          # Dynamically get the LLMConfig from the summarizer agent
          # Pretty cringe code here that we need the agent for this but we don't use it
-         agent_state = await self.summarizer_agent.agent_manager.get_agent_by_id_async(
-             agent_id=self.summarizer_agent.agent_id, actor=self.summarizer_agent.actor
-         )
+         agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)

          # TODO if we do this via the "agent", then we can more easily allow toggling on the memory block version
          summary_message_str = await simple_summary(
              messages=messages_to_summarize,
              llm_config=agent_state.llm_config,
-             actor=self.summarizer_agent.actor,
+             actor=self.actor,
              include_ack=True,
          )

@@ -185,9 +192,9 @@ class Summarizer:
          )[0]

          # Create the message in the DB
-         await self.summarizer_agent.message_manager.create_many_messages_async(
+         await self.message_manager.create_many_messages_async(
              pydantic_msgs=[summary_message_obj],
-             actor=self.summarizer_agent.actor,
+             actor=self.actor,
          )

          updated_in_context_messages = all_in_context_messages[assistant_message_index:]
@@ -354,7 +361,11 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
      # NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it
      # I'm leaving it commented it out for now for safety but is fine assuming the var here is a copy not a reference
      # llm_config.put_inner_thoughts_in_kwargs = False
-     response_data = await llm_client.request_async(request_data, llm_config)
+     try:
+         response_data = await llm_client.request_async(request_data, llm_config)
+     except Exception as e:
+         # handle LLM error (likely a context window exceeded error)
+         raise llm_client.handle_llm_error(e)
      response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
      if response.choices[0].message.content is None:
          logger.warning("No content returned from summarizer")
letta/services/tool_executor/files_tool_executor.py

@@ -151,16 +151,16 @@ class LettaFileToolExecutor(ToolExecutor):
              offset = file_request.offset
              length = file_request.length

-             # Convert 1-indexed offset/length to 0-indexed start/end for LineChunker
+             # Use 0-indexed offset/length directly for LineChunker
              start, end = None, None
              if offset is not None or length is not None:
-                 if offset is not None and offset < 1:
-                     raise ValueError(f"Offset for file {file_name} must be >= 1 (1-indexed), got {offset}")
+                 if offset is not None and offset < 0:
+                     raise ValueError(f"Offset for file {file_name} must be >= 0 (0-indexed), got {offset}")
                  if length is not None and length < 1:
                      raise ValueError(f"Length for file {file_name} must be >= 1, got {length}")

-                 # Convert to 0-indexed for LineChunker
-                 start = (offset - 1) if offset is not None else None
+                 # Use offset directly as it's already 0-indexed
+                 start = offset if offset is not None else None
                  if start is not None and length is not None:
                      end = start + length
                  else:
@@ -193,7 +193,7 @@ class LettaFileToolExecutor(ToolExecutor):
                  visible_content=visible_content,
                  max_files_open=agent_state.max_files_open,
                  start_line=start + 1 if start is not None else None, # convert to 1-indexed for user display
-                 end_line=end if end is not None else None, # end is already exclusive in slicing, so this is correct
+                 end_line=end if end is not None else None, # end is already exclusive, shows as 1-indexed inclusive
              )

              opened_files.append(file_name)
@@ -220,10 +220,14 @@ class LettaFileToolExecutor(ToolExecutor):
          for req in file_requests:
              previous_info = format_previous_range(req.file_name)
              if req.offset is not None and req.length is not None:
-                 end_line = req.offset + req.length - 1
-                 file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line}){previous_info}")
+                 # Display as 1-indexed for user readability: (offset+1) to (offset+length)
+                 start_line = req.offset + 1
+                 end_line = req.offset + req.length
+                 file_summaries.append(f"{req.file_name} (lines {start_line}-{end_line}){previous_info}")
              elif req.offset is not None:
-                 file_summaries.append(f"{req.file_name} (lines {req.offset}-end){previous_info}")
+                 # Display as 1-indexed
+                 start_line = req.offset + 1
+                 file_summaries.append(f"{req.file_name} (lines {start_line}-end){previous_info}")
              else:
                  file_summaries.append(f"{req.file_name}{previous_info}")
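Note: the executor changes above switch open-file requests to 0-indexed offsets while keeping the user-facing display 1-indexed. The arithmetic, using only the expressions from the hunks (the values are illustrative):

    # offset is 0-indexed; length counts lines; the display range is 1-indexed and inclusive.
    offset, length = 100, 20

    start = offset              # 0-indexed start passed to LineChunker
    end = start + length        # exclusive end -> 120

    start_line = offset + 1     # 101, as shown to the user
    end_line = offset + length  # 120, as shown to the user
    print(f"(lines {start_line}-{end_line})")  # "(lines 101-120)"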