letta-nightly 0.8.8.dev20250703104323__py3-none-any.whl → 0.8.8.dev20250703174903__py3-none-any.whl

This diff compares two publicly released versions of the package. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (68)
  1. letta/agent.py +1 -0
  2. letta/agents/base_agent.py +8 -2
  3. letta/agents/ephemeral_summary_agent.py +33 -33
  4. letta/agents/letta_agent.py +104 -53
  5. letta/agents/voice_agent.py +2 -1
  6. letta/constants.py +8 -4
  7. letta/functions/function_sets/files.py +22 -7
  8. letta/functions/function_sets/multi_agent.py +34 -0
  9. letta/functions/types.py +1 -1
  10. letta/groups/helpers.py +8 -5
  11. letta/groups/sleeptime_multi_agent_v2.py +20 -15
  12. letta/interface.py +1 -1
  13. letta/interfaces/anthropic_streaming_interface.py +15 -8
  14. letta/interfaces/openai_chat_completions_streaming_interface.py +9 -6
  15. letta/interfaces/openai_streaming_interface.py +17 -11
  16. letta/llm_api/openai_client.py +2 -1
  17. letta/orm/agent.py +1 -0
  18. letta/orm/file.py +8 -2
  19. letta/orm/files_agents.py +36 -11
  20. letta/orm/mcp_server.py +3 -0
  21. letta/orm/source.py +2 -1
  22. letta/orm/step.py +3 -0
  23. letta/prompts/system/memgpt_v2_chat.txt +5 -8
  24. letta/schemas/agent.py +58 -23
  25. letta/schemas/embedding_config.py +3 -2
  26. letta/schemas/enums.py +4 -0
  27. letta/schemas/file.py +1 -0
  28. letta/schemas/letta_stop_reason.py +18 -0
  29. letta/schemas/mcp.py +15 -10
  30. letta/schemas/memory.py +35 -5
  31. letta/schemas/providers.py +11 -0
  32. letta/schemas/step.py +1 -0
  33. letta/schemas/tool.py +2 -1
  34. letta/server/rest_api/routers/v1/agents.py +320 -184
  35. letta/server/rest_api/routers/v1/groups.py +6 -2
  36. letta/server/rest_api/routers/v1/identities.py +6 -2
  37. letta/server/rest_api/routers/v1/jobs.py +49 -1
  38. letta/server/rest_api/routers/v1/sources.py +28 -19
  39. letta/server/rest_api/routers/v1/steps.py +7 -2
  40. letta/server/rest_api/routers/v1/tools.py +40 -9
  41. letta/server/rest_api/streaming_response.py +88 -0
  42. letta/server/server.py +61 -55
  43. letta/services/agent_manager.py +28 -16
  44. letta/services/file_manager.py +58 -9
  45. letta/services/file_processor/chunker/llama_index_chunker.py +2 -0
  46. letta/services/file_processor/embedder/openai_embedder.py +54 -10
  47. letta/services/file_processor/file_processor.py +59 -0
  48. letta/services/file_processor/parser/mistral_parser.py +2 -0
  49. letta/services/files_agents_manager.py +120 -2
  50. letta/services/helpers/agent_manager_helper.py +21 -4
  51. letta/services/job_manager.py +57 -6
  52. letta/services/mcp/base_client.py +1 -0
  53. letta/services/mcp_manager.py +13 -1
  54. letta/services/step_manager.py +14 -5
  55. letta/services/summarizer/summarizer.py +6 -22
  56. letta/services/tool_executor/builtin_tool_executor.py +0 -1
  57. letta/services/tool_executor/files_tool_executor.py +2 -2
  58. letta/services/tool_executor/multi_agent_tool_executor.py +23 -0
  59. letta/services/tool_manager.py +7 -7
  60. letta/settings.py +11 -2
  61. letta/templates/summary_request_text.j2 +19 -0
  62. letta/utils.py +95 -14
  63. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/METADATA +2 -2
  64. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/RECORD +68 -67
  65. /letta/{agents/prompts → prompts/system}/summary_system_prompt.txt +0 -0
  66. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.8.8.dev20250703104323.dist-info → letta_nightly-0.8.8.dev20250703174903.dist-info}/entry_points.txt +0 -0
letta/agent.py CHANGED
@@ -990,6 +990,7 @@ class Agent(BaseAgent):
                 ),
                 job_id=job_id,
                 step_id=step_id,
+                project_id=self.agent_state.project_id,
             )
             for message in all_new_messages:
                 message.step_id = step.id
letta/agents/base_agent.py CHANGED
@@ -67,7 +67,8 @@ class BaseAgent(ABC):
         """
         raise NotImplementedError

-    def pre_process_input_message(self, input_messages: List[MessageCreate]) -> Any:
+    @staticmethod
+    def pre_process_input_message(input_messages: List[MessageCreate]) -> Any:
         """
         Pre-process function to run on the input_message.
         """
@@ -97,9 +98,13 @@ class BaseAgent(ABC):
         # [DB Call] loading blocks (modifies: agent_state.memory.blocks)
         await self.agent_manager.refresh_memory_async(agent_state=agent_state, actor=self.actor)

+        tool_constraint_block = None
+        if tool_rules_solver is not None:
+            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+
         # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
         curr_system_message = in_context_messages[0]
-        curr_memory_str = agent_state.memory.compile()
+        curr_memory_str = agent_state.memory.compile(tool_usage_rules=tool_constraint_block, sources=agent_state.sources)
         curr_system_message_text = curr_system_message.content[0].text
         if curr_memory_str in curr_system_message_text:
             logger.debug(
@@ -124,6 +129,7 @@ class BaseAgent(ABC):
             previous_message_count=num_messages - len(in_context_messages),
             archival_memory_size=num_archival_memories,
             tool_rules_solver=tool_rules_solver,
+            sources=agent_state.sources,
         )

         diff = united_diff(curr_system_message_text, new_system_message_str)
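Note: the hunks above thread tool-usage rules and attached sources into memory compilation. A minimal sketch of the new call shape, using only names that appear in this diff (the surrounding variables are illustrative):

    # Sketch: tool_rules_solver may be None, in which case no tool-usage-rules
    # block is rendered into the compiled memory string.
    tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts() if tool_rules_solver else None
    curr_memory_str = agent_state.memory.compile(
        tool_usage_rules=tool_constraint_block,  # prompt block describing active tool rules
        sources=agent_state.sources,             # attached sources now rendered into core memory
    )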
letta/agents/ephemeral_summary_agent.py CHANGED
@@ -1,27 +1,28 @@
-from pathlib import Path
-from typing import AsyncGenerator, Dict, List
-
-from openai import AsyncOpenAI
+from typing import AsyncGenerator, List

 from letta.agents.base_agent import BaseAgent
 from letta.constants import DEFAULT_MAX_STEPS
+from letta.helpers.message_helper import convert_message_creates_to_messages
+from letta.llm_api.llm_client import LLMClient
+from letta.log import get_logger
 from letta.orm.errors import NoResultFound
+from letta.prompts.gpt_system import get_system_text
 from letta.schemas.block import Block, BlockUpdate
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import TextContent
 from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
 from letta.services.message_manager import MessageManager

+logger = get_logger(__name__)
+

 class EphemeralSummaryAgent(BaseAgent):
     """
-    A stateless summarization agent (thin wrapper around OpenAI)
-
-    # TODO: Extend to more clients
+    A stateless summarization agent that utilizes the caller's LLM client to summarize the conversation.
+    TODO (cliandy): allow the summarizer to use another llm_config from the main agent maybe?
     """

     def __init__(
@@ -35,7 +36,7 @@ class EphemeralSummaryAgent(BaseAgent):
     ):
         super().__init__(
             agent_id=agent_id,
-            openai_client=AsyncOpenAI(),
+            openai_client=None,
             message_manager=message_manager,
             agent_manager=agent_manager,
             actor=actor,
@@ -65,17 +66,33 @@ class EphemeralSummaryAgent(BaseAgent):
            input_message = input_messages[0]
            input_message.content[0].text += f"\n\n--- Previous Summary ---\n{block.value}\n"

-        openai_messages = self.pre_process_input_message(input_messages=input_messages)
-        request = self._build_openai_request(openai_messages)
+        # Gets the LLMCLient based on the calling agent's LLM Config
+        agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
+        llm_client = LLMClient.create(
+            provider_type=agent_state.llm_config.model_endpoint_type,
+            put_inner_thoughts_first=True,
+            actor=self.actor,
+        )

-        # TODO: Extend to generic client
-        chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
-        summary = chat_completion.choices[0].message.content.strip()
+        system_message_create = MessageCreate(
+            role=MessageRole.system,
+            content=[TextContent(text=get_system_text("summary_system_prompt"))],
+        )
+        messages = convert_message_creates_to_messages(
+            message_creates=[system_message_create] + input_messages,
+            agent_id=self.agent_id,
+            timezone=agent_state.timezone,
+        )
+
+        request_data = llm_client.build_request_data(messages, agent_state.llm_config, tools=[])
+        response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+        response = llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
+        summary = response.choices[0].message.content.strip()

         await self.block_manager.update_block_async(block_id=block.id, block_update=BlockUpdate(value=summary), actor=self.actor)

-        print(block)
-        print(summary)
+        logger.debug("block:", block)
+        logger.debug("summary:", summary)

         return [
             Message(
@@ -84,22 +101,5 @@ class EphemeralSummaryAgent(BaseAgent):
             )
         ]

-    def _build_openai_request(self, openai_messages: List[Dict]) -> ChatCompletionRequest:
-        current_dir = Path(__file__).parent
-        file_path = current_dir / "prompts" / "summary_system_prompt.txt"
-        with open(file_path, "r") as file:
-            system = file.read()
-
-        system_message = [{"role": "system", "content": system}]
-
-        openai_request = ChatCompletionRequest(
-            model="gpt-4o",
-            messages=system_message + openai_messages,
-            user=self.actor.id,
-            max_completion_tokens=4096,
-            temperature=0.7,
-        )
-        return openai_request
-
     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
         raise NotImplementedError("EphemeralAgent does not support async step.")
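Note: the rewrite above drops the hard-coded AsyncOpenAI dependency; summarization now routes through the generic LLMClient selected from the calling agent's llm_config. Restated outside the class as a sketch (names exactly as in the hunks above; error handling elided):

    llm_client = LLMClient.create(
        provider_type=agent_state.llm_config.model_endpoint_type,
        put_inner_thoughts_first=True,
        actor=actor,
    )
    # Build the provider-specific payload, send it, then normalize the provider
    # response back into a ChatCompletion shape before extracting the summary.
    request_data = llm_client.build_request_data(messages, agent_state.llm_config, tools=[])
    response_data = await llm_client.request_async(request_data, agent_state.llm_config)
    response = llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
    summary = response.choices[0].message.content.strip()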
letta/agents/letta_agent.py CHANGED
@@ -1,8 +1,9 @@
 import asyncio
 import json
 import uuid
+from collections.abc import AsyncGenerator
 from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
+from typing import Optional

 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk
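Note: many hunks in this file are mechanical typing modernizations in the PEP 585/604 style: typing.List/Dict/Tuple give way to the builtin generics, and Optional[X]/Union[A, B] become X | None and A | B. The two signatures below are equivalent at runtime; only the spelling changes:

    from typing import List, Optional

    def before(xs: Optional[List[str]] = None) -> Optional[str]: ...
    def after(xs: list[str] | None = None) -> str | None: ...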
@@ -34,7 +35,7 @@ from letta.otel.context import get_ctx_attributes
 from letta.otel.metric_registry import MetricRegistry
 from letta.otel.tracing import log_event, trace_method, tracer
 from letta.schemas.agent import AgentState, UpdateAgent
-from letta.schemas.enums import MessageRole, ProviderType
+from letta.schemas.enums import JobStatus, MessageRole, ProviderType
 from letta.schemas.letta_message import MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
@@ -58,14 +59,17 @@ from letta.services.summarizer.enums import SummarizationMode
 from letta.services.summarizer.summarizer import Summarizer
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
-from letta.settings import model_settings
+from letta.settings import model_settings, settings, summarizer_settings
 from letta.system import package_function_response
 from letta.types import JsonDict
 from letta.utils import log_telemetry, validate_function_response

+logger = get_logger(__name__)
+
+DEFAULT_SUMMARY_BLOCK_LABEL = "conversation_summary"

-class LettaAgent(BaseAgent):

+class LettaAgent(BaseAgent):
     def __init__(
         self,
         agent_id: str,
@@ -77,11 +81,12 @@ class LettaAgent(BaseAgent):
         actor: User,
         step_manager: StepManager = NoopStepManager(),
         telemetry_manager: TelemetryManager = NoopTelemetryManager(),
-        summary_block_label: str = "conversation_summary",
-        message_buffer_limit: int = 60,  # TODO: Make this configurable
-        message_buffer_min: int = 15,  # TODO: Make this configurable
-        enable_summarization: bool = True,  # TODO: Make this configurable
-        max_summarization_retries: int = 3,  # TODO: Make this configurable
+        current_run_id: str | None = None,
+        summary_block_label: str = DEFAULT_SUMMARY_BLOCK_LABEL,
+        message_buffer_limit: int = summarizer_settings.message_buffer_limit,
+        message_buffer_min: int = summarizer_settings.message_buffer_min,
+        enable_summarization: bool = summarizer_settings.enable_summarization,
+        max_summarization_retries: int = summarizer_settings.max_summarization_retries,
     ):
         super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)
@@ -92,7 +97,9 @@ class LettaAgent(BaseAgent):
         self.passage_manager = passage_manager
         self.step_manager = step_manager
         self.telemetry_manager = telemetry_manager
-        self.response_messages: List[Message] = []
+        self.job_manager = job_manager
+        self.current_run_id = current_run_id
+        self.response_messages: list[Message] = []

         self.last_function_response = None
@@ -117,23 +124,42 @@ class LettaAgent(BaseAgent):
         )

         self.summarizer = Summarizer(
-            mode=SummarizationMode.STATIC_MESSAGE_BUFFER,
+            mode=SummarizationMode(summarizer_settings.mode),
             summarizer_agent=self.summarization_agent,
             # TODO: Make this configurable
             message_buffer_limit=message_buffer_limit,
             message_buffer_min=message_buffer_min,
         )

+    async def _check_run_cancellation(self) -> bool:
+        """
+        Check if the current run associated with this agent execution has been cancelled.
+
+        Returns:
+            True if the run is cancelled, False otherwise (or if no run is associated)
+        """
+        if not self.job_manager or not self.current_run_id:
+            return False
+
+        try:
+            job = await self.job_manager.get_job_by_id_async(job_id=self.current_run_id, actor=self.actor)
+            return job.status == JobStatus.cancelled
+        except Exception as e:
+            # Log the error but don't fail the execution
+            logger.warning(f"Failed to check job cancellation status for job {self.current_run_id}: {e}")
+            return False
+
     @trace_method
     async def step(
         self,
-        input_messages: List[MessageCreate],
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
-        run_id: Optional[str] = None,
+        run_id: str | None = None,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns: Optional[int] = None,
-        include_return_message_types: Optional[List[MessageType]] = None,
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ) -> LettaResponse:
+        # TODO (cliandy): pass in run_id and use at send_message endpoints for all step functions
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
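Note: _check_run_cancellation above is the core of the new run-cancellation support (paired with the +49/-1 change to letta/server/rest_api/routers/v1/jobs.py in this diff). Every step loop in this file now opens with the same guard; condensed from the hunks that follow (the streaming variants additionally yield the stop reason as an SSE data event):

    for i in range(max_steps):
        # Poll job status once per agent step; a cancelled run exits the loop
        # with StopReasonType.cancelled instead of raising an exception.
        if await self._check_run_cancellation():
            stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
            logger.info(f"Agent execution cancelled for run {self.current_run_id}")
            break
        ...  # normal step execution continues here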
@@ -155,11 +181,11 @@
     @trace_method
     async def step_stream_no_tokens(
         self,
-        input_messages: List[MessageCreate],
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns: Optional[int] = None,
-        include_return_message_types: Optional[List[MessageType]] = None,
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ):
         agent_state = await self.agent_manager.get_agent_by_id_async(
             agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
@@ -182,6 +208,13 @@
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -313,11 +346,11 @@
     async def _step(
         self,
         agent_state: AgentState,
-        input_messages: List[MessageCreate],
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
-        run_id: Optional[str] = None,
-        request_start_timestamp_ns: Optional[int] = None,
-    ) -> Tuple[List[Message], List[Message], Optional[LettaStopReason], LettaUsageStatistics]:
+        run_id: str | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> tuple[list[Message], list[Message], LettaStopReason | None, LettaUsageStatistics]:
         """
         Carries out an invocation of the agent loop. In each step, the agent
         1. Rebuilds its memory
@@ -343,6 +376,12 @@
         stop_reason = None
         usage = LettaUsageStatistics()
         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -425,7 +464,7 @@
                 ),
             )

-            MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())

             if not should_continue:
                 break
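Note: the histogram line above (and its twin in step_stream further down) fixes an operand-order bug: step_start - get_utc_timestamp_ns() subtracts a later timestamp from an earlier one, so every recorded step duration was negative. The corrected form is the usual elapsed-time pattern:

    step_start = get_utc_timestamp_ns()            # taken before the step runs
    # ... step work ...
    elapsed = get_utc_timestamp_ns() - step_start  # end minus start, always >= 0
    MetricRegistry().step_execution_time_ms_histogram.record(elapsed, get_ctx_attributes())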
@@ -455,6 +494,8 @@
         return current_in_context_messages, new_in_context_messages, stop_reason, usage

     async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+        if not settings.track_last_agent_run:
+            return
         try:
             await self.agent_manager.update_agent_async(
                 agent_id=self.agent_id,
@@ -467,11 +508,11 @@
     @trace_method
     async def step_stream(
         self,
-        input_messages: List[MessageCreate],
+        input_messages: list[MessageCreate],
         max_steps: int = DEFAULT_MAX_STEPS,
         use_assistant_message: bool = True,
-        request_start_timestamp_ns: Optional[int] = None,
-        include_return_message_types: Optional[List[MessageType]] = None,
+        request_start_timestamp_ns: int | None = None,
+        include_return_message_types: list[MessageType] | None = None,
     ) -> AsyncGenerator[str, None]:
         """
         Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
@@ -503,6 +544,13 @@
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

         for i in range(max_steps):
+            # Check for job cancellation at the start of each step
+            if await self._check_run_cancellation():
+                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                yield f"data: {stop_reason.model_dump_json()}\n\n"
+                break
+
             step_id = generate_step_id()
             step_start = get_utc_timestamp_ns()
             agent_step_span = tracer.start_span("agent_step", start_time=step_start)
@@ -543,7 +591,9 @@
                 raise ValueError(f"Streaming not supported for {agent_state.llm_config}")

             async for chunk in interface.process(
-                stream, ttft_span=request_span, provider_request_start_timestamp_ns=provider_request_start_timestamp_ns
+                stream,
+                ttft_span=request_span,
+                provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
             ):
                 # Measure time to first token
                 if first_chunk and request_span is not None:
@@ -653,7 +703,7 @@
                 yield f"data: {tool_return.model_dump_json()}\n\n"

             # TODO (cliandy): consolidate and expand with trace
-            MetricRegistry().step_execution_time_ms_histogram.record(step_start - get_utc_timestamp_ns(), get_ctx_attributes())
+            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())

             if not should_continue:
                 break
@@ -686,13 +736,13 @@
     # noinspection PyInconsistentReturns
     async def _build_and_request_from_llm(
         self,
-        current_in_context_messages: List[Message],
-        new_in_context_messages: List[Message],
+        current_in_context_messages: list[Message],
+        new_in_context_messages: list[Message],
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
         agent_step_span: "Span",
-    ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]] | None:
+    ) -> tuple[dict, dict, list[Message], list[Message], list[str]] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -738,12 +788,12 @@
         first_chunk: bool,
         ttft_span: "Span",
         request_start_timestamp_ns: int,
-        current_in_context_messages: List[Message],
-        new_in_context_messages: List[Message],
+        current_in_context_messages: list[Message],
+        new_in_context_messages: list[Message],
         agent_state: AgentState,
         llm_client: LLMClientBase,
         tool_rules_solver: ToolRulesSolver,
-    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str], int] | None:
+    ) -> tuple[dict, AsyncStream[ChatCompletionChunk], list[Message], list[Message], list[str], int] | None:
         for attempt in range(self.max_summarization_retries + 1):
             try:
                 log_event("agent.stream_no_tokens.messages.refreshed")
@@ -795,11 +845,11 @@
         self,
         e: Exception,
         llm_client: LLMClientBase,
-        in_context_messages: List[Message],
-        new_letta_messages: List[Message],
+        in_context_messages: list[Message],
+        new_letta_messages: list[Message],
         llm_config: LLMConfig,
         force: bool,
-    ) -> List[Message]:
+    ) -> list[Message]:
         if isinstance(e, ContextWindowExceededError):
             return await self._rebuild_context_window(
                 in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
@@ -810,12 +860,12 @@
     @trace_method
     async def _rebuild_context_window(
         self,
-        in_context_messages: List[Message],
-        new_letta_messages: List[Message],
+        in_context_messages: list[Message],
+        new_letta_messages: list[Message],
         llm_config: LLMConfig,
-        total_tokens: Optional[int] = None,
+        total_tokens: int | None = None,
         force: bool = False,
-    ) -> List[Message]:
+    ) -> list[Message]:
         # If total tokens is reached, we truncate down
         # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
         if force or (total_tokens and total_tokens > llm_config.context_window):
@@ -851,10 +901,10 @@
     async def _create_llm_request_data_async(
         self,
         llm_client: LLMClientBase,
-        in_context_messages: List[Message],
+        in_context_messages: list[Message],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
-    ) -> Tuple[dict, List[str]]:
+    ) -> tuple[dict, list[str]]:
         self.num_messages, self.num_archival_memories = await asyncio.gather(
             (
                 self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
@@ -925,18 +975,18 @@
     async def _handle_ai_response(
         self,
         tool_call: ToolCall,
-        valid_tool_names: List[str],
+        valid_tool_names: list[str],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
         usage: UsageStatistics,
-        reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
-        pre_computed_assistant_message_id: Optional[str] = None,
+        reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
+        pre_computed_assistant_message_id: str | None = None,
         step_id: str | None = None,
-        initial_messages: Optional[List[Message]] = None,
+        initial_messages: list[Message] | None = None,
         agent_step_span: Optional["Span"] = None,
-        is_final_step: Optional[bool] = None,
-        run_id: Optional[str] = None,
-    ) -> Tuple[List[Message], bool, Optional[LettaStopReason]]:
+        is_final_step: bool | None = None,
+        run_id: str | None = None,
+    ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
         Handle the final AI response once streaming completes, execute / validate the
         tool call, decide whether we should keep stepping, and persist state.
@@ -1012,8 +1062,9 @@
             context_window_limit=agent_state.llm_config.context_window,
             usage=usage,
             provider_id=None,
-            job_id=run_id,
+            job_id=run_id if run_id else self.current_run_id,
             step_id=step_id,
+            project_id=agent_state.project_id,
         )

         tool_call_messages = create_letta_messages_from_llm_response(
@@ -1150,7 +1201,7 @@
             name="tool_execution_completed",
             attributes={
                 "tool_name": target_tool.name,
-                "duration_ms": ns_to_ms((end_time - start_time)),
+                "duration_ms": ns_to_ms(end_time - start_time),
                 "success": tool_execution_result.success_flag,
                 "tool_type": target_tool.tool_type,
                 "tool_id": target_tool.id,
@@ -1160,7 +1211,7 @@
         return tool_execution_result

     @trace_method
-    def _load_last_function_response(self, in_context_messages: List[Message]):
+    def _load_last_function_response(self, in_context_messages: list[Message]):
         """Load the last function response from message history"""
         for msg in reversed(in_context_messages):
             if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
letta/agents/voice_agent.py CHANGED
@@ -153,6 +153,7 @@ class VoiceAgent(BaseAgent):
             timezone=agent_state.timezone,
             previous_message_count=self.num_messages,
             archival_memory_size=self.num_archival_memories,
+            sources=agent_state.sources,
         )
         letta_message_db_queue = create_input_messages(
             input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=self.actor
@@ -366,7 +367,7 @@
                 "description": (
                     "Look in long-term or earlier-conversation memory **only when** the "
                     "user asks about something missing from the visible context. "
-                    "The users latest utterance is sent automatically as the main query.\n\n"
+                    "The user's latest utterance is sent automatically as the main query.\n\n"
                     "Optional refinements (set unused fields to *null*):\n"
                     "• `convo_keyword_queries` – extra names/IDs if the request is vague.\n"
                     "• `start_minutes_ago` / `end_minutes_ago` – limit results to a recent time window."
letta/constants.py CHANGED
@@ -83,7 +83,7 @@ SEND_MESSAGE_TOOL_NAME = "send_message"
 # Base tools that cannot be edited, as they access agent state directly
 # Note that we don't include "conversation_search_date" for now
 BASE_TOOLS = [SEND_MESSAGE_TOOL_NAME, "conversation_search", "archival_memory_insert", "archival_memory_search"]
-DEPRECATED_BASE_TOOLS = ["archival_memory_insert", "archival_memory_search"]
+DEPRECATED_LETTA_TOOLS = ["archival_memory_insert", "archival_memory_search"]
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # New v2 collection of the base memory tools (effecitvely same as sleeptime set), to pair with memgpt_v2 prompt
@@ -115,7 +115,8 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
     "finish_rethinking_memory",
 ]
 # Multi agent tools
-MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags"]
+MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
+LOCAL_ONLY_MULTI_AGENT_TOOLS = ["send_message_to_agent_async"]

 # Used to catch if line numbers are pushed in
 # MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(r"^Line \d+: ", re.MULTILINE)
@@ -130,7 +131,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
 BUILTIN_TOOLS = ["run_code", "web_search"]

 # Built in tools
-FILES_TOOLS = ["open_files", "grep_files", "search_files"]
+FILES_TOOLS = ["open_files", "grep_files", "semantic_search_files"]

 FILE_MEMORY_EXISTS_MESSAGE = "The following files are currently accessible in memory:"
 FILE_MEMORY_EMPTY_MESSAGE = (
@@ -325,7 +326,7 @@ MAX_ERROR_MESSAGE_CHAR_LIMIT = 500
 CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 5000
 CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 5000
 CORE_MEMORY_BLOCK_CHAR_LIMIT: int = 5000
-CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 5000
+CORE_MEMORY_SOURCE_CHAR_LIMIT: int = 50000
 # Function return limits
 FUNCTION_RETURN_CHAR_LIMIT = 6000  # ~300 words
 BASE_FUNCTION_RETURN_CHAR_LIMIT = 1000000  # very high (we rely on implementation)
@@ -357,6 +358,9 @@ REDIS_INCLUDE = "include"
 REDIS_EXCLUDE = "exclude"
 REDIS_SET_DEFAULT_VAL = "None"
 REDIS_DEFAULT_CACHE_PREFIX = "letta_cache"
+REDIS_RUN_ID_PREFIX = "agent:send_message:run_id"

 # TODO: This is temporary, eventually use token-based eviction
 MAX_FILES_OPEN = 5
+
+GET_PROVIDERS_TIMEOUT_SECONDS = 10
letta/functions/function_sets/files.py CHANGED
@@ -10,15 +10,20 @@ if TYPE_CHECKING:
 async def open_files(agent_state: "AgentState", file_requests: List[FileOpenRequest], close_all_others: bool = False) -> str:
     """Open one or more files and load their contents into files section in core memory. Maximum of 5 files can be opened simultaneously.

+    Use this when you want to:
+    - Inspect or reference file contents during reasoning
+    - View specific portions of large files (e.g. functions or definitions)
+    - Replace currently open files with a new set for focused context (via `close_all_others=True`)
+
     Examples:
-        Open single file (entire content):
-        file_requests = [FileOpenRequest(file_name="config.py")]
+        Open single file belonging to a directory named `project_utils` (entire content):
+        file_requests = [FileOpenRequest(file_name="project_utils/config.py")]

         Open multiple files with different view ranges:
         file_requests = [
-            FileOpenRequest(file_name="config.py", offset=1, length=50),    # Lines 1-50
-            FileOpenRequest(file_name="main.py", offset=100, length=100),   # Lines 100-199
-            FileOpenRequest(file_name="utils.py")                           # Entire file
+            FileOpenRequest(file_name="project_utils/config.py", offset=1, length=50),    # Lines 1-50
+            FileOpenRequest(file_name="project_utils/main.py", offset=100, length=100),   # Lines 100-199
+            FileOpenRequest(file_name="project_utils/utils.py")                           # Entire file
         ]

         Close all other files and open new ones:
@@ -43,6 +48,11 @@ async def grep_files(
     """
     Grep tool to search files across data sources using a keyword or regex pattern.

+    Use this when you want to:
+    - Quickly find occurrences of a variable, function, or keyword
+    - Locate log messages, error codes, or TODOs across files
+    - Understand surrounding code by including `context_lines`
+
     Args:
         pattern (str): Keyword or regex pattern to search within file contents.
         include (Optional[str]): Optional keyword or regex pattern to filter filenames to include in the search.
@@ -55,9 +65,14 @@
     raise NotImplementedError("Tool not implemented. Please contact the Letta team.")


-async def search_files(agent_state: "AgentState", query: str) -> List["FileMetadata"]:
+async def semantic_search_files(agent_state: "AgentState", query: str) -> List["FileMetadata"]:
     """
-    Get list of most relevant files across all data sources using embedding search.
+    Get list of most relevant chunks from any file using vector/embedding search.
+
+    Use this when you want to:
+    - Find related content that without using exact keywords (e.g., conceptually similar sections)
+    - Look up high-level descriptions, documentation, or config patterns
+    - Perform fuzzy search when grep isn't sufficient

     Args:
         query (str): The search query.
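Note: with this rename the split between the two search tools is explicit: grep_files for exact keyword/regex matches, semantic_search_files (formerly search_files) for embedding-based lookups; FILES_TOOLS in constants.py is updated to match. A hedged usage sketch from the agent's side, using only the parameters documented above (the bodies here are stubs that raise NotImplementedError because execution happens server-side):

    # Exact-match/regex lookup across attached files:
    matches = await grep_files(agent_state, pattern="TODO", include=r"\.py$")

    # Fuzzy/conceptual lookup when the exact wording is unknown:
    relevant = await semantic_search_files(agent_state, query="how request retries are configured")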