letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250613104250__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +11 -1
  3. letta/agents/base_agent.py +11 -4
  4. letta/agents/ephemeral_summary_agent.py +3 -2
  5. letta/agents/letta_agent.py +109 -78
  6. letta/agents/letta_agent_batch.py +4 -3
  7. letta/agents/voice_agent.py +3 -3
  8. letta/agents/voice_sleeptime_agent.py +3 -2
  9. letta/client/client.py +6 -3
  10. letta/constants.py +6 -0
  11. letta/data_sources/connectors.py +3 -5
  12. letta/functions/async_composio_toolset.py +4 -1
  13. letta/functions/function_sets/files.py +4 -3
  14. letta/functions/schema_generator.py +5 -2
  15. letta/groups/sleeptime_multi_agent_v2.py +4 -3
  16. letta/helpers/converters.py +7 -1
  17. letta/helpers/message_helper.py +31 -11
  18. letta/helpers/tool_rule_solver.py +69 -4
  19. letta/interfaces/anthropic_streaming_interface.py +8 -1
  20. letta/interfaces/openai_streaming_interface.py +4 -1
  21. letta/llm_api/anthropic_client.py +4 -4
  22. letta/llm_api/openai_client.py +56 -11
  23. letta/local_llm/utils.py +3 -20
  24. letta/orm/sqlalchemy_base.py +7 -1
  25. letta/otel/metric_registry.py +26 -0
  26. letta/otel/metrics.py +78 -14
  27. letta/schemas/letta_message_content.py +64 -3
  28. letta/schemas/letta_request.py +5 -1
  29. letta/schemas/message.py +61 -14
  30. letta/schemas/openai/chat_completion_request.py +1 -1
  31. letta/schemas/providers.py +41 -14
  32. letta/schemas/tool_rule.py +67 -0
  33. letta/schemas/user.py +2 -2
  34. letta/server/rest_api/routers/v1/agents.py +22 -12
  35. letta/server/rest_api/routers/v1/sources.py +13 -25
  36. letta/server/server.py +10 -5
  37. letta/services/agent_manager.py +5 -1
  38. letta/services/file_manager.py +219 -0
  39. letta/services/file_processor/chunker/line_chunker.py +119 -14
  40. letta/services/file_processor/file_processor.py +8 -8
  41. letta/services/file_processor/file_types.py +303 -0
  42. letta/services/file_processor/parser/mistral_parser.py +2 -11
  43. letta/services/helpers/agent_manager_helper.py +6 -0
  44. letta/services/message_manager.py +32 -0
  45. letta/services/organization_manager.py +4 -6
  46. letta/services/passage_manager.py +1 -0
  47. letta/services/source_manager.py +0 -208
  48. letta/services/tool_executor/composio_tool_executor.py +5 -1
  49. letta/services/tool_executor/files_tool_executor.py +291 -15
  50. letta/services/user_manager.py +8 -8
  51. letta/system.py +3 -1
  52. letta/utils.py +7 -13
  53. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/METADATA +2 -2
  54. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/RECORD +57 -55
  55. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/LICENSE +0 -0
  56. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/WHEEL +0 -0
  57. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250613104250.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,6 +1,6 @@
  import os

- __version__ = "0.8.3"
+ __version__ = "0.8.4"

  if os.environ.get("LETTA_VERSION"):
      __version__ = os.environ["LETTA_VERSION"]
letta/agent.py CHANGED
@@ -46,7 +46,7 @@ from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_
  from letta.schemas.block import BlockUpdate
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import MessageRole, ProviderType
- from letta.schemas.letta_message_content import TextContent
+ from letta.schemas.letta_message_content import ImageContent, TextContent
  from letta.schemas.memory import ContextWindowOverview, Memory
  from letta.schemas.message import Message, MessageCreate, ToolReturn
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
@@ -369,6 +369,16 @@ class Agent(BaseAgent):
          )
      else:
          # Fallback to existing flow
+         for message in message_sequence:
+             if isinstance(message.content, list):
+
+                 def get_fallback_text_content(content):
+                     if isinstance(content, ImageContent):
+                         return TextContent(text="[Image Here]")
+                     return content
+
+                 message.content = [get_fallback_text_content(content) for content in message.content]
+
          response = create(
              llm_config=self.agent_state.llm_config,
              messages=message_sequence,
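Note: the fallback added above downgrades multimodal content for LLM endpoints that only accept text, swapping each ImageContent part for a "[Image Here]" text placeholder before the request is built. A minimal standalone sketch of the same transformation; the dataclasses below are stand-ins for the real letta schema classes, not their actual definitions:

from dataclasses import dataclass

@dataclass
class TextContent:  # stand-in for letta.schemas.letta_message_content.TextContent
    text: str

@dataclass
class ImageContent:  # stand-in for letta.schemas.letta_message_content.ImageContent
    source: str

def get_fallback_text_content(content):
    # Images become a text placeholder; all other parts pass through unchanged.
    if isinstance(content, ImageContent):
        return TextContent(text="[Image Here]")
    return content

parts = [TextContent(text="What is in this photo?"), ImageContent(source="...")]
parts = [get_fallback_text_content(c) for c in parts]
assert parts[1] == TextContent(text="[Image Here]")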
letta/agents/base_agent.py CHANGED
@@ -3,6 +3,8 @@ from typing import Any, AsyncGenerator, List, Optional, Union

  import openai

+ from letta.constants import DEFAULT_MAX_STEPS
+ from letta.helpers import ToolRulesSolver
  from letta.helpers.datetime_helpers import get_utc_time
  from letta.log import get_logger
  from letta.schemas.agent import AgentState
@@ -15,6 +17,7 @@ from letta.schemas.user import User
  from letta.services.agent_manager import AgentManager
  from letta.services.helpers.agent_manager_helper import compile_system_message
  from letta.services.message_manager import MessageManager
+ from letta.services.passage_manager import PassageManager
  from letta.utils import united_diff

  logger = get_logger(__name__)
@@ -39,11 +42,13 @@ class BaseAgent(ABC):
          self.openai_client = openai_client
          self.message_manager = message_manager
          self.agent_manager = agent_manager
+         # TODO: Pass this in
+         self.passage_manager = PassageManager()
          self.actor = actor
          self.logger = get_logger(agent_id)

      @abstractmethod
-     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
+     async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
          """
          Main execution loop for the agent.
          """
@@ -51,7 +56,7 @@

      @abstractmethod
      async def step_stream(
-         self, input_messages: List[MessageCreate], max_steps: int = 10
+         self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS
      ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
          """
          Main streaming execution loop for the agent.
@@ -77,8 +82,9 @@
          self,
          in_context_messages: List[Message],
          agent_state: AgentState,
-         num_messages: int | None = None,  # storing these calculations is specific to the voice agent
-         num_archival_memories: int | None = None,
+         tool_rules_solver: Optional[ToolRulesSolver] = None,
+         num_messages: Optional[int] = None,  # storing these calculations is specific to the voice agent
+         num_archival_memories: Optional[int] = None,
      ) -> List[Message]:
          """
          Async version of function above. For now before breaking up components, changes should be made in both places.
@@ -112,6 +118,7 @@
              in_context_memory_last_edit=memory_edit_timestamp,
              previous_message_count=num_messages,
              archival_memory_size=num_archival_memories,
+             tool_rules_solver=tool_rules_solver,
          )

          diff = united_diff(curr_system_message_text, new_system_message_str)
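Note: a recurring change in this release is that the hardcoded max_steps defaults (10 in most signatures, 20 in VoiceSleeptimeAgent.step) are replaced by a shared DEFAULT_MAX_STEPS constant, which lines up with the letta/constants.py +6 -0 entry in the file list above. A sketch of the idea; the constant's actual value is not visible in this diff, so the number below is only an assumed placeholder:

# letta/constants.py (sketch; the real value is not shown in this diff)
DEFAULT_MAX_STEPS = 50  # assumed value, for illustration only

# Call sites then share one default instead of scattered literals, e.g.:
# async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse: ...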
letta/agents/ephemeral_summary_agent.py CHANGED
@@ -4,6 +4,7 @@ from typing import AsyncGenerator, Dict, List
  from openai import AsyncOpenAI

  from letta.agents.base_agent import BaseAgent
+ from letta.constants import DEFAULT_MAX_STEPS
  from letta.orm.errors import NoResultFound
  from letta.schemas.block import Block, BlockUpdate
  from letta.schemas.enums import MessageRole
@@ -42,7 +43,7 @@ class EphemeralSummaryAgent(BaseAgent):
          self.target_block_label = target_block_label
          self.block_manager = block_manager

-     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> List[Message]:
+     async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> List[Message]:
          if len(input_messages) > 1:
              raise ValueError("Can only invoke EphemeralSummaryAgent with a single summarization message.")

@@ -100,5 +101,5 @@
          )
          return openai_request

-     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = 10) -> AsyncGenerator[str, None]:
+     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
          raise NotImplementedError("EphemeralAgent does not support async step.")
letta/agents/letta_agent.py CHANGED
@@ -8,12 +8,8 @@ from openai.types.chat import ChatCompletionChunk

  from letta.agents.base_agent import BaseAgent
  from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
- from letta.agents.helpers import (
-     _create_letta_response,
-     _prepare_in_context_messages_async,
-     _prepare_in_context_messages_no_persist_async,
-     generate_step_id,
- )
+ from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_no_persist_async, generate_step_id
+ from letta.constants import DEFAULT_MAX_STEPS
  from letta.errors import ContextWindowExceededError
  from letta.helpers import ToolRulesSolver
  from letta.helpers.datetime_helpers import AsyncTimer, get_utc_timestamp_ns, ns_to_ms
@@ -37,6 +33,7 @@ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
  from letta.schemas.provider_trace import ProviderTraceCreate
+ from letta.schemas.tool_execution_result import ToolExecutionResult
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
  from letta.server.rest_api.utils import create_letta_messages_from_llm_response
@@ -119,7 +116,7 @@ class LettaAgent(BaseAgent):
      async def step(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
@@ -144,7 +141,7 @@
      async def step_stream_no_tokens(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
@@ -152,9 +149,10 @@
          agent_state = await self.agent_manager.get_agent_by_id_async(
              agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
          )
-         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
+         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
              input_messages, agent_state, self.message_manager, self.actor
          )
+         initial_messages = new_in_context_messages
          tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
          llm_client = LLMClient.create(
              provider_type=agent_state.llm_config.model_endpoint_type,
@@ -167,18 +165,20 @@
          request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
          request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

-         for _ in range(max_steps):
+         for i in range(max_steps):
              step_id = generate_step_id()
              step_start = get_utc_timestamp_ns()
              agent_step_span = tracer.start_span("agent_step", start_time=step_start)
              agent_step_span.set_attributes({"step_id": step_id})

-             request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
-                 current_in_context_messages,
-                 new_in_context_messages,
-                 agent_state,
-                 llm_client,
-                 tool_rules_solver,
+             request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                 await self._build_and_request_from_llm(
+                     current_in_context_messages,
+                     new_in_context_messages,
+                     agent_state,
+                     llm_client,
+                     tool_rules_solver,
+                 )
              )
              in_context_messages = current_in_context_messages + new_in_context_messages

@@ -223,14 +223,18 @@

              persisted_messages, should_continue = await self._handle_ai_response(
                  tool_call,
+                 valid_tool_names,
                  agent_state,
                  tool_rules_solver,
                  response.usage,
                  reasoning_content=reasoning,
+                 initial_messages=initial_messages,
                  agent_step_span=agent_step_span,
+                 is_final_step=(i == max_steps - 1),
              )
              self.response_messages.extend(persisted_messages)
              new_in_context_messages.extend(persisted_messages)
+             initial_messages = None
              log_event("agent.stream_no_tokens.llm_response.processed")  # [4^]

              # log step time
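Note: the loop variable change from `for _` to `for i` exists so that `is_final_step=(i == max_steps - 1)` can be passed into _handle_ai_response; on the last permitted step the handler suppresses any further heartbeat. A minimal sketch of the pattern:

# Sketch of the final-step guard: on the last iteration, no new step is requested.
max_steps = 3
for i in range(max_steps):
    is_final_step = i == max_steps - 1
    if is_final_step:
        request_heartbeat = False  # the agent loop will not continue past this step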
@@ -291,7 +295,7 @@
          self,
          agent_state: AgentState,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          request_start_timestamp_ns: Optional[int] = None,
      ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
          """
@@ -301,9 +305,10 @@
          3. Fetches a response from the LLM
          4. Processes the response
          """
-         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
+         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
              input_messages, agent_state, self.message_manager, self.actor
          )
+         initial_messages = new_in_context_messages
          tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
          llm_client = LLMClient.create(
              provider_type=agent_state.llm_config.model_endpoint_type,
@@ -316,14 +321,16 @@
          request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

          usage = LettaUsageStatistics()
-         for _ in range(max_steps):
+         for i in range(max_steps):
              step_id = generate_step_id()
              step_start = get_utc_timestamp_ns()
              agent_step_span = tracer.start_span("agent_step", start_time=step_start)
              agent_step_span.set_attributes({"step_id": step_id})

-             request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
-                 current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
+             request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                 await self._build_and_request_from_llm(
+                     current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
+                 )
              )
              in_context_messages = current_in_context_messages + new_in_context_messages
@@ -362,15 +369,19 @@

              persisted_messages, should_continue = await self._handle_ai_response(
                  tool_call,
+                 valid_tool_names,
                  agent_state,
                  tool_rules_solver,
                  response.usage,
                  reasoning_content=reasoning,
                  step_id=step_id,
+                 initial_messages=initial_messages,
                  agent_step_span=agent_step_span,
+                 is_final_step=(i == max_steps - 1),
              )
              self.response_messages.extend(persisted_messages)
              new_in_context_messages.extend(persisted_messages)
+             initial_messages = None
              log_event("agent.step.llm_response.processed")  # [4^]

              # log step time
@@ -416,7 +427,7 @@
      async def step_stream(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          request_start_timestamp_ns: Optional[int] = None,
          include_return_message_types: Optional[List[MessageType]] = None,
@@ -435,10 +446,6 @@
          current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
              input_messages, agent_state, self.message_manager, self.actor
          )
-
-         # Special strategy to lower TTFT
-         # Delay persistence of the initial input message as much as possible
-         persisted_input_messages = False
          initial_messages = new_in_context_messages

          tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
@@ -454,21 +461,23 @@
          request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

          provider_request_start_timestamp_ns = None
-         for _ in range(max_steps):
+         for i in range(max_steps):
              step_id = generate_step_id()
              step_start = get_utc_timestamp_ns()
              agent_step_span = tracer.start_span("agent_step", start_time=step_start)
              agent_step_span.set_attributes({"step_id": step_id})

-             request_data, stream, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm_streaming(
-                 first_chunk,
-                 agent_step_span,
-                 request_start_timestamp_ns,
-                 current_in_context_messages,
-                 new_in_context_messages,
-                 agent_state,
-                 llm_client,
-                 tool_rules_solver,
+             request_data, stream, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+                 await self._build_and_request_from_llm_streaming(
+                     first_chunk,
+                     agent_step_span,
+                     request_start_timestamp_ns,
+                     current_in_context_messages,
+                     new_in_context_messages,
+                     agent_state,
+                     llm_client,
+                     tool_rules_solver,
+                 )
              )
              log_event("agent.stream.llm_response.received")  # [3^]

@@ -513,12 +522,6 @@
                  interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
              )

-             # Persist input messages if not already
-             # Special strategy to lower TTFT
-             if not persisted_input_messages:
-                 await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
-                 persisted_input_messages = True
-
              # log LLM request time
              now = get_utc_timestamp_ns()
              llm_request_ns = now - step_start
@@ -529,6 +532,7 @@
              reasoning_content = interface.get_reasoning_content()
              persisted_messages, should_continue = await self._handle_ai_response(
                  tool_call,
+                 valid_tool_names,
                  agent_state,
                  tool_rules_solver,
                  UsageStatistics(
@@ -539,10 +543,13 @@
                  reasoning_content=reasoning_content,
                  pre_computed_assistant_message_id=interface.letta_message_id,
                  step_id=step_id,
+                 initial_messages=initial_messages,
                  agent_step_span=agent_step_span,
+                 is_final_step=(i == max_steps - 1),
              )
              self.response_messages.extend(persisted_messages)
              new_in_context_messages.extend(persisted_messages)
+             initial_messages = None

              # log total step time
              now = get_utc_timestamp_ns()
@@ -614,12 +621,12 @@
          agent_state: AgentState,
          llm_client: LLMClientBase,
          tool_rules_solver: ToolRulesSolver,
-     ) -> Tuple[Dict, Dict, List[Message], List[Message]]:
+     ) -> Tuple[Dict, Dict, List[Message], List[Message], List[str]]:
          for attempt in range(self.max_summarization_retries + 1):
              try:
                  log_event("agent.stream_no_tokens.messages.refreshed")
                  # Create LLM request data
-                 request_data = await self._create_llm_request_data_async(
+                 request_data, valid_tool_names = await self._create_llm_request_data_async(
                      llm_client=llm_client,
                      in_context_messages=current_in_context_messages + new_in_context_messages,
                      agent_state=agent_state,
@@ -634,12 +641,7 @@
                      dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
                  )
                  # Attempt LLM request
-                 return (
-                     request_data,
-                     response,
-                     current_in_context_messages,
-                     new_in_context_messages,
-                 )
+                 return (request_data, response, current_in_context_messages, new_in_context_messages, valid_tool_names)

              except Exception as e:
                  if attempt == self.max_summarization_retries:
@@ -667,12 +669,12 @@
          agent_state: AgentState,
          llm_client: LLMClientBase,
          tool_rules_solver: ToolRulesSolver,
-     ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message]]:
+     ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message], List[str]]:
          for attempt in range(self.max_summarization_retries + 1):
              try:
                  log_event("agent.stream_no_tokens.messages.refreshed")
                  # Create LLM request data
-                 request_data = await self._create_llm_request_data_async(
+                 request_data, valid_tool_names = await self._create_llm_request_data_async(
                      llm_client=llm_client,
                      in_context_messages=current_in_context_messages + new_in_context_messages,
                      agent_state=agent_state,
@@ -691,6 +693,7 @@
                      await llm_client.stream_async(request_data, agent_state.llm_config),
                      current_in_context_messages,
                      new_in_context_messages,
+                     valid_tool_names,
                  )

              except Exception as e:
@@ -773,7 +776,7 @@
          in_context_messages: List[Message],
          agent_state: AgentState,
          tool_rules_solver: ToolRulesSolver,
-     ) -> dict:
+     ) -> Tuple[dict, List[str]]:
          self.num_messages, self.num_archival_memories = await asyncio.gather(
              (
                  self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
@@ -787,7 +790,11 @@
              ),
          )
          in_context_messages = await self._rebuild_memory_async(
-             in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
+             in_context_messages,
+             agent_state,
+             num_messages=self.num_messages,
+             num_archival_memories=self.num_archival_memories,
+             tool_rules_solver=tool_rules_solver,
          )

          tools = [
@@ -809,8 +816,7 @@
          ]

          # Mirror the sync agent loop: get allowed tools or allow all if none are allowed
-         if self.last_function_response is None:
-             self.last_function_response = self._load_last_function_response(in_context_messages)
+         self.last_function_response = self._load_last_function_response(in_context_messages)
          valid_tool_names = tool_rules_solver.get_allowed_tool_names(
              available_tools=set([t.name for t in tools]),
              last_function_response=self.last_function_response,
@@ -827,28 +833,40 @@
              tool_list=allowed_tools, response_format=agent_state.response_format, request_heartbeat=True
          )

-         return llm_client.build_request_data(in_context_messages, agent_state.llm_config, allowed_tools, force_tool_call)
+         return (
+             llm_client.build_request_data(
+                 in_context_messages,
+                 agent_state.llm_config,
+                 allowed_tools,
+                 force_tool_call,
+             ),
+             valid_tool_names,
+         )

      @trace_method
      async def _handle_ai_response(
          self,
          tool_call: ToolCall,
+         valid_tool_names: List[str],
          agent_state: AgentState,
          tool_rules_solver: ToolRulesSolver,
          usage: UsageStatistics,
          reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
          pre_computed_assistant_message_id: Optional[str] = None,
          step_id: str | None = None,
-         new_in_context_messages: Optional[List[Message]] = None,
+         initial_messages: Optional[List[Message]] = None,
          agent_step_span: Optional["Span"] = None,
+         is_final_step: Optional[bool] = None,
      ) -> Tuple[List[Message], bool]:
          """
          Now that streaming is done, handle the final AI response.
          This might yield additional SSE tokens if we do stalling.
          At the end, set self._continue_execution accordingly.
          """
+         # Check if the called tool is allowed by tool name:
          tool_call_name = tool_call.function.name
          tool_call_args_str = tool_call.function.arguments
+
          # Temp hack to gracefully handle parallel tool calling attempt, only take first one
          if "}{" in tool_call_args_str:
              tool_call_args_str = tool_call_args_str.split("}{", 1)[0] + "}"
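Note: the "temp hack" above handles a model that attempts parallel tool calls by emitting two JSON argument objects back to back; only the first object is kept. The behavior, as a runnable illustration:

# Two concatenated argument objects are truncated to the first one.
args = '{"query": "cats"}{"query": "dogs"}'
if "}{" in args:
    args = args.split("}{", 1)[0] + "}"
assert args == '{"query": "cats"}'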
@@ -861,17 +879,21 @@
          except AssertionError:
              tool_args = json.loads(tool_args)

-         # Get request heartbeats and coerce to bool
-         request_heartbeat = tool_args.pop("request_heartbeat", False)
-         # Pre-emptively pop out inner_thoughts
-         tool_args.pop(INNER_THOUGHTS_KWARG, "")
-
-         # So this is necessary, because sometimes non-structured outputs makes mistakes
-         if not isinstance(request_heartbeat, bool):
-             if isinstance(request_heartbeat, str):
-                 request_heartbeat = request_heartbeat.lower() == "true"
-             else:
-                 request_heartbeat = bool(request_heartbeat)
+         if is_final_step:
+             logger.info("Agent has reached max steps.")
+             request_heartbeat = False
+         else:
+             # Get request heartbeats and coerce to bool
+             request_heartbeat = tool_args.pop("request_heartbeat", False)
+             # Pre-emptively pop out inner_thoughts
+             tool_args.pop(INNER_THOUGHTS_KWARG, "")
+
+             # So this is necessary, because sometimes non-structured outputs makes mistakes
+             if not isinstance(request_heartbeat, bool):
+                 if isinstance(request_heartbeat, str):
+                     request_heartbeat = request_heartbeat.lower() == "true"
+                 else:
+                     request_heartbeat = bool(request_heartbeat)

          tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"

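Note: the heartbeat coercion guards against non-structured outputs where request_heartbeat arrives as a string or a number instead of a bool. Extracted as a runnable sketch:

def coerce_heartbeat(value) -> bool:
    # Mirrors the branch above: bools pass through, strings compare
    # case-insensitively against "true", everything else uses truthiness.
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        return value.lower() == "true"
    return bool(value)

assert coerce_heartbeat("True") is True
assert coerce_heartbeat("false") is False
assert coerce_heartbeat(1) is True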
@@ -883,14 +905,21 @@
              tool_call_id=tool_call_id,
              request_heartbeat=request_heartbeat,
          )
-
-         tool_execution_result = await self._execute_tool(
-             tool_name=tool_call_name,
-             tool_args=tool_args,
-             agent_state=agent_state,
-             agent_step_span=agent_step_span,
-             step_id=step_id,
-         )
+         if tool_call_name not in valid_tool_names:
+             base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
+             violated_rule_messages = tool_rules_solver.guess_rule_violation(tool_call_name)
+             if violated_rule_messages:
+                 bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
+                 base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
+             tool_execution_result = ToolExecutionResult(status="error", func_return=base_error_message)
+         else:
+             tool_execution_result = await self._execute_tool(
+                 tool_name=tool_call_name,
+                 tool_args=tool_args,
+                 agent_state=agent_state,
+                 agent_step_span=agent_step_span,
+                 step_id=step_id,
+             )
          log_telemetry(
              self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
          )
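Note: with the guard above, a tool call that violates the active tool rules is no longer executed; the model instead receives an error return built with ToolRulesSolver.guess_rule_violation (added in this release, per the letta/helpers/tool_rule_solver.py and letta/schemas/tool_rule.py entries in the file list). A standalone sketch of the message it constructs, with invented sample values:

valid_tool_names = ["send_message", "archival_memory_search"]
tool_call_name = "web_search"
violated_rule_messages = ["web_search can only be called after archival_memory_search"]  # hypothetical solver output

base_error_message = f"[ToolConstraintError] Cannot call {tool_call_name}, valid tools to call include: {valid_tool_names}."
if violated_rule_messages:
    bullet_points = "\n".join(f"\t- {msg}" for msg in violated_rule_messages)
    base_error_message += f"\n** Hint: Possible rules that were violated:\n{bullet_points}"
print(base_error_message)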
@@ -960,7 +989,9 @@
              step_id=logged_step.id if logged_step else None,  # TODO (cliandy): eventually move over other agent loops
          )

-         persisted_messages = await self.message_manager.create_many_messages_async(tool_call_messages, actor=self.actor)
+         persisted_messages = await self.message_manager.create_many_messages_async(
+             (initial_messages or []) + tool_call_messages, actor=self.actor
+         )
          self.last_function_response = function_response

          return persisted_messages, continue_stepping
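Note: this is the other half of the lower-TTFT strategy visible in this file: instead of a persisted_input_messages flag and an early write inside the streaming loop, the input messages are now held as initial_messages, passed into _handle_ai_response, and written in the same batch as the step's output messages (initial_messages is then reset to None so later steps do not re-persist them). A simplified sketch of the pattern, not the actual letta implementation:

async def persist_step(message_manager, initial_messages, tool_call_messages, actor):
    # One batched write instead of two: nothing is persisted before the
    # first LLM response arrives, which lowers time-to-first-token.
    return await message_manager.create_many_messages_async(
        (initial_messages or []) + tool_call_messages, actor=actor
    )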
letta/agents/letta_agent_batch.py CHANGED
@@ -8,6 +8,7 @@ from anthropic.types.beta.messages import BetaMessageBatchCanceledResult, BetaMe

  from letta.agents.base_agent import BaseAgent
  from letta.agents.helpers import _prepare_in_context_messages_async
+ from letta.constants import DEFAULT_MAX_STEPS
  from letta.helpers import ToolRulesSolver
  from letta.helpers.datetime_helpers import get_utc_time
  from letta.helpers.tool_execution_helper import enable_strict_mode
@@ -110,7 +111,7 @@ class LettaAgentBatch(BaseAgent):
          sandbox_config_manager: SandboxConfigManager,
          job_manager: JobManager,
          actor: User,
-         max_steps: int = 10,
+         max_steps: int = DEFAULT_MAX_STEPS,
      ):
          self.message_manager = message_manager
          self.agent_manager = agent_manager
@@ -619,10 +620,10 @@
          return in_context_messages

      # Not used in batch.
-     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
+     async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
          raise NotImplementedError

      async def step_stream(
-         self, input_messages: List[MessageCreate], max_steps: int = 10
+         self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS
      ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
          raise NotImplementedError
letta/agents/voice_agent.py CHANGED
@@ -9,7 +9,7 @@ import openai
  from letta.agents.base_agent import BaseAgent
  from letta.agents.exceptions import IncompatibleAgentType
  from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
- from letta.constants import NON_USER_MSG_PREFIX
+ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
  from letta.helpers.datetime_helpers import get_utc_time
  from letta.helpers.tool_execution_helper import (
      add_pre_execution_message,
@@ -111,10 +111,10 @@

          return summarizer

-     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
+     async def step(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> LettaResponse:
          raise NotImplementedError("VoiceAgent does not have a synchronous step implemented currently.")

-     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = 10) -> AsyncGenerator[str, None]:
+     async def step_stream(self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS) -> AsyncGenerator[str, None]:
          """
          Main streaming loop that yields partial tokens.
          Whenever we detect a tool call, we yield from _handle_ai_response as well.
letta/agents/voice_sleeptime_agent.py CHANGED
@@ -2,6 +2,7 @@ from typing import AsyncGenerator, List, Optional, Tuple, Union

  from letta.agents.helpers import _create_letta_response, serialize_message_history
  from letta.agents.letta_agent import LettaAgent
+ from letta.constants import DEFAULT_MAX_STEPS
  from letta.orm.enums import ToolType
  from letta.otel.tracing import trace_method
  from letta.schemas.agent import AgentState
@@ -62,7 +63,7 @@ class VoiceSleeptimeAgent(LettaAgent):
      async def step(
          self,
          input_messages: List[MessageCreate],
-         max_steps: int = 20,
+         max_steps: int = DEFAULT_MAX_STEPS,
          use_assistant_message: bool = True,
          include_return_message_types: Optional[List[MessageType]] = None,
      ) -> LettaResponse:
@@ -170,7 +171,7 @@
              return f"Failed to store memory given start_index {start_index} and end_index {end_index}: {e}", False

      async def step_stream(
-         self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True
+         self, input_messages: List[MessageCreate], max_steps: int = DEFAULT_MAX_STEPS, use_assistant_message: bool = True
      ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
          """
          This agent is synchronous-only. If called in an async context, raise an error.