letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. letta/adapters/letta_llm_adapter.py +1 -0
  2. letta/adapters/letta_llm_request_adapter.py +0 -1
  3. letta/adapters/letta_llm_stream_adapter.py +7 -2
  4. letta/adapters/simple_llm_request_adapter.py +88 -0
  5. letta/adapters/simple_llm_stream_adapter.py +192 -0
  6. letta/agents/agent_loop.py +6 -0
  7. letta/agents/ephemeral_summary_agent.py +2 -1
  8. letta/agents/helpers.py +142 -6
  9. letta/agents/letta_agent.py +13 -33
  10. letta/agents/letta_agent_batch.py +2 -4
  11. letta/agents/letta_agent_v2.py +87 -77
  12. letta/agents/letta_agent_v3.py +899 -0
  13. letta/agents/voice_agent.py +2 -6
  14. letta/constants.py +8 -4
  15. letta/errors.py +40 -0
  16. letta/functions/function_sets/base.py +84 -4
  17. letta/functions/function_sets/multi_agent.py +0 -3
  18. letta/functions/schema_generator.py +113 -71
  19. letta/groups/dynamic_multi_agent.py +3 -2
  20. letta/groups/helpers.py +1 -2
  21. letta/groups/round_robin_multi_agent.py +3 -2
  22. letta/groups/sleeptime_multi_agent.py +3 -2
  23. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  24. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  25. letta/groups/supervisor_multi_agent.py +84 -80
  26. letta/helpers/converters.py +3 -0
  27. letta/helpers/message_helper.py +4 -0
  28. letta/helpers/tool_rule_solver.py +92 -5
  29. letta/interfaces/anthropic_streaming_interface.py +409 -0
  30. letta/interfaces/gemini_streaming_interface.py +296 -0
  31. letta/interfaces/openai_streaming_interface.py +752 -1
  32. letta/llm_api/anthropic_client.py +126 -16
  33. letta/llm_api/bedrock_client.py +4 -2
  34. letta/llm_api/deepseek_client.py +4 -1
  35. letta/llm_api/google_vertex_client.py +123 -42
  36. letta/llm_api/groq_client.py +4 -1
  37. letta/llm_api/llm_api_tools.py +11 -4
  38. letta/llm_api/llm_client_base.py +6 -2
  39. letta/llm_api/openai.py +32 -2
  40. letta/llm_api/openai_client.py +423 -18
  41. letta/llm_api/xai_client.py +4 -1
  42. letta/main.py +9 -5
  43. letta/memory.py +1 -0
  44. letta/orm/__init__.py +1 -1
  45. letta/orm/agent.py +10 -0
  46. letta/orm/block.py +7 -16
  47. letta/orm/blocks_agents.py +8 -2
  48. letta/orm/files_agents.py +2 -0
  49. letta/orm/job.py +7 -5
  50. letta/orm/mcp_oauth.py +1 -0
  51. letta/orm/message.py +21 -6
  52. letta/orm/organization.py +2 -0
  53. letta/orm/provider.py +6 -2
  54. letta/orm/run.py +71 -0
  55. letta/orm/sandbox_config.py +7 -1
  56. letta/orm/sqlalchemy_base.py +0 -306
  57. letta/orm/step.py +6 -5
  58. letta/orm/step_metrics.py +5 -5
  59. letta/otel/tracing.py +28 -3
  60. letta/plugins/defaults.py +4 -4
  61. letta/prompts/system_prompts/__init__.py +2 -0
  62. letta/prompts/system_prompts/letta_v1.py +25 -0
  63. letta/schemas/agent.py +3 -2
  64. letta/schemas/agent_file.py +9 -3
  65. letta/schemas/block.py +23 -10
  66. letta/schemas/enums.py +21 -2
  67. letta/schemas/job.py +17 -4
  68. letta/schemas/letta_message_content.py +71 -2
  69. letta/schemas/letta_stop_reason.py +5 -5
  70. letta/schemas/llm_config.py +53 -3
  71. letta/schemas/memory.py +1 -1
  72. letta/schemas/message.py +504 -117
  73. letta/schemas/openai/responses_request.py +64 -0
  74. letta/schemas/providers/__init__.py +2 -0
  75. letta/schemas/providers/anthropic.py +16 -0
  76. letta/schemas/providers/ollama.py +115 -33
  77. letta/schemas/providers/openrouter.py +52 -0
  78. letta/schemas/providers/vllm.py +2 -1
  79. letta/schemas/run.py +48 -42
  80. letta/schemas/step.py +2 -2
  81. letta/schemas/step_metrics.py +1 -1
  82. letta/schemas/tool.py +15 -107
  83. letta/schemas/tool_rule.py +88 -5
  84. letta/serialize_schemas/marshmallow_agent.py +1 -0
  85. letta/server/db.py +86 -408
  86. letta/server/rest_api/app.py +61 -10
  87. letta/server/rest_api/dependencies.py +14 -0
  88. letta/server/rest_api/redis_stream_manager.py +19 -8
  89. letta/server/rest_api/routers/v1/agents.py +364 -292
  90. letta/server/rest_api/routers/v1/blocks.py +14 -20
  91. letta/server/rest_api/routers/v1/identities.py +45 -110
  92. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  93. letta/server/rest_api/routers/v1/jobs.py +23 -6
  94. letta/server/rest_api/routers/v1/messages.py +1 -1
  95. letta/server/rest_api/routers/v1/runs.py +126 -85
  96. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  97. letta/server/rest_api/routers/v1/tools.py +281 -594
  98. letta/server/rest_api/routers/v1/voice.py +1 -1
  99. letta/server/rest_api/streaming_response.py +29 -29
  100. letta/server/rest_api/utils.py +122 -64
  101. letta/server/server.py +160 -887
  102. letta/services/agent_manager.py +236 -919
  103. letta/services/agent_serialization_manager.py +16 -0
  104. letta/services/archive_manager.py +0 -100
  105. letta/services/block_manager.py +211 -168
  106. letta/services/file_manager.py +1 -1
  107. letta/services/files_agents_manager.py +24 -33
  108. letta/services/group_manager.py +0 -142
  109. letta/services/helpers/agent_manager_helper.py +7 -2
  110. letta/services/helpers/run_manager_helper.py +85 -0
  111. letta/services/job_manager.py +96 -411
  112. letta/services/lettuce/__init__.py +6 -0
  113. letta/services/lettuce/lettuce_client_base.py +86 -0
  114. letta/services/mcp_manager.py +38 -6
  115. letta/services/message_manager.py +165 -362
  116. letta/services/organization_manager.py +0 -36
  117. letta/services/passage_manager.py +0 -345
  118. letta/services/provider_manager.py +0 -80
  119. letta/services/run_manager.py +301 -0
  120. letta/services/sandbox_config_manager.py +0 -234
  121. letta/services/step_manager.py +62 -39
  122. letta/services/summarizer/summarizer.py +9 -7
  123. letta/services/telemetry_manager.py +0 -16
  124. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  125. letta/services/tool_executor/core_tool_executor.py +397 -2
  126. letta/services/tool_executor/files_tool_executor.py +3 -3
  127. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  128. letta/services/tool_executor/tool_execution_manager.py +6 -8
  129. letta/services/tool_executor/tool_executor_base.py +3 -3
  130. letta/services/tool_manager.py +85 -339
  131. letta/services/tool_sandbox/base.py +24 -13
  132. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  133. letta/services/tool_schema_generator.py +123 -0
  134. letta/services/user_manager.py +0 -99
  135. letta/settings.py +20 -4
  136. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
  137. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
  138. letta/agents/temporal/activities/__init__.py +0 -4
  139. letta/agents/temporal/activities/example_activity.py +0 -7
  140. letta/agents/temporal/activities/prepare_messages.py +0 -10
  141. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  142. letta/agents/temporal/types.py +0 -25
  143. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
  144. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
  145. {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,3 @@
1
- import json
2
1
  import uuid
3
2
  from collections.abc import AsyncGenerator
4
3
  from datetime import datetime
@@ -13,6 +12,7 @@ from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
13
12
  from letta.agents.helpers import (
14
13
  _build_rule_violation_result,
15
14
  _create_letta_response,
15
+ _load_last_function_response,
16
16
  _pop_heartbeat,
17
17
  _prepare_in_context_messages_no_persist_async,
18
18
  _safe_load_tool_call_str,
@@ -34,7 +34,7 @@ from letta.otel.context import get_ctx_attributes
34
34
  from letta.otel.metric_registry import MetricRegistry
35
35
  from letta.otel.tracing import log_event, trace_method, tracer
36
36
  from letta.schemas.agent import AgentState, UpdateAgent
37
- from letta.schemas.enums import JobStatus, MessageRole, ProviderType, StepStatus, ToolType
37
+ from letta.schemas.enums import JobStatus, ProviderType, StepStatus, ToolType
38
38
  from letta.schemas.letta_message import MessageType
39
39
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
40
40
  from letta.schemas.letta_response import LettaResponse
@@ -48,7 +48,10 @@ from letta.schemas.step_metrics import StepMetrics
48
48
  from letta.schemas.tool_execution_result import ToolExecutionResult
49
49
  from letta.schemas.usage import LettaUsageStatistics
50
50
  from letta.schemas.user import User
51
- from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
51
+ from letta.server.rest_api.utils import (
52
+ create_approval_request_message_from_llm_response,
53
+ create_letta_messages_from_llm_response,
54
+ )
52
55
  from letta.services.agent_manager import AgentManager
53
56
  from letta.services.block_manager import BlockManager
54
57
  from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
@@ -297,7 +300,7 @@ class LettaAgent(BaseAgent):
297
300
  context_window_limit=agent_state.llm_config.context_window,
298
301
  usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
299
302
  provider_id=None,
300
- job_id=self.current_run_id if self.current_run_id else None,
303
+ run_id=self.current_run_id if self.current_run_id else None,
301
304
  step_id=step_id,
302
305
  project_id=agent_state.project_id,
303
306
  status=StepStatus.PENDING,
@@ -641,7 +644,7 @@ class LettaAgent(BaseAgent):
641
644
  context_window_limit=agent_state.llm_config.context_window,
642
645
  usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
643
646
  provider_id=None,
644
- job_id=run_id if run_id else self.current_run_id,
647
+ run_id=run_id if run_id else self.current_run_id,
645
648
  step_id=step_id,
646
649
  project_id=agent_state.project_id,
647
650
  status=StepStatus.PENDING,
@@ -765,7 +768,7 @@ class LettaAgent(BaseAgent):
765
768
  step_id=step_id,
766
769
  agent_state=agent_state,
767
770
  step_metrics=step_metrics,
768
- job_id=run_id if run_id else self.current_run_id,
771
+ run_id=run_id if run_id else self.current_run_id,
769
772
  )
770
773
 
771
774
  except Exception as e:
@@ -986,7 +989,7 @@ class LettaAgent(BaseAgent):
986
989
  context_window_limit=agent_state.llm_config.context_window,
987
990
  usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
988
991
  provider_id=None,
989
- job_id=self.current_run_id if self.current_run_id else None,
992
+ run_id=self.current_run_id if self.current_run_id else None,
990
993
  step_id=step_id,
991
994
  project_id=agent_state.project_id,
992
995
  status=StepStatus.PENDING,
@@ -1592,7 +1595,6 @@ class LettaAgent(BaseAgent):
1592
1595
  ToolType.LETTA_VOICE_SLEEPTIME_CORE,
1593
1596
  ToolType.LETTA_BUILTIN,
1594
1597
  ToolType.LETTA_FILES_CORE,
1595
- ToolType.EXTERNAL_COMPOSIO,
1596
1598
  ToolType.EXTERNAL_MCP,
1597
1599
  }
1598
1600
  ]
@@ -1619,6 +1621,7 @@ class LettaAgent(BaseAgent):
1619
1621
 
1620
1622
  return (
1621
1623
  llm_client.build_request_data(
1624
+ agent_state.agent_type,
1622
1625
  in_context_messages,
1623
1626
  agent_state.llm_config,
1624
1627
  allowed_tools,
@@ -1663,15 +1666,14 @@ class LettaAgent(BaseAgent):
1663
1666
  function_arguments={},
1664
1667
  tool_execution_result=ToolExecutionResult(status="error"),
1665
1668
  tool_call_id=tool_call_id,
1666
- function_call_success=False,
1667
1669
  function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
1668
1670
  timezone=agent_state.timezone,
1669
- actor=self.actor,
1670
1671
  continue_stepping=continue_stepping,
1671
1672
  heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
1672
1673
  reasoning_content=None,
1673
1674
  pre_computed_assistant_message_id=None,
1674
1675
  step_id=step_id,
1676
+ run_id=self.current_run_id,
1675
1677
  is_approval_response=True,
1676
1678
  )
1677
1679
  messages_to_persist = (initial_messages or []) + tool_call_messages
@@ -1773,15 +1775,14 @@ class LettaAgent(BaseAgent):
1773
1775
  function_arguments=tool_args,
1774
1776
  tool_execution_result=tool_execution_result,
1775
1777
  tool_call_id=tool_call_id,
1776
- function_call_success=tool_execution_result.success_flag,
1777
1778
  function_response=function_response_string,
1778
1779
  timezone=agent_state.timezone,
1779
- actor=self.actor,
1780
1780
  continue_stepping=continue_stepping,
1781
1781
  heartbeat_reason=heartbeat_reason,
1782
1782
  reasoning_content=reasoning_content,
1783
1783
  pre_computed_assistant_message_id=pre_computed_assistant_message_id,
1784
1784
  step_id=step_id,
1785
+ run_id=self.current_run_id,
1785
1786
  is_approval_response=is_approval or is_denial,
1786
1787
  )
1787
1788
  messages_to_persist = (initial_messages or []) + tool_call_messages
@@ -1790,13 +1791,6 @@ class LettaAgent(BaseAgent):
1790
1791
  messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
1791
1792
  )
1792
1793
 
1793
- if run_id:
1794
- await self.job_manager.add_messages_to_job_async(
1795
- job_id=run_id,
1796
- message_ids=[m.id for m in persisted_messages if m.role != "user"],
1797
- actor=self.actor,
1798
- )
1799
-
1800
1794
  return persisted_messages, continue_stepping, stop_reason
1801
1795
 
1802
1796
  def _decide_continuation(
@@ -1907,17 +1901,3 @@ class LettaAgent(BaseAgent):
1907
1901
  )
1908
1902
  log_event(name=f"finish_{tool_name}_execution", attributes=tool_execution_result.model_dump())
1909
1903
  return tool_execution_result
1910
-
1911
- @trace_method
1912
- def _load_last_function_response(self, in_context_messages: list[Message]):
1913
- """Load the last function response from message history"""
1914
- for msg in reversed(in_context_messages):
1915
- if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
1916
- text_content = msg.content[0].text
1917
- try:
1918
- response_json = json.loads(text_content)
1919
- if response_json.get("message"):
1920
- return response_json["message"]
1921
- except (json.JSONDecodeError, KeyError):
1922
- raise ValueError(f"Invalid JSON format in message: {text_content}")
1923
- return None
@@ -192,6 +192,7 @@ class LettaAgentBatch(BaseAgent):
192
192
 
193
193
  log_event(name="send_llm_batch_request")
194
194
  batch_response = await llm_client.send_llm_batch_request_async(
195
+ agent_type=agent_states[0].agent_type,
195
196
  agent_messages_mapping=agent_messages_mapping,
196
197
  agent_tools_mapping=agent_tools_mapping,
197
198
  agent_llm_config_mapping=agent_llm_config_mapping,
@@ -501,7 +502,6 @@ class LettaAgentBatch(BaseAgent):
501
502
  model=ctx.agent_state_map[agent_id].llm_config.model,
502
503
  function_call_success=success_flag_map[agent_id],
503
504
  timezone=ctx.agent_state_map[agent_id].timezone,
504
- actor=self.actor,
505
505
  )
506
506
  batch_reqs.append(
507
507
  LettaBatchRequest(
@@ -545,11 +545,9 @@ class LettaAgentBatch(BaseAgent):
545
545
  function_name=tool_call_name,
546
546
  function_arguments=tool_call_args,
547
547
  tool_call_id=tool_call_id,
548
- function_call_success=success_flag,
549
548
  function_response=tool_exec_result,
550
549
  tool_execution_result=tool_exec_result_obj,
551
550
  timezone=agent_state.timezone,
552
- actor=self.actor,
553
551
  continue_stepping=False,
554
552
  reasoning_content=reasoning_content,
555
553
  pre_computed_assistant_message_id=None,
@@ -615,7 +613,7 @@ class LettaAgentBatch(BaseAgent):
615
613
  self, agent_state: AgentState, input_messages: List[MessageCreate]
616
614
  ) -> List[Message]:
617
615
  current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
618
- input_messages, agent_state, self.message_manager, self.actor
616
+ input_messages, agent_state, self.message_manager, self.actor, run_id=None
619
617
  )
620
618
 
621
619
  in_context_messages = await self._rebuild_memory_async(current_in_context_messages + new_in_context_messages, agent_state)
@@ -1,8 +1,7 @@
1
1
  import asyncio
2
- import json
3
2
  import uuid
4
3
  from datetime import datetime
5
- from typing import AsyncGenerator, Tuple
4
+ from typing import AsyncGenerator, Optional, Tuple
6
5
 
7
6
  from opentelemetry.trace import Span
8
7
 
@@ -13,6 +12,8 @@ from letta.agents.base_agent_v2 import BaseAgentV2
13
12
  from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
14
13
  from letta.agents.helpers import (
15
14
  _build_rule_violation_result,
15
+ _load_last_function_response,
16
+ _maybe_get_approval_messages,
16
17
  _pop_heartbeat,
17
18
  _prepare_in_context_messages_no_persist_async,
18
19
  _safe_load_tool_call_str,
@@ -30,7 +31,7 @@ from letta.log import get_logger
30
31
  from letta.otel.tracing import log_event, trace_method, tracer
31
32
  from letta.prompts.prompt_generator import PromptGenerator
32
33
  from letta.schemas.agent import AgentState, UpdateAgent
33
- from letta.schemas.enums import AgentType, JobStatus, MessageRole, MessageStreamStatus, StepStatus
34
+ from letta.schemas.enums import AgentType, MessageStreamStatus, RunStatus, StepStatus
34
35
  from letta.schemas.letta_message import LettaMessage, MessageType
35
36
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
36
37
  from letta.schemas.letta_response import LettaResponse
@@ -42,14 +43,17 @@ from letta.schemas.step_metrics import StepMetrics
42
43
  from letta.schemas.tool_execution_result import ToolExecutionResult
43
44
  from letta.schemas.usage import LettaUsageStatistics
44
45
  from letta.schemas.user import User
45
- from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
46
+ from letta.server.rest_api.utils import (
47
+ create_approval_request_message_from_llm_response,
48
+ create_letta_messages_from_llm_response,
49
+ )
46
50
  from letta.services.agent_manager import AgentManager
47
51
  from letta.services.archive_manager import ArchiveManager
48
52
  from letta.services.block_manager import BlockManager
49
53
  from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
50
- from letta.services.job_manager import JobManager
51
54
  from letta.services.message_manager import MessageManager
52
55
  from letta.services.passage_manager import PassageManager
56
+ from letta.services.run_manager import RunManager
53
57
  from letta.services.step_manager import StepManager
54
58
  from letta.services.summarizer.enums import SummarizationMode
55
59
  from letta.services.summarizer.summarizer import Summarizer
@@ -89,7 +93,7 @@ class LettaAgentV2(BaseAgentV2):
89
93
  self.agent_manager = AgentManager()
90
94
  self.archive_manager = ArchiveManager()
91
95
  self.block_manager = BlockManager()
92
- self.job_manager = JobManager()
96
+ self.run_manager = RunManager()
93
97
  self.message_manager = MessageManager()
94
98
  self.passage_manager = PassageManager()
95
99
  self.step_manager = StepManager()
@@ -138,12 +142,14 @@ class LettaAgentV2(BaseAgentV2):
138
142
  """
139
143
  request = {}
140
144
  in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
141
- input_messages, self.agent_state, self.message_manager, self.actor
145
+ input_messages, self.agent_state, self.message_manager, self.actor, None
142
146
  )
143
147
  response = self._step(
148
+ run_id=None,
144
149
  messages=in_context_messages + input_messages_to_persist,
145
150
  llm_adapter=LettaLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
146
151
  dry_run=True,
152
+ enforce_run_id_set=False,
147
153
  )
148
154
  async for chunk in response:
149
155
  request = chunk # First chunk contains request data
@@ -179,11 +185,13 @@ class LettaAgentV2(BaseAgentV2):
179
185
  request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns)
180
186
 
181
187
  in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
182
- input_messages, self.agent_state, self.message_manager, self.actor
188
+ input_messages, self.agent_state, self.message_manager, self.actor, run_id
183
189
  )
184
190
  in_context_messages = in_context_messages + input_messages_to_persist
185
191
  response_letta_messages = []
186
192
  for i in range(max_steps):
193
+ remaining_turns = max_steps - i - 1
194
+
187
195
  response = self._step(
188
196
  messages=in_context_messages + self.response_messages,
189
197
  input_messages_to_persist=input_messages_to_persist,
@@ -192,6 +200,7 @@ class LettaAgentV2(BaseAgentV2):
192
200
  use_assistant_message=use_assistant_message,
193
201
  include_return_message_types=include_return_message_types,
194
202
  request_start_timestamp_ns=request_start_timestamp_ns,
203
+ remaining_turns=remaining_turns,
195
204
  )
196
205
 
197
206
  async for chunk in response:
@@ -264,6 +273,7 @@ class LettaAgentV2(BaseAgentV2):
264
273
  llm_adapter = LettaLLMStreamAdapter(
265
274
  llm_client=self.llm_client,
266
275
  llm_config=self.agent_state.llm_config,
276
+ run_id=run_id,
267
277
  )
268
278
  else:
269
279
  llm_adapter = LettaLLMRequestAdapter(
@@ -273,7 +283,7 @@ class LettaAgentV2(BaseAgentV2):
273
283
 
274
284
  try:
275
285
  in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
276
- input_messages, self.agent_state, self.message_manager, self.actor
286
+ input_messages, self.agent_state, self.message_manager, self.actor, run_id
277
287
  )
278
288
  in_context_messages = in_context_messages + input_messages_to_persist
279
289
  for i in range(max_steps):
@@ -332,13 +342,14 @@ class LettaAgentV2(BaseAgentV2):
332
342
  self,
333
343
  messages: list[Message],
334
344
  llm_adapter: LettaLLMAdapter,
345
+ run_id: Optional[str],
335
346
  input_messages_to_persist: list[Message] | None = None,
336
- run_id: str | None = None,
337
347
  use_assistant_message: bool = True,
338
348
  include_return_message_types: list[MessageType] | None = None,
339
349
  request_start_timestamp_ns: int | None = None,
340
350
  remaining_turns: int = -1,
341
351
  dry_run: bool = False,
352
+ enforce_run_id_set: bool = True,
342
353
  ) -> AsyncGenerator[LettaMessage | dict, None]:
343
354
  """
344
355
  Execute a single agent step (one LLM call and tool execution).
@@ -361,6 +372,9 @@ class LettaAgentV2(BaseAgentV2):
361
372
  Yields:
362
373
  LettaMessage or dict: Chunks for streaming mode, or request data for dry_run
363
374
  """
375
+ if enforce_run_id_set and run_id is None:
376
+ raise AssertionError("run_id is required when enforce_run_id_set is True")
377
+
364
378
  step_progression = StepProgression.START
365
379
  # TODO(@caren): clean this up
366
380
  tool_call, reasoning_content, agent_step_span, first_chunk, step_id, logged_step, step_start_ns, step_metrics = (
@@ -374,9 +388,9 @@ class LettaAgentV2(BaseAgentV2):
374
388
  None,
375
389
  )
376
390
  try:
377
- self.last_function_response = self._load_last_function_response(messages)
391
+ self.last_function_response = _load_last_function_response(messages)
378
392
  valid_tools = await self._get_valid_tools()
379
- approval_request, approval_response = await self._maybe_get_approval_messages(messages)
393
+ approval_request, approval_response = _maybe_get_approval_messages(messages)
380
394
  if approval_request and approval_response:
381
395
  tool_call = approval_request.tool_calls[0]
382
396
  reasoning_content = approval_request.content
@@ -399,6 +413,7 @@ class LettaAgentV2(BaseAgentV2):
399
413
  for llm_request_attempt in range(summarizer_settings.max_summarizer_retries + 1):
400
414
  try:
401
415
  request_data = self.llm_client.build_request_data(
416
+ agent_type=self.agent_state.agent_type,
402
417
  messages=messages,
403
418
  llm_config=self.agent_state.llm_config,
404
419
  tools=valid_tools,
@@ -457,6 +472,13 @@ class LettaAgentV2(BaseAgentV2):
457
472
  self.stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
458
473
  raise ValueError("No tool calls found in response, model must make a tool call")
459
474
 
475
+ # TODO: how should we associate input messages with runs?
476
+ ## Set run_id on input messages before persisting
477
+ # if input_messages_to_persist and run_id:
478
+ # for message in input_messages_to_persist:
479
+ # if message.run_id is None:
480
+ # message.run_id = run_id
481
+
460
482
  persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
461
483
  tool_call or llm_adapter.tool_call,
462
484
  [tool["name"] for tool in valid_tools],
@@ -558,6 +580,7 @@ class LettaAgentV2(BaseAgentV2):
558
580
  for message in input_messages_to_persist:
559
581
  message.is_err = True
560
582
  message.step_id = step_id
583
+ message.run_id = run_id
561
584
  await self.message_manager.create_many_messages_async(
562
585
  input_messages_to_persist,
563
586
  actor=self.actor,
@@ -598,18 +621,11 @@ class LettaAgentV2(BaseAgentV2):
598
621
  self.last_function_response = None
599
622
  self.response_messages = []
600
623
 
601
- async def _maybe_get_approval_messages(self, messages: list[Message]) -> Tuple[Message | None, Message | None]:
602
- if len(messages) >= 2:
603
- maybe_approval_request, maybe_approval_response = messages[-2], messages[-1]
604
- if maybe_approval_request.role == "approval" and maybe_approval_response.role == "approval":
605
- return maybe_approval_request, maybe_approval_response
606
- return None, None
607
-
608
624
  @trace_method
609
625
  async def _check_run_cancellation(self, run_id) -> bool:
610
626
  try:
611
- job = await self.job_manager.get_job_by_id_async(job_id=run_id, actor=self.actor)
612
- return job.status == JobStatus.cancelled
627
+ run = await self.run_manager.get_run_by_id(run_id=run_id, actor=self.actor)
628
+ return run.status == RunStatus.cancelled
613
629
  except Exception as e:
614
630
  # Log the error but don't fail the execution
615
631
  self.logger.warning(f"Failed to check job cancellation status for job {run_id}: {e}")
@@ -678,6 +694,9 @@ class LettaAgentV2(BaseAgentV2):
678
694
 
679
695
  curr_dynamic_section = extract_dynamic_section(curr_system_message_text)
680
696
 
697
+ # refresh files
698
+ agent_state = await self.agent_manager.refresh_file_blocks(agent_state=agent_state, actor=self.actor)
699
+
681
700
  # generate just the memory string with current state for comparison
682
701
  curr_memory_str = agent_state.memory.compile(
683
702
  tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
@@ -740,20 +759,6 @@ class LettaAgentV2(BaseAgentV2):
740
759
  )
741
760
  return allowed_tools
742
761
 
743
- @trace_method
744
- def _load_last_function_response(self, in_context_messages: list[Message]):
745
- """Load the last function response from message history"""
746
- for msg in reversed(in_context_messages):
747
- if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
748
- text_content = msg.content[0].text
749
- try:
750
- response_json = json.loads(text_content)
751
- if response_json.get("message"):
752
- return response_json["message"]
753
- except (json.JSONDecodeError, KeyError):
754
- raise ValueError(f"Invalid JSON format in message: {text_content}")
755
- return None
756
-
757
762
  @trace_method
758
763
  def _request_checkpoint_start(self, request_start_timestamp_ns: int | None) -> Span | None:
759
764
  if request_start_timestamp_ns is not None:
@@ -796,7 +801,7 @@ class LettaAgentV2(BaseAgentV2):
796
801
  context_window_limit=self.agent_state.llm_config.context_window,
797
802
  usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
798
803
  provider_id=None,
799
- job_id=run_id,
804
+ run_id=run_id,
800
805
  step_id=step_id,
801
806
  project_id=self.agent_state.project_id,
802
807
  status=StepStatus.PENDING,
@@ -890,21 +895,22 @@ class LettaAgentV2(BaseAgentV2):
890
895
  function_arguments={},
891
896
  tool_execution_result=ToolExecutionResult(status="error"),
892
897
  tool_call_id=tool_call_id,
893
- function_call_success=False,
894
898
  function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
895
899
  timezone=agent_state.timezone,
896
- actor=self.actor,
897
900
  continue_stepping=continue_stepping,
898
901
  heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
899
902
  reasoning_content=None,
900
903
  pre_computed_assistant_message_id=None,
901
904
  step_id=step_id,
902
905
  is_approval_response=True,
906
+ run_id=run_id,
903
907
  )
904
908
  messages_to_persist = (initial_messages or []) + tool_call_messages
909
+
905
910
  persisted_messages = await self.message_manager.create_many_messages_async(
906
911
  messages_to_persist,
907
912
  actor=self.actor,
913
+ run_id=run_id,
908
914
  project_id=agent_state.project_id,
909
915
  template_id=agent_state.template_id,
910
916
  )
@@ -938,6 +944,7 @@ class LettaAgentV2(BaseAgentV2):
938
944
  reasoning_content=reasoning_content,
939
945
  pre_computed_assistant_message_id=pre_computed_assistant_message_id,
940
946
  step_id=step_id,
947
+ run_id=run_id,
941
948
  )
942
949
  messages_to_persist = (initial_messages or []) + [approval_message]
943
950
  continue_stepping = False
@@ -1004,30 +1011,22 @@ class LettaAgentV2(BaseAgentV2):
1004
1011
  function_arguments=tool_args,
1005
1012
  tool_execution_result=tool_execution_result,
1006
1013
  tool_call_id=tool_call_id,
1007
- function_call_success=tool_execution_result.success_flag,
1008
1014
  function_response=function_response_string,
1009
1015
  timezone=agent_state.timezone,
1010
- actor=self.actor,
1011
1016
  continue_stepping=continue_stepping,
1012
1017
  heartbeat_reason=heartbeat_reason,
1013
1018
  reasoning_content=reasoning_content,
1014
1019
  pre_computed_assistant_message_id=pre_computed_assistant_message_id,
1015
1020
  step_id=step_id,
1021
+ run_id=run_id,
1016
1022
  is_approval_response=is_approval or is_denial,
1017
1023
  )
1018
1024
  messages_to_persist = (initial_messages or []) + tool_call_messages
1019
1025
 
1020
1026
  persisted_messages = await self.message_manager.create_many_messages_async(
1021
- messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
1027
+ messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
1022
1028
  )
1023
1029
 
1024
- if run_id:
1025
- await self.job_manager.add_messages_to_job_async(
1026
- job_id=run_id,
1027
- message_ids=[m.id for m in persisted_messages if m.role != "user"],
1028
- actor=self.actor,
1029
- )
1030
-
1031
1030
  return persisted_messages, continue_stepping, stop_reason
1032
1031
 
1033
1032
  @trace_method
@@ -1085,6 +1084,7 @@ class LettaAgentV2(BaseAgentV2):
1085
1084
  agent_state: AgentState,
1086
1085
  agent_step_span: Span | None = None,
1087
1086
  step_id: str | None = None,
1087
+ run_id: str = None,
1088
1088
  ) -> "ToolExecutionResult":
1089
1089
  """
1090
1090
  Executes a tool and returns the ToolExecutionResult.
@@ -1110,9 +1110,9 @@ class LettaAgentV2(BaseAgentV2):
1110
1110
  tool_execution_manager = ToolExecutionManager(
1111
1111
  agent_state=agent_state,
1112
1112
  message_manager=self.message_manager,
1113
+ run_manager=self.run_manager,
1113
1114
  agent_manager=self.agent_manager,
1114
1115
  block_manager=self.block_manager,
1115
- job_manager=self.job_manager,
1116
1116
  passage_manager=self.passage_manager,
1117
1117
  sandbox_env_vars=sandbox_env_vars,
1118
1118
  actor=self.actor,
@@ -1148,28 +1148,38 @@ class LettaAgentV2(BaseAgentV2):
1148
1148
  total_tokens: int | None = None,
1149
1149
  force: bool = False,
1150
1150
  ) -> list[Message]:
1151
+ # always skip summarization if last message is an approval request message
1152
+ skip_summarization = False
1153
+ latest_messages = in_context_messages + new_letta_messages
1154
+ if latest_messages[-1].role == "approval" and len(latest_messages[-1].tool_calls) > 0:
1155
+ skip_summarization = True
1156
+
1151
1157
  # If total tokens is reached, we truncate down
1152
1158
  # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
1153
1159
  # TODO: `force` and `clear` seem to no longer be used, we should remove
1154
- if force or (total_tokens and total_tokens > self.agent_state.llm_config.context_window):
1155
- self.logger.warning(
1156
- f"Total tokens {total_tokens} exceeds configured max tokens {self.agent_state.llm_config.context_window}, forcefully clearing message history."
1157
- )
1158
- new_in_context_messages, updated = await self.summarizer.summarize(
1159
- in_context_messages=in_context_messages,
1160
- new_letta_messages=new_letta_messages,
1161
- force=True,
1162
- clear=True,
1163
- )
1160
+ if not skip_summarization:
1161
+ if force or (total_tokens and total_tokens > self.agent_state.llm_config.context_window):
1162
+ self.logger.warning(
1163
+ f"Total tokens {total_tokens} exceeds configured max tokens {self.agent_state.llm_config.context_window}, forcefully clearing message history."
1164
+ )
1165
+ new_in_context_messages, updated = await self.summarizer.summarize(
1166
+ in_context_messages=in_context_messages,
1167
+ new_letta_messages=new_letta_messages,
1168
+ force=True,
1169
+ clear=True,
1170
+ )
1171
+ else:
1172
+ # NOTE (Sarah): Seems like this is doing nothing?
1173
+ self.logger.info(
1174
+ f"Total tokens {total_tokens} does not exceed configured max tokens {self.agent_state.llm_config.context_window}, passing summarizing w/o force."
1175
+ )
1176
+ new_in_context_messages, updated = await self.summarizer.summarize(
1177
+ in_context_messages=in_context_messages,
1178
+ new_letta_messages=new_letta_messages,
1179
+ )
1164
1180
  else:
1165
- # NOTE (Sarah): Seems like this is doing nothing?
1166
- self.logger.info(
1167
- f"Total tokens {total_tokens} does not exceed configured max tokens {self.agent_state.llm_config.context_window}, passing summarizing w/o force."
1168
- )
1169
- new_in_context_messages, updated = await self.summarizer.summarize(
1170
- in_context_messages=in_context_messages,
1171
- new_letta_messages=new_letta_messages,
1172
- )
1181
+ new_in_context_messages = in_context_messages + new_letta_messages
1182
+
1173
1183
  message_ids = [m.id for m in new_in_context_messages]
1174
1184
  await self.agent_manager.update_message_ids_async(
1175
1185
  agent_id=self.agent_state.id,
@@ -1195,7 +1205,7 @@ class LettaAgentV2(BaseAgentV2):
1195
1205
  tool_execution_ns=step_metrics.tool_execution_ns,
1196
1206
  step_ns=step_metrics.step_ns,
1197
1207
  agent_id=self.agent_state.id,
1198
- job_id=run_id,
1208
+ run_id=run_id,
1199
1209
  project_id=self.agent_state.project_id,
1200
1210
  template_id=self.agent_state.template_id,
1201
1211
  base_template_id=self.agent_state.base_template_id,
@@ -1219,15 +1229,15 @@ class LettaAgentV2(BaseAgentV2):
1219
1229
  if request_span:
1220
1230
  request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
1221
1231
  await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
1222
- if settings.track_agent_run and run_id:
1223
- await self.job_manager.record_response_duration(run_id, duration_ns, self.actor)
1224
- await self.job_manager.safe_update_job_status_async(
1225
- job_id=run_id,
1226
- new_status=JobStatus.failed if is_error else JobStatus.completed,
1227
- actor=self.actor,
1228
- metadata=job_update_metadata,
1229
- stop_reason=self.stop_reason.stop_reason if self.stop_reason else StopReasonType.error,
1230
- )
1232
+ # if settings.track_agent_run and run_id:
1233
+ # await self.job_manager.record_response_duration(run_id, duration_ns, self.actor)
1234
+ # await self.job_manager.safe_update_job_status_async(
1235
+ # job_id=run_id,
1236
+ # new_status=JobStatus.failed if is_error else JobStatus.completed,
1237
+ # actor=self.actor,
1238
+ # stop_reason=self.stop_reason.stop_reason if self.stop_reason else StopReasonType.error,
1239
+ # metadata=job_update_metadata,
1240
+ # )
1231
1241
  if request_span:
1232
1242
  request_span.end()
1233
1243