letta-nightly 0.6.53.dev20250417104214__py3-none-any.whl → 0.6.54.dev20250419104029__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +6 -31
  3. letta/agents/letta_agent.py +1 -0
  4. letta/agents/letta_agent_batch.py +369 -18
  5. letta/constants.py +15 -4
  6. letta/functions/function_sets/base.py +168 -21
  7. letta/groups/sleeptime_multi_agent.py +3 -3
  8. letta/helpers/converters.py +1 -1
  9. letta/helpers/message_helper.py +1 -0
  10. letta/jobs/llm_batch_job_polling.py +39 -10
  11. letta/jobs/scheduler.py +54 -13
  12. letta/jobs/types.py +26 -6
  13. letta/llm_api/anthropic_client.py +3 -1
  14. letta/llm_api/llm_api_tools.py +7 -1
  15. letta/llm_api/openai.py +2 -0
  16. letta/orm/agent.py +5 -29
  17. letta/orm/base.py +2 -2
  18. letta/orm/enums.py +1 -0
  19. letta/orm/job.py +5 -0
  20. letta/orm/llm_batch_items.py +2 -2
  21. letta/orm/llm_batch_job.py +5 -2
  22. letta/orm/message.py +12 -4
  23. letta/orm/passage.py +0 -6
  24. letta/orm/sqlalchemy_base.py +0 -3
  25. letta/personas/examples/sleeptime_doc_persona.txt +2 -0
  26. letta/prompts/system/sleeptime.txt +20 -11
  27. letta/prompts/system/sleeptime_doc_ingest.txt +35 -0
  28. letta/schemas/agent.py +24 -1
  29. letta/schemas/enums.py +3 -1
  30. letta/schemas/job.py +39 -0
  31. letta/schemas/letta_message.py +24 -7
  32. letta/schemas/letta_request.py +7 -2
  33. letta/schemas/letta_response.py +3 -1
  34. letta/schemas/llm_batch_job.py +4 -3
  35. letta/schemas/llm_config.py +6 -2
  36. letta/schemas/message.py +11 -1
  37. letta/schemas/providers.py +10 -58
  38. letta/serialize_schemas/marshmallow_agent.py +25 -22
  39. letta/serialize_schemas/marshmallow_message.py +1 -1
  40. letta/server/db.py +75 -49
  41. letta/server/rest_api/app.py +1 -0
  42. letta/server/rest_api/interface.py +7 -2
  43. letta/server/rest_api/routers/v1/__init__.py +2 -0
  44. letta/server/rest_api/routers/v1/agents.py +33 -6
  45. letta/server/rest_api/routers/v1/messages.py +132 -0
  46. letta/server/rest_api/routers/v1/sources.py +21 -2
  47. letta/server/rest_api/utils.py +23 -10
  48. letta/server/server.py +67 -21
  49. letta/services/agent_manager.py +44 -21
  50. letta/services/group_manager.py +2 -2
  51. letta/services/helpers/agent_manager_helper.py +5 -3
  52. letta/services/job_manager.py +34 -5
  53. letta/services/llm_batch_manager.py +200 -57
  54. letta/services/message_manager.py +23 -1
  55. letta/services/passage_manager.py +2 -2
  56. letta/services/tool_executor/tool_execution_manager.py +13 -3
  57. letta/services/tool_executor/tool_execution_sandbox.py +0 -1
  58. letta/services/tool_executor/tool_executor.py +48 -9
  59. letta/services/tool_sandbox/base.py +24 -6
  60. letta/services/tool_sandbox/e2b_sandbox.py +25 -5
  61. letta/services/tool_sandbox/local_sandbox.py +23 -7
  62. letta/settings.py +2 -2
  63. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/METADATA +2 -1
  64. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/RECORD +67 -65
  65. letta/sleeptime_agent.py +0 -61
  66. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.6.53"
+ __version__ = "0.6.54"
 
  # import clients
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -36,7 +36,7 @@ from letta.log import get_logger
  from letta.memory import summarize_messages
  from letta.orm import User
  from letta.orm.enums import ToolType
- from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent
+ from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
  from letta.schemas.block import BlockUpdate
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import MessageRole
@@ -52,11 +52,7 @@ from letta.schemas.tool_rule import TerminalToolRule
  from letta.schemas.usage import LettaUsageStatistics
  from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
- from letta.services.helpers.agent_manager_helper import (
-     check_supports_structured_output,
-     compile_memory_metadata_block,
-     compile_system_message,
- )
+ from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
  from letta.services.job_manager import JobManager
  from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
@@ -204,7 +200,8 @@ class Agent(BaseAgent):
 
          # refresh memory from DB (using block ids)
          self.agent_state.memory = Memory(
-             blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()]
+             blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
+             prompt_template=get_prompt_template_for_agent_type(self.agent_state.agent_type),
          )
 
          # NOTE: don't do this since re-buildin the memory is handled at the start of the step
@@ -306,29 +303,6 @@ class Agent(BaseAgent):
          elif step_count is not None and step_count > 0 and len(allowed_tool_names) == 1:
              force_tool_call = allowed_tool_names[0]
 
-         if force_tool_call == "core_memory_insert":
-             current_system_message = message_sequence[0]
-             new_memory = Memory(
-                 blocks=self.agent_state.memory.blocks,
-                 prompt_template=(
-                     "{% for block in blocks %}"
-                     '<{{ block.label }} characters="{{ block.value|length }}/{{ block.limit }}">\n'
-                     "{% for line in block.value.splitlines() %}"
-                     "{{ loop.index0 }}: {{ line }}\n"
-                     "{% endfor %}"
-                     "</{{ block.label }}>"
-                     "{% if not loop.last %}\n{% endif %}"
-                     "{% endfor %}"
-                 ),
-             )
-             new_system_message_str = compile_system_message(
-                 system_prompt=self.agent_state.system,
-                 in_context_memory=new_memory,
-                 in_context_memory_last_edit=current_system_message.created_at,
-                 previous_message_count=len(message_sequence),
-             )
-             message_sequence[0].content = [TextContent(text=new_system_message_str)]
-
          for attempt in range(1, empty_response_retry_limit + 1):
              try:
                  log_telemetry(self.logger, "_get_ai_reply create start")
@@ -834,7 +808,8 @@ class Agent(BaseAgent):
          # Step 0: update core memory
          # only pulling latest block data if shared memory is being used
          current_persisted_memory = Memory(
-             blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()]
+             blocks=[self.block_manager.get_block_by_id(block.id, actor=self.user) for block in self.agent_state.memory.get_blocks()],
+             prompt_template=get_prompt_template_for_agent_type(self.agent_state.agent_type),
          ) # read blocks from DB
          self.update_memory_if_changed(current_persisted_memory)
 
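Note: the removed core_memory_insert branch above inlined a Jinja template that renders each memory block with numbered lines; the new get_prompt_template_for_agent_type helper (imported from letta.schemas.agent at the top of this file) appears to centralize that template selection. A minimal sketch of what such a helper could look like, assuming the line-numbered view is reserved for sleeptime-style agents; the real body lives in letta/schemas/agent.py and is not shown in this diff:

    # Hypothetical sketch only -- names mirror the diff, the dispatch condition is an assumption.
    from typing import Optional

    # The line-numbered Jinja template removed from agent.py above:
    LINE_NUMBERED_TEMPLATE = (
        "{% for block in blocks %}"
        '<{{ block.label }} characters="{{ block.value|length }}/{{ block.limit }}">\n'
        "{% for line in block.value.splitlines() %}"
        "{{ loop.index0 }}: {{ line }}\n"
        "{% endfor %}"
        "</{{ block.label }}>"
        "{% if not loop.last %}\n{% endif %}"
        "{% endfor %}"
    )

    # Assumed default: the same wrapping without line numbers.
    DEFAULT_TEMPLATE = (
        "{% for block in blocks %}"
        '<{{ block.label }} characters="{{ block.value|length }}/{{ block.limit }}">\n'
        "{{ block.value }}\n"
        "</{{ block.label }}>"
        "{% if not loop.last %}\n{% endif %}"
        "{% endfor %}"
    )

    def get_prompt_template_for_agent_type(agent_type: Optional[str] = None) -> str:
        # Assumption: agents using the new line-based memory tools
        # (memory_insert / memory_replace, see constants.py below) get the
        # line-numbered view; all other agent types keep the default template.
        if agent_type is not None and "sleeptime" in str(agent_type):
            return LINE_NUMBERED_TEMPLATE
        return DEFAULT_TEMPLATE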
letta/agents/letta_agent.py CHANGED
@@ -76,6 +76,7 @@ class LettaAgent(BaseAgent):
              agent_state=agent_state,
              tool_rules_solver=tool_rules_solver,
              stream=False,
+             # TODO: also pass in reasoning content
          )
 
          tool_call = response.choices[0].message.tool_calls[0]
letta/agents/letta_agent_batch.py CHANGED
@@ -1,58 +1,137 @@
- from typing import Dict, List
+ import json
+ import uuid
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+
+ from aiomultiprocess import Pool
+ from anthropic.types.beta.messages import BetaMessageBatchCanceledResult, BetaMessageBatchErroredResult, BetaMessageBatchSucceededResult
 
  from letta.agents.helpers import _prepare_in_context_messages
  from letta.helpers import ToolRulesSolver
  from letta.helpers.datetime_helpers import get_utc_time
  from letta.helpers.tool_execution_helper import enable_strict_mode
+ from letta.jobs.types import RequestStatusUpdateInfo, StepStatusUpdateInfo
  from letta.llm_api.llm_client import LLMClient
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
  from letta.log import get_logger
  from letta.orm.enums import ToolType
  from letta.schemas.agent import AgentState, AgentStepState
- from letta.schemas.enums import JobStatus, ProviderType
+ from letta.schemas.enums import AgentStepStatus, JobStatus, ProviderType
+ from letta.schemas.job import JobUpdate
+ from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
  from letta.schemas.letta_request import LettaBatchRequest
  from letta.schemas.letta_response import LettaBatchResponse
+ from letta.schemas.llm_batch_job import LLMBatchItem
  from letta.schemas.message import Message, MessageCreate, MessageUpdate
+ from letta.schemas.openai.chat_completion_response import ToolCall as OpenAIToolCall
+ from letta.schemas.sandbox_config import SandboxConfig, SandboxType
  from letta.schemas.user import User
+ from letta.server.rest_api.utils import create_heartbeat_system_message, create_letta_messages_from_llm_response
  from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
  from letta.services.helpers.agent_manager_helper import compile_system_message
+ from letta.services.job_manager import JobManager
  from letta.services.llm_batch_manager import LLMBatchManager
  from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
+ from letta.services.sandbox_config_manager import SandboxConfigManager
+ from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
+ from letta.settings import tool_settings
  from letta.utils import united_diff
 
  logger = get_logger(__name__)
 
 
+ @dataclass
+ class ToolExecutionParams:
+     agent_id: str
+     tool_call_name: str
+     tool_args: Dict[str, Any]
+     agent_state: AgentState
+     actor: User
+     sbx_config: SandboxConfig
+     sbx_env_vars: Dict[str, Any]
+
+
+ @dataclass
+ class _ResumeContext:
+     batch_items: List[LLMBatchItem]
+     agent_ids: List[str]
+     agent_state_map: Dict[str, AgentState]
+     provider_results: Dict[str, Any]
+     tool_call_name_map: Dict[str, str]
+     tool_call_args_map: Dict[str, Dict[str, Any]]
+     should_continue_map: Dict[str, bool]
+     request_status_updates: List[RequestStatusUpdateInfo]
+
+
+ async def execute_tool_wrapper(params: ToolExecutionParams):
+     """
+     Executes the tool in an out-of-process worker and returns:
+         (agent_id, (tool_result:str, success_flag:bool))
+     """
+     # locate the tool on the agent
+     target_tool = next((t for t in params.agent_state.tools if t.name == params.tool_call_name), None)
+     if not target_tool:
+         return params.agent_id, (f"Tool not found: {params.tool_call_name}", False)
+
+     try:
+         mgr = ToolExecutionManager(
+             agent_state=params.agent_state,
+             actor=params.actor,
+             sandbox_config=params.sbx_config,
+             sandbox_env_vars=params.sbx_env_vars,
+         )
+         result, _ = await mgr.execute_tool_async(
+             function_name=params.tool_call_name,
+             function_args=params.tool_args,
+             tool=target_tool,
+         )
+         return params.agent_id, (result, True)
+     except Exception as e:
+         return params.agent_id, (f"Failed to call tool. Error: {e}", False)
+
+
  # TODO: Limitations ->
  # TODO: Only works with anthropic for now
  class LettaAgentBatch:
 
      def __init__(
          self,
-         batch_id: str,
          message_manager: MessageManager,
          agent_manager: AgentManager,
          block_manager: BlockManager,
          passage_manager: PassageManager,
          batch_manager: LLMBatchManager,
+         sandbox_config_manager: SandboxConfigManager,
+         job_manager: JobManager,
          actor: User,
          use_assistant_message: bool = True,
          max_steps: int = 10,
      ):
-         self.batch_id = batch_id
          self.message_manager = message_manager
          self.agent_manager = agent_manager
          self.block_manager = block_manager
          self.passage_manager = passage_manager
          self.batch_manager = batch_manager
+         self.sandbox_config_manager = sandbox_config_manager
+         self.job_manager = job_manager
          self.use_assistant_message = use_assistant_message
          self.actor = actor
          self.max_steps = max_steps
 
      async def step_until_request(
-         self, batch_requests: List[LettaBatchRequest], agent_step_state_mapping: Dict[str, AgentStepState]
+         self,
+         batch_requests: List[LettaBatchRequest],
+         letta_batch_job_id: str,
+         agent_step_state_mapping: Optional[Dict[str, AgentStepState]] = None,
      ) -> LettaBatchResponse:
+         # Basic checks
+         if not batch_requests:
+             raise ValueError("Empty list of batch_requests passed in!")
+         if agent_step_state_mapping is None:
+             agent_step_state_mapping = {}
+
          agent_messages_mapping: Dict[str, List[Message]] = {}
          agent_tools_mapping: Dict[str, List[dict]] = {}
          agent_states = []
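Aside: execute_tool_wrapper above is deliberately a module-level coroutine that takes a single dataclass argument, because aiomultiprocess pickles the function reference and its arguments over to worker processes, so bound methods and unpicklable state are avoided. A self-contained sketch of the same fan-out pattern, with a toy payload standing in for the real ToolExecutionParams:

    import asyncio
    from dataclasses import dataclass

    from aiomultiprocess import Pool


    @dataclass
    class WorkItem:  # stand-in for ToolExecutionParams; must be picklable
        agent_id: str
        payload: str


    async def worker(item: WorkItem):
        # the real code runs ToolExecutionManager.execute_tool_async here
        await asyncio.sleep(0)  # simulate async tool I/O
        return item.agent_id, (f"ran {item.payload}", True)


    async def main():
        items = [WorkItem(agent_id=f"agent-{i}", payload="memory_insert") for i in range(4)]
        async with Pool() as pool:
            # each call executes in a separate OS process, like _execute_tools below
            results = await pool.map(worker, items)
        print(dict(results))


    if __name__ == "__main__":
        asyncio.run(main())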
@@ -61,10 +140,17 @@ class LettaAgentBatch:
              agent_id = batch_request.agent_id
              agent_state = self.agent_manager.get_agent_by_id(agent_id, actor=self.actor)
              agent_states.append(agent_state)
-             agent_messages_mapping[agent_id] = self.get_in_context_messages_per_agent(
+             agent_messages_mapping[agent_id] = self._get_in_context_messages_per_agent(
                  agent_state=agent_state, input_messages=batch_request.messages
              )
-             agent_tools_mapping[agent_id] = self.prepare_tools_per_agent(
+
+             # TODO: Think about a cleaner way to do this?
+             if agent_id not in agent_step_state_mapping:
+                 agent_step_state_mapping[agent_id] = AgentStepState(
+                     step_number=0, tool_rules_solver=ToolRulesSolver(tool_rules=agent_state.tool_rules)
+                 )
+
+             agent_tools_mapping[agent_id] = self._prepare_tools_per_agent(
                  agent_state, agent_step_state_mapping.get(agent_id).tool_rules_solver
              )
 
@@ -83,37 +169,302 @@ class LettaAgentBatch:
          )
 
          # Write the response into the jobs table, where it will get picked up by the next cron run
-         batch_job = self.batch_manager.create_batch_job(
-             llm_provider=ProviderType.anthropic, # TODO: Expand to more
+         llm_batch_job = self.batch_manager.create_llm_batch_job(
+             llm_provider=ProviderType.anthropic, # TODO: Expand to more providers
              create_batch_response=batch_response,
              actor=self.actor,
              status=JobStatus.running,
+             letta_batch_job_id=letta_batch_job_id,
          )
 
-         # TODO: Make this much more efficient by doing creates in bulk
+         # Create batch items in bulk for all agents
+         batch_items = []
          for agent_state in agent_states:
              agent_step_state = agent_step_state_mapping.get(agent_state.id)
-             self.batch_manager.create_batch_item(
-                 batch_id=batch_job.id,
+             batch_item = LLMBatchItem(
+                 llm_batch_id=llm_batch_job.id,
                  agent_id=agent_state.id,
                  llm_config=agent_state.llm_config,
-                 actor=self.actor,
+                 request_status=JobStatus.created,
+                 step_status=AgentStepStatus.paused,
                  step_state=agent_step_state,
              )
+             batch_items.append(batch_item)
+
+         # Create all batch items at once using the bulk operation
+         if batch_items:
+             self.batch_manager.create_llm_batch_items_bulk(batch_items, actor=self.actor)
 
          return LettaBatchResponse(
-             batch_id=batch_job.id, status=batch_job.status, last_polled_at=get_utc_time(), created_at=batch_job.created_at
+             letta_batch_id=llm_batch_job.letta_batch_job_id,
+             last_llm_batch_id=llm_batch_job.id,
+             status=llm_batch_job.status,
+             agent_count=len(agent_states),
+             last_polled_at=get_utc_time(),
+             created_at=llm_batch_job.created_at,
+         )
+
+     async def resume_step_after_request(self, letta_batch_id: str, llm_batch_id: str) -> LettaBatchResponse:
+         # 1. gather everything we need
+         llm_batch_job = self.batch_manager.get_llm_batch_job_by_id(llm_batch_id=llm_batch_id, actor=self.actor)
+         ctx = await self._collect_resume_context(llm_batch_id)
+
+         # 2. persist request-level status updates
+         self._update_request_statuses(ctx.request_status_updates)
+
+         # 3. run the tools in parallel
+         exec_results = await self._execute_tools(ctx)
+
+         # 4. create + save assistant/tool messages
+         msg_map = self._persist_tool_messages(exec_results, ctx)
+
+         # 5. mark steps complete
+         self._mark_steps_complete(llm_batch_id, ctx.agent_ids)
+
+         # 6. build next-round requests / step-state map
+         next_reqs, next_step_state = self._prepare_next_iteration(exec_results, ctx, msg_map)
+         if len(next_reqs) == 0:
+             # mark batch job as completed
+             self.job_manager.update_job_by_id(job_id=letta_batch_id, job_update=JobUpdate(status=JobStatus.completed), actor=self.actor)
+             return LettaBatchResponse(
+                 letta_batch_id=llm_batch_job.letta_batch_job_id,
+                 last_llm_batch_id=llm_batch_job.id,
+                 status=JobStatus.completed,
+                 agent_count=len(ctx.agent_ids),
+                 last_polled_at=get_utc_time(),
+                 created_at=llm_batch_job.created_at,
+             )
+
+         # 7. recurse into the normal stepping pipeline
+         return await self.step_until_request(
+             batch_requests=next_reqs,
+             letta_batch_job_id=letta_batch_id,
+             agent_step_state_mapping=next_step_state,
+         )
+
+     async def _collect_resume_context(self, llm_batch_id: str) -> _ResumeContext:
+         batch_items = self.batch_manager.list_llm_batch_items(llm_batch_id=llm_batch_id)
+
+         agent_ids, agent_state_map = [], {}
+         provider_results, name_map, args_map, cont_map = {}, {}, {}, {}
+         request_status_updates: List[RequestStatusUpdateInfo] = []
+
+         for item in batch_items:
+             aid = item.agent_id
+             agent_ids.append(aid)
+             agent_state_map[aid] = self.agent_manager.get_agent_by_id(aid, actor=self.actor)
+             provider_results[aid] = item.batch_request_result.result
+
+             # status bookkeeping
+             pr = provider_results[aid]
+             status = (
+                 JobStatus.completed
+                 if isinstance(pr, BetaMessageBatchSucceededResult)
+                 else (
+                     JobStatus.failed
+                     if isinstance(pr, BetaMessageBatchErroredResult)
+                     else JobStatus.cancelled if isinstance(pr, BetaMessageBatchCanceledResult) else JobStatus.expired
+                 )
+             )
+             request_status_updates.append(RequestStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, request_status=status))
+
+             # translate provider-specific response -> OpenAI-style tool call (unchanged)
+             llm_client = LLMClient.create(llm_config=item.llm_config, put_inner_thoughts_first=True)
+             tool_call = (
+                 llm_client.convert_response_to_chat_completion(response_data=pr.message.model_dump(), input_messages=[])
+                 .choices[0]
+                 .message.tool_calls[0]
+             )
+
+             name, args, cont = self._extract_tool_call_and_decide_continue(tool_call, item.step_state)
+             name_map[aid], args_map[aid], cont_map[aid] = name, args, cont
+
+         return _ResumeContext(
+             batch_items=batch_items,
+             agent_ids=agent_ids,
+             agent_state_map=agent_state_map,
+             provider_results=provider_results,
+             tool_call_name_map=name_map,
+             tool_call_args_map=args_map,
+             should_continue_map=cont_map,
+             request_status_updates=request_status_updates,
          )
 
-     async def resume_step_after_request(self, batch_id: str):
-         pass
+     def _update_request_statuses(self, updates: List[RequestStatusUpdateInfo]) -> None:
+         if updates:
+             self.batch_manager.bulk_update_llm_batch_items_request_status_by_agent(updates=updates)
+
+     def _build_sandbox(self) -> Tuple[SandboxConfig, Dict[str, Any]]:
+         sbx_type = SandboxType.E2B if tool_settings.e2b_api_key else SandboxType.LOCAL
+         cfg = self.sandbox_config_manager.get_or_create_default_sandbox_config(sandbox_type=sbx_type, actor=self.actor)
+         env = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(cfg.id, actor=self.actor, limit=100)
+         return cfg, env
+
+     async def _execute_tools(self, ctx: _ResumeContext) -> Sequence[Tuple[str, Tuple[str, bool]]]:
+         sbx_cfg, sbx_env = self._build_sandbox()
+         params = [
+             ToolExecutionParams(
+                 agent_id=aid,
+                 tool_call_name=ctx.tool_call_name_map[aid],
+                 tool_args=ctx.tool_call_args_map[aid],
+                 agent_state=ctx.agent_state_map[aid],
+                 actor=self.actor,
+                 sbx_config=sbx_cfg,
+                 sbx_env_vars=sbx_env,
+             )
+             for aid in ctx.agent_ids
+         ]
+         async with Pool() as pool:
+             return await pool.map(execute_tool_wrapper, params)
+
+     def _persist_tool_messages(
+         self,
+         exec_results: Sequence[Tuple[str, Tuple[str, bool]]],
+         ctx: _ResumeContext,
+     ) -> Dict[str, List[Message]]:
+         msg_map: Dict[str, List[Message]] = {}
+         for aid, (tool_res, success) in exec_results:
+             msgs = self._create_tool_call_messages(
+                 agent_state=ctx.agent_state_map[aid],
+                 tool_call_name=ctx.tool_call_name_map[aid],
+                 tool_call_args=ctx.tool_call_args_map[aid],
+                 tool_exec_result=tool_res,
+                 success_flag=success,
+                 reasoning_content=None,
+             )
+             msg_map[aid] = msgs
+         # flatten & persist
+         self.message_manager.create_many_messages([m for msgs in msg_map.values() for m in msgs], actor=self.actor)
+         return msg_map
+
+     def _mark_steps_complete(self, llm_batch_id: str, agent_ids: List[str]) -> None:
+         updates = [
+             StepStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, step_status=AgentStepStatus.completed) for aid in agent_ids
+         ]
+         self.batch_manager.bulk_update_llm_batch_items_step_status_by_agent(updates)
+
+     def _prepare_next_iteration(
+         self,
+         exec_results: Sequence[Tuple[str, Tuple[str, bool]]],
+         ctx: _ResumeContext,
+         msg_map: Dict[str, List[Message]],
+     ) -> Tuple[List[LettaBatchRequest], Dict[str, AgentStepState]]:
+         # who continues?
+         continues = [aid for aid, cont in ctx.should_continue_map.items() if cont]
+
+         success_flag_map = {aid: flag for aid, (_res, flag) in exec_results}
+
+         batch_reqs: List[LettaBatchRequest] = []
+         for aid in continues:
+             heartbeat = create_heartbeat_system_message(
+                 agent_id=aid,
+                 model=ctx.agent_state_map[aid].llm_config.model,
+                 function_call_success=success_flag_map[aid],
+                 actor=self.actor,
+             )
+             batch_reqs.append(
+                 LettaBatchRequest(
+                     agent_id=aid, messages=[MessageCreate.model_validate(heartbeat.model_dump(include={"role", "content", "name", "otid"}))]
+                 )
+             )
+
+         # extend in-context ids when necessary
+         for aid, new_msgs in msg_map.items():
+             ast = ctx.agent_state_map[aid]
+             if not ast.message_buffer_autoclear:
+                 self.agent_manager.set_in_context_messages(
+                     agent_id=aid,
+                     message_ids=ast.message_ids + [m.id for m in new_msgs],
+                     actor=self.actor,
+                 )
+
+         # bump step number
+         step_map = {
+             item.agent_id: item.step_state.model_copy(update={"step_number": item.step_state.step_number + 1}) for item in ctx.batch_items
+         }
+         return batch_reqs, step_map
+
+     def _create_tool_call_messages(
+         self,
+         agent_state: AgentState,
+         tool_call_name: str,
+         tool_call_args: Dict[str, Any],
+         tool_exec_result: str,
+         success_flag: bool,
+         reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
+     ) -> List[Message]:
+         tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
+
+         tool_call_messages = create_letta_messages_from_llm_response(
+             agent_id=agent_state.id,
+             model=agent_state.llm_config.model,
+             function_name=tool_call_name,
+             function_arguments=tool_call_args,
+             tool_call_id=tool_call_id,
+             function_call_success=success_flag,
+             function_response=tool_exec_result,
+             actor=self.actor,
+             add_heartbeat_request_system_message=False,
+             reasoning_content=reasoning_content,
+             pre_computed_assistant_message_id=None,
+             pre_computed_tool_message_id=None,
+         )
+
+         return tool_call_messages
+
+     # TODO: This is doing a lot of dict passing
+     # TODO: Make the passing here typed
+     def _extract_tool_call_and_decide_continue(
+         self, tool_call: OpenAIToolCall, agent_step_state: AgentStepState
+     ) -> Tuple[str, Dict[str, Any], bool]:
+         """
+         Now that streaming is done, handle the final AI response.
+         This might yield additional SSE tokens if we do stalling.
+         At the end, set self._continue_execution accordingly.
+         """
+         tool_call_name = tool_call.function.name
+         tool_call_args_str = tool_call.function.arguments
+
+         try:
+             tool_args = json.loads(tool_call_args_str)
+         except json.JSONDecodeError:
+             logger.warning(f"Failed to JSON decode tool call argument string: {tool_call_args_str}")
+             tool_args = {}
+
+         # Get request heartbeats and coerce to bool
+         request_heartbeat = tool_args.pop("request_heartbeat", False)
+         # Pre-emptively pop out inner_thoughts
+         tool_args.pop(INNER_THOUGHTS_KWARG, "")
+
+         # So this is necessary, because sometimes non-structured outputs makes mistakes
+         if isinstance(request_heartbeat, str):
+             request_heartbeat = request_heartbeat.lower() == "true"
+         else:
+             request_heartbeat = bool(request_heartbeat)
+
+         continue_stepping = request_heartbeat
+         tool_rules_solver = agent_step_state.tool_rules_solver
+         tool_rules_solver.register_tool_call(tool_name=tool_call_name)
+         if tool_rules_solver.is_terminal_tool(tool_name=tool_call_name):
+             continue_stepping = False
+         elif tool_rules_solver.has_children_tools(tool_name=tool_call_name):
+             continue_stepping = True
+         elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
+             continue_stepping = True
+
+         step_count = agent_step_state.step_number
+         if step_count >= self.max_steps:
+             logger.warning("Hit max steps, stopping agent loop prematurely.")
+             continue_stepping = False
+
+         return tool_call_name, tool_args, continue_stepping
 
-     def prepare_tools_per_agent(self, agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]:
+     def _prepare_tools_per_agent(self, agent_state: AgentState, tool_rules_solver: ToolRulesSolver) -> List[dict]:
          tools = [t for t in agent_state.tools if t.tool_type in {ToolType.CUSTOM, ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
          valid_tool_names = tool_rules_solver.get_allowed_tool_names(available_tools=set([t.name for t in tools]))
          return [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
 
-     def get_in_context_messages_per_agent(self, agent_state: AgentState, input_messages: List[MessageCreate]) -> List[Message]:
+     def _get_in_context_messages_per_agent(self, agent_state: AgentState, input_messages: List[MessageCreate]) -> List[Message]:
          current_in_context_messages, new_in_context_messages = _prepare_in_context_messages(
              input_messages, agent_state, self.message_manager, self.actor
          )
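Taken together, the new LettaAgentBatch surface is a two-phase, poll-driven loop: step_until_request submits one provider batch and records per-agent items, and resume_step_after_request is invoked later, once results land (the poller-side changes are in letta/jobs/llm_batch_job_polling.py and letta/jobs/scheduler.py above). A hedged usage sketch; the manager wiring and IDs are illustrative, not taken from this diff:

    from typing import Dict, List

    from letta.agents.letta_agent_batch import LettaAgentBatch
    from letta.schemas.letta_request import LettaBatchRequest
    from letta.schemas.letta_response import LettaBatchResponse


    async def run_batch_round_trip(deps: Dict, batch_requests: List[LettaBatchRequest], letta_batch_job_id: str) -> LettaBatchResponse:
        """Illustrative driver; `deps` holds the manager instances the server would inject."""
        batch_agent = LettaAgentBatch(
            message_manager=deps["message_manager"],
            agent_manager=deps["agent_manager"],
            block_manager=deps["block_manager"],
            passage_manager=deps["passage_manager"],
            batch_manager=deps["batch_manager"],
            sandbox_config_manager=deps["sandbox_config_manager"],
            job_manager=deps["job_manager"],
            actor=deps["actor"],
        )

        # Phase 1: submit. Returns immediately with a running LLM batch job.
        resp = await batch_agent.step_until_request(
            batch_requests=batch_requests,
            letta_batch_job_id=letta_batch_job_id,  # jobs-table row created by the caller
        )

        # Phase 2: normally triggered by the cron poller once provider results land.
        return await batch_agent.resume_step_after_request(
            letta_batch_id=resp.letta_batch_id,
            llm_batch_id=resp.last_llm_batch_id,
        )

resume_step_after_request either recurses into step_until_request for agents that asked to continue, or marks the Letta batch job completed when no agent requests another step.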
letta/constants.py CHANGED
@@ -56,10 +56,10 @@ BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
  BASE_SLEEPTIME_CHAT_TOOLS = ["send_message", "conversation_search", "archival_memory_search"]
  # Base memory tools for sleeptime agent
  BASE_SLEEPTIME_TOOLS = [
-     "rethink_memory",
-     "finish_rethinking_memory",
-     "view_core_memory_with_line_numbers",
-     "core_memory_insert",
+     "memory_replace",
+     "memory_insert",
+     "memory_rethink",
+     "memory_finish_edits",
      "archival_memory_insert",
      "archival_memory_search",
      "conversation_search",
@@ -103,6 +103,11 @@ ERROR_MESSAGE_PREFIX = "Error"
 
  NON_USER_MSG_PREFIX = "[This is an automated system message hidden from the user] "
 
+ CORE_MEMORY_LINE_NUMBER_WARNING = (
+     "# NOTE: Line numbers shown below are to help during editing. Do NOT include line number prefixes in your memory edit tool calls."
+ )
+
+
  # Constants to do with summarization / conversation length window
  # The max amount of tokens supported by the underlying model (eg 8k for gpt-4 and Mistral 7B)
  LLM_MAX_TOKENS = {
@@ -110,6 +115,12 @@ LLM_MAX_TOKENS = {
      "deepseek-chat": 64000,
      "deepseek-reasoner": 64000,
      ## OpenAI models: https://platform.openai.com/docs/models/overview
+     "gpt-4.1": 1047576,
+     "gpt-4.1-2025-04-14": 1047576,
+     "gpt-4.1-mini": 1047576,
+     "gpt-4.1-mini-2025-04-14": 1047576,
+     "gpt-4.1-nano": 1047576,
+     "gpt-4.1-nano-2025-04-14": 1047576,
      # gpt-4.5-preview
      "gpt-4.5-preview": 128000,
      "gpt-4.5-preview-2025-02-27": 128000,