letta-nightly 0.6.53.dev20250417104214__py3-none-any.whl → 0.6.54.dev20250419104029__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +6 -31
  3. letta/agents/letta_agent.py +1 -0
  4. letta/agents/letta_agent_batch.py +369 -18
  5. letta/constants.py +15 -4
  6. letta/functions/function_sets/base.py +168 -21
  7. letta/groups/sleeptime_multi_agent.py +3 -3
  8. letta/helpers/converters.py +1 -1
  9. letta/helpers/message_helper.py +1 -0
  10. letta/jobs/llm_batch_job_polling.py +39 -10
  11. letta/jobs/scheduler.py +54 -13
  12. letta/jobs/types.py +26 -6
  13. letta/llm_api/anthropic_client.py +3 -1
  14. letta/llm_api/llm_api_tools.py +7 -1
  15. letta/llm_api/openai.py +2 -0
  16. letta/orm/agent.py +5 -29
  17. letta/orm/base.py +2 -2
  18. letta/orm/enums.py +1 -0
  19. letta/orm/job.py +5 -0
  20. letta/orm/llm_batch_items.py +2 -2
  21. letta/orm/llm_batch_job.py +5 -2
  22. letta/orm/message.py +12 -4
  23. letta/orm/passage.py +0 -6
  24. letta/orm/sqlalchemy_base.py +0 -3
  25. letta/personas/examples/sleeptime_doc_persona.txt +2 -0
  26. letta/prompts/system/sleeptime.txt +20 -11
  27. letta/prompts/system/sleeptime_doc_ingest.txt +35 -0
  28. letta/schemas/agent.py +24 -1
  29. letta/schemas/enums.py +3 -1
  30. letta/schemas/job.py +39 -0
  31. letta/schemas/letta_message.py +24 -7
  32. letta/schemas/letta_request.py +7 -2
  33. letta/schemas/letta_response.py +3 -1
  34. letta/schemas/llm_batch_job.py +4 -3
  35. letta/schemas/llm_config.py +6 -2
  36. letta/schemas/message.py +11 -1
  37. letta/schemas/providers.py +10 -58
  38. letta/serialize_schemas/marshmallow_agent.py +25 -22
  39. letta/serialize_schemas/marshmallow_message.py +1 -1
  40. letta/server/db.py +75 -49
  41. letta/server/rest_api/app.py +1 -0
  42. letta/server/rest_api/interface.py +7 -2
  43. letta/server/rest_api/routers/v1/__init__.py +2 -0
  44. letta/server/rest_api/routers/v1/agents.py +33 -6
  45. letta/server/rest_api/routers/v1/messages.py +132 -0
  46. letta/server/rest_api/routers/v1/sources.py +21 -2
  47. letta/server/rest_api/utils.py +23 -10
  48. letta/server/server.py +67 -21
  49. letta/services/agent_manager.py +44 -21
  50. letta/services/group_manager.py +2 -2
  51. letta/services/helpers/agent_manager_helper.py +5 -3
  52. letta/services/job_manager.py +34 -5
  53. letta/services/llm_batch_manager.py +200 -57
  54. letta/services/message_manager.py +23 -1
  55. letta/services/passage_manager.py +2 -2
  56. letta/services/tool_executor/tool_execution_manager.py +13 -3
  57. letta/services/tool_executor/tool_execution_sandbox.py +0 -1
  58. letta/services/tool_executor/tool_executor.py +48 -9
  59. letta/services/tool_sandbox/base.py +24 -6
  60. letta/services/tool_sandbox/e2b_sandbox.py +25 -5
  61. letta/services/tool_sandbox/local_sandbox.py +23 -7
  62. letta/settings.py +2 -2
  63. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/METADATA +2 -1
  64. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/RECORD +67 -65
  65. letta/sleeptime_agent.py +0 -61
  66. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.6.53.dev20250417104214.dist-info → letta_nightly-0.6.54.dev20250419104029.dist-info}/entry_points.txt +0 -0
letta/functions/function_sets/base.py CHANGED
@@ -1,6 +1,7 @@
 from typing import Optional
 
 from letta.agent import Agent
+from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING
 
 
 def send_message(self: "Agent", message: str) -> Optional[str]:
@@ -195,40 +196,186 @@ def finish_rethinking_memory(agent_state: "AgentState") -> None:  # type: ignore
     return None
 
 
-def view_core_memory_with_line_numbers(agent_state: "AgentState", target_block_label: str) -> None:  # type: ignore
+## Attempted v2 of sleep-time function set, meant to work better across all types
+
+SNIPPET_LINES: int = 4
+
+
+# Based off of: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/tools/edit.py?ref=musings.yasyf.com#L154
+def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str: Optional[str] = None) -> str:  # type: ignore
     """
-    View the contents of core memory in editor mode with line numbers. Called before `core_memory_insert` to see line numbers of memory block.
+    The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.
 
     Args:
-        target_block_label (str): The name of the block to view.
+        label (str): Section of the memory to be edited, identified by its label.
+        old_str (str): The text to replace (must match exactly, including whitespace and indentation).
+        new_str (Optional[str]): The new text to insert in place of the old text. Omit this argument to delete the old_str.
 
     Returns:
-        None: None is always returned as this function does not produce a response.
+        str: The success message
     """
-    return None
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", old_str)):
+        raise ValueError(
+            "old_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in old_str:
+        raise ValueError(
+            "old_str contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+    if bool(re.search(r"\nLine \d+: ", new_str)):
+        raise ValueError(
+            "new_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    old_str = str(old_str).expandtabs()
+    new_str = str(new_str).expandtabs()
+    current_value = str(agent_state.memory.get_block(label).value).expandtabs()
+
+    # Check if old_str is unique in the block
+    occurences = current_value.count(old_str)
+    if occurences == 0:
+        raise ValueError(f"No replacement was performed, old_str `{old_str}` did not appear verbatim in memory block with label `{label}`.")
+    elif occurences > 1:
+        content_value_lines = current_value.split("\n")
+        lines = [idx + 1 for idx, line in enumerate(content_value_lines) if old_str in line]
+        raise ValueError(
+            f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique."
+        )
+
+    # Replace old_str with new_str
+    new_value = current_value.replace(str(old_str), str(new_str))
+
+    # Write the new content to the block
+    agent_state.memory.update_block_value(label=label, value=new_value)
 
+    # Create a snippet of the edited section
+    SNIPPET_LINES = 3
+    replacement_line = current_value.split(old_str)[0].count("\n")
+    start_line = max(0, replacement_line - SNIPPET_LINES)
+    end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
+    snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1])
 
-def core_memory_insert(agent_state: "AgentState", target_block_label: str, new_memory: str, line_number: Optional[int] = None, replace: bool = False) -> None:  # type: ignore
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet, f"a snippet of {path}", start_line + 1
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    # return None
+    return success_msg
+
+
+def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_line: int = -1) -> Optional[str]:  # type: ignore
     """
-    Insert new memory content into a core memory block at a specific line number. Call `view_core_memory_with_line_numbers` to see line numbers of the memory block before using this tool.
+    The memory_insert command allows you to insert text at a specific location in a memory block.
 
     Args:
-        target_block_label (str): The name of the block to write to.
-        new_memory (str): The new memory content to insert.
-        line_number (Optional[int]): Line number to insert content into, 0 indexed (None for end of file).
-        replace (bool): Whether to overwrite the content at the specified line number.
+        label (str): Section of the memory to be edited, identified by its label.
+        new_str (str): The text to insert.
+        insert_line (int): The line number after which to insert the text (0 for beginning of file). Defaults to -1 (end of the file).
+
+    Returns:
+        Optional[str]: None is always returned as this function does not produce a response.
+    """
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", new_str)):
+        raise ValueError(
+            "new_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in new_str:
+        raise ValueError(
+            "new_str contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    current_value = str(agent_state.memory.get_block(label).value).expandtabs()
+    new_str = str(new_str).expandtabs()
+    current_value_lines = current_value.split("\n")
+    n_lines = len(current_value_lines)
+
+    # Check if we're in range, from 0 (pre-line), to 1 (first line), to n_lines (last line)
+    if insert_line < 0 or insert_line > n_lines:
+        raise ValueError(
+            f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the memory block: {[0, n_lines]}, or -1 to append to the end of the memory block."
+        )
+
+    # Insert the new string as a line
+    new_str_lines = new_str.split("\n")
+    new_value_lines = current_value_lines[:insert_line] + new_str_lines + current_value_lines[insert_line:]
+    snippet_lines = (
+        current_value_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
+        + new_str_lines
+        + current_value_lines[insert_line : insert_line + SNIPPET_LINES]
+    )
+
+    # Collate into the new value to update
+    new_value = "\n".join(new_value_lines)
+    snippet = "\n".join(snippet_lines)
+
+    # Write into the block
+    agent_state.memory.update_block_value(label=label, value=new_value)
+
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet,
+    #     "a snippet of the edited file",
+    #     max(1, insert_line - SNIPPET_LINES + 1),
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    return success_msg
+
+
+def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> None:
+    """
+    The memory_rethink command allows you to completely rewrite the contents of a memory block. Use this tool to make large sweeping changes (e.g. when you want to condense or reorganize the memory blocks), do NOT use this tool to make small precise edits (e.g. add or remove a line, replace a specific string, etc).
+
+    Args:
+        label (str): The memory block to be rewritten, identified by its label.
+        new_memory (str): The new memory contents with information integrated from existing memory blocks and the conversation context.
 
     Returns:
         None: None is always returned as this function does not produce a response.
     """
-    current_value = str(agent_state.memory.get_block(target_block_label).value)
-    current_value_list = current_value.split("\n")
-    if line_number is None:
-        line_number = len(current_value_list)
-    if replace:
-        current_value_list[line_number - 1] = new_memory
-    else:
-        current_value_list.insert(line_number, new_memory)
-    new_value = "\n".join(current_value_list)
-    agent_state.memory.update_block_value(label=target_block_label, value=new_value)
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", new_memory)):
+        raise ValueError(
+            "new_memory contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in new_memory:
+        raise ValueError(
+            "new_memory contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    if agent_state.memory.get_block(label) is None:
+        agent_state.memory.create_block(label=label, value=new_memory)
+
+    agent_state.memory.update_block_value(label=label, value=new_memory)
+
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet, f"a snippet of {path}", start_line + 1
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    # return None
+    return success_msg
+
+
+def memory_finish_edits(agent_state: "AgentState") -> None:  # type: ignore
+    """
+    Call the memory_finish_edits command when you are finished making edits (integrating all new information) into the memory blocks. This function is called when the agent is done rethinking the memory.
+
+    Returns:
+        Optional[str]: None is always returned as this function does not produce a response.
+    """
    return None
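
For reference, a minimal sketch of driving the new v2 memory tools end to end. This is an editor's illustration, not letta code: Block/Memory/AgentState below are simplified stand-ins for the real letta.schemas types, and the import path and signatures are taken from the diff above.

from letta.functions.function_sets.base import memory_insert, memory_replace

class Block:
    def __init__(self, value: str):
        self.value = value

class Memory:
    def __init__(self, blocks: dict):
        self._blocks = blocks

    def get_block(self, label: str) -> Block:
        return self._blocks[label]

    def update_block_value(self, label: str, value: str) -> None:
        self._blocks[label].value = value

class AgentState:
    def __init__(self, memory: Memory):
        self.memory = memory

state = AgentState(Memory({"human": Block("Name: Alice\nRole: engineer")}))

# Precise single-occurrence edit; raises if old_str is absent or ambiguous.
print(memory_replace(state, label="human", old_str="Alice", new_str="Alice B."))

# Note: as shown in the diff, the range check rejects the documented -1 default
# (insert_line < 0 raises), so pass an explicit in-range line number.
memory_insert(state, label="human", new_str="Timezone: UTC", insert_line=1)
print(state.memory.get_block("human").value)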
letta/groups/sleeptime_multi_agent.py CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 import threading
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import List, Optional
 
 from letta.agent import Agent, AgentState
@@ -154,7 +154,7 @@ class SleeptimeMultiAgent(Agent):
                 )
                 job_update = JobUpdate(
                     status=JobStatus.completed,
-                    completed_at=datetime.utcnow(),
+                    completed_at=datetime.now(timezone.utc),
                     metadata={
                         "result": result.model_dump(mode="json"),
                         "agent_id": participant_agent.agent_state.id,
@@ -165,7 +165,7 @@ class SleeptimeMultiAgent(Agent):
             except Exception as e:
                 job_update = JobUpdate(
                     status=JobStatus.failed,
-                    completed_at=datetime.utcnow(),
+                    completed_at=datetime.now(timezone.utc),
                     metadata={"error": str(e)},
                 )
                 self.job_manager.update_job_by_id(job_id=run_id, job_update=job_update, actor=self.user)
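
A quick aside on the utcnow() → now(timezone.utc) changes repeated throughout this release: the former returns a naive datetime (and is deprecated as of Python 3.12), the latter a timezone-aware one. A minimal illustration:

from datetime import datetime, timezone

naive = datetime.utcnow()           # deprecated since Python 3.12; tzinfo is None
aware = datetime.now(timezone.utc)  # carries an explicit UTC offset

print(naive.tzinfo)       # None - the "UTC-ness" is only implied
print(aware.tzinfo)       # UTC
print(aware.isoformat())  # e.g. 2025-04-19T10:40:29+00:00 - offset survives serialization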
letta/helpers/converters.py CHANGED
@@ -154,7 +154,7 @@ def deserialize_tool_calls(data: Optional[List[Dict]]) -> List[OpenAIToolCall]:
     calls = []
     for item in data:
         func_data = item.pop("function", None)
-        tool_call_function = OpenAIFunction(**func_data) if func_data else None
+        tool_call_function = OpenAIFunction(**func_data)
         calls.append(OpenAIToolCall(function=tool_call_function, **item))
 
     return calls
letta/helpers/message_helper.py CHANGED
@@ -39,4 +39,5 @@ def prepare_input_message_create(
         tool_calls=None,  # irrelevant
         tool_call_id=None,
         otid=message.otid,
+        sender_id=message.sender_id,
     )
letta/jobs/llm_batch_job_polling.py CHANGED
@@ -2,11 +2,14 @@ import asyncio
 import datetime
 from typing import List
 
+from letta.agents.letta_agent_batch import LettaAgentBatch
 from letta.jobs.helpers import map_anthropic_batch_job_status_to_job_status, map_anthropic_individual_batch_item_status_to_job_status
-from letta.jobs.types import BatchId, BatchPollingResult, ItemUpdateInfo
+from letta.jobs.types import BatchPollingResult, ItemUpdateInfo
 from letta.log import get_logger
 from letta.schemas.enums import JobStatus, ProviderType
+from letta.schemas.letta_response import LettaBatchResponse
 from letta.schemas.llm_batch_job import LLMBatchJob
+from letta.schemas.user import User
 from letta.server.server import SyncServer
 
 logger = get_logger(__name__)
@@ -49,14 +52,14 @@ async def fetch_batch_status(server: SyncServer, batch_job: LLMBatchJob) -> Batc
         response = await server.anthropic_async_client.beta.messages.batches.retrieve(batch_id_str)
         new_status = map_anthropic_batch_job_status_to_job_status(response.processing_status)
         logger.debug(f"[Poll BatchJob] Batch {batch_job.id}: provider={response.processing_status} → internal={new_status}")
-        return (batch_job.id, new_status, response)
+        return BatchPollingResult(batch_job.id, new_status, response)
     except Exception as e:
-        logger.warning(f"[Poll BatchJob] Batch {batch_job.id}: failed to retrieve {batch_id_str}: {e}")
+        logger.error(f"[Poll BatchJob] Batch {batch_job.id}: failed to retrieve {batch_id_str}: {e}")
         # We treat a retrieval error as still running to try again next cycle
-        return (batch_job.id, JobStatus.running, None)
+        return BatchPollingResult(batch_job.id, JobStatus.running, None)
 
 
-async def fetch_batch_items(server: SyncServer, batch_id: BatchId, batch_resp_id: str) -> List[ItemUpdateInfo]:
+async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: str) -> List[ItemUpdateInfo]:
     """
     Fetch individual item results for a completed batch.
 
@@ -73,7 +76,7 @@ async def fetch_batch_items(server: SyncServer, batch_id: BatchId, batch_resp_id
         async for item_result in server.anthropic_async_client.beta.messages.batches.results(batch_resp_id):
             # Here, custom_id should be the agent_id
             item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
-            updates.append((batch_id, item_result.custom_id, item_status, item_result))
+            updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
         logger.info(f"[Poll BatchJob] Fetched {len(updates)} item updates for batch {batch_id}.")
     except Exception as e:
         logger.error(f"[Poll BatchJob] Error fetching item updates for batch {batch_id}: {e}")
@@ -102,7 +105,7 @@ async def poll_batch_updates(server: SyncServer, batch_jobs: List[LLMBatchJob],
     results: List[BatchPollingResult] = await asyncio.gather(*coros)
 
     # Update the server with batch status changes
-    server.batch_manager.bulk_update_batch_statuses(updates=results)
+    server.batch_manager.bulk_update_llm_batch_statuses(updates=results)
     logger.info(f"[Poll BatchJob] Bulk-updated {len(results)} LLM batch(es) in the DB at job level.")
 
     return results
@@ -156,7 +159,7 @@ async def process_completed_batches(
     return item_updates
 
 
-async def poll_running_llm_batches(server: "SyncServer") -> None:
+async def poll_running_llm_batches(server: "SyncServer") -> List[LettaBatchResponse]:
     """
     Cron job to poll all running LLM batch jobs and update their polling responses in bulk.
 
@@ -176,7 +179,7 @@ async def poll_running_llm_batches(server: "SyncServer") -> None:
 
     try:
         # 1. Retrieve running batch jobs
-        batches = server.batch_manager.list_running_batches()
+        batches = server.batch_manager.list_running_llm_batches()
         metrics.total_batches = len(batches)
 
         # TODO: Expand to more providers
@@ -193,7 +196,33 @@ async def poll_running_llm_batches(server: "SyncServer") -> None:
         # 6. Bulk update all items for newly completed batch(es)
         if item_updates:
             metrics.updated_items_count = len(item_updates)
-            server.batch_manager.bulk_update_batch_items_by_agent(item_updates)
+            server.batch_manager.bulk_update_batch_llm_items_results_by_agent(item_updates)
+
+            # ─── Kick off post‑processing for each batch that just completed ───
+            completed = [r for r in batch_results if r.request_status == JobStatus.completed]
+
+            async def _resume(batch_row: LLMBatchJob) -> LettaBatchResponse:
+                actor: User = server.user_manager.get_user_by_id(batch_row.created_by_id)
+                runner = LettaAgentBatch(
+                    message_manager=server.message_manager,
+                    agent_manager=server.agent_manager,
+                    block_manager=server.block_manager,
+                    passage_manager=server.passage_manager,
+                    batch_manager=server.batch_manager,
+                    sandbox_config_manager=server.sandbox_config_manager,
+                    job_manager=server.job_manager,
+                    actor=actor,
+                )
+                return await runner.resume_step_after_request(
+                    letta_batch_id=batch_row.letta_batch_job_id,
+                    llm_batch_id=batch_row.id,
+                )
+
+            # launch them all at once
+            tasks = [_resume(server.batch_manager.get_llm_batch_job_by_id(bid)) for bid, *_ in completed]
+            new_batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
+
+            return new_batch_responses
         else:
             logger.info("[Poll BatchJob] No item-level updates needed.")
 
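Worth noting in the resume fan-out above: asyncio.gather(..., return_exceptions=True) means one failed resume surfaces as an exception object in the result list instead of cancelling its sibling tasks. A self-contained sketch of that pattern (hypothetical resume coroutine, for illustration only):

import asyncio

async def resume(batch_id: str) -> str:
    # Hypothetical stand-in for runner.resume_step_after_request(...)
    if batch_id == "batch-2":
        raise RuntimeError("provider error")
    return f"{batch_id}: resumed"

async def main():
    tasks = [resume(b) for b in ("batch-1", "batch-2", "batch-3")]
    # return_exceptions=True: failures come back as values, siblings keep running.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for r in results:
        if isinstance(r, Exception):
            print("failed:", r)
        else:
            print(r)

asyncio.run(main())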
letta/jobs/scheduler.py CHANGED
@@ -4,27 +4,68 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
 
 from letta.jobs.llm_batch_job_polling import poll_running_llm_batches
+from letta.log import get_logger
+from letta.server.db import db_context
 from letta.server.server import SyncServer
 from letta.settings import settings
 
 scheduler = AsyncIOScheduler()
+logger = get_logger(__name__)
+STARTUP_LOCK_KEY = 0x12345678ABCDEF00
+
+_startup_lock_conn = None
+_startup_lock_cur = None
 
 
 def start_cron_jobs(server: SyncServer):
-    """Initialize cron jobs"""
-    if settings.enable_batch_job_polling:
-        scheduler.add_job(
-            poll_running_llm_batches,
-            args=[server],
-            trigger=IntervalTrigger(seconds=settings.poll_running_llm_batches_interval_seconds),
-            next_run_time=datetime.datetime.now(datetime.timezone.utc),
-            id="poll_llm_batches",
-            name="Poll LLM API batch jobs and update status",
-            replace_existing=True,
-        )
-        scheduler.start()
+    global _startup_lock_conn, _startup_lock_cur
+
+    if not settings.enable_batch_job_polling:
+        return
+
+    with db_context() as session:
+        engine = session.get_bind()
+
+    raw = engine.raw_connection()
+    cur = raw.cursor()
+    cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
+    got = cur.fetchone()[0]
+    if not got:
+        cur.close()
+        raw.close()
+        logger.info("Batch‐poller lock already held – not starting scheduler in this worker")
+        return
+
+    _startup_lock_conn, _startup_lock_cur = raw, cur
+    jitter_seconds = 10
+    trigger = IntervalTrigger(
+        seconds=settings.poll_running_llm_batches_interval_seconds,
+        jitter=jitter_seconds,
+    )
+
+    scheduler.add_job(
+        poll_running_llm_batches,
+        args=[server],
+        trigger=trigger,
+        next_run_time=datetime.datetime.now(datetime.timezone.utc),
+        id="poll_llm_batches",
+        name="Poll LLM API batch jobs",
+        replace_existing=True,
+    )
+    scheduler.start()
+    logger.info("Started batch‐polling scheduler in this worker")
 
 
 def shutdown_cron_scheduler():
-    if settings.enable_batch_job_polling:
+    global _startup_lock_conn, _startup_lock_cur
+
+    if settings.enable_batch_job_polling and scheduler.running:
         scheduler.shutdown()
+
+    if _startup_lock_cur is not None:
+        _startup_lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
+        _startup_lock_conn.commit()
+        _startup_lock_cur.close()
+        _startup_lock_conn.close()
+        _startup_lock_cur = None
+        _startup_lock_conn = None
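
The guard above is the standard Postgres advisory-lock singleton pattern: with several web workers, only the one session that wins pg_try_advisory_lock starts the scheduler, and the lock is released on shutdown (or automatically when the connection dies). A minimal standalone sketch, assuming psycopg2 and a hypothetical DSN:

import psycopg2

LOCK_KEY = 0x12345678ABCDEF00  # any stable 64-bit key; must fit in a signed bigint

conn = psycopg2.connect("postgresql://localhost/letta")  # hypothetical DSN
cur = conn.cursor()

# Non-blocking: returns True if this session acquired the lock,
# False if another session (worker) already holds it.
cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (LOCK_KEY,))
if cur.fetchone()[0]:
    print("lock acquired - run the singleton work in this process")
    # ... the lock is held for the lifetime of this connection ...
    cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (LOCK_KEY,))
    conn.commit()
else:
    print("lock held elsewhere - skip in this process")

cur.close()
conn.close()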
letta/jobs/types.py CHANGED
@@ -1,10 +1,30 @@
-from typing import Optional, Tuple
+from typing import NamedTuple, Optional
 
 from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchIndividualResponse
 
-from letta.schemas.enums import JobStatus
+from letta.schemas.enums import AgentStepStatus, JobStatus
 
-BatchId = str
-AgentId = str
-BatchPollingResult = Tuple[BatchId, JobStatus, Optional[BetaMessageBatch]]
-ItemUpdateInfo = Tuple[BatchId, AgentId, JobStatus, BetaMessageBatchIndividualResponse]
+
+class BatchPollingResult(NamedTuple):
+    llm_batch_id: str
+    request_status: JobStatus
+    batch_response: Optional[BetaMessageBatch]
+
+
+class ItemUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    request_status: JobStatus
+    batch_request_result: Optional[BetaMessageBatchIndividualResponse]
+
+
+class StepStatusUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    step_status: AgentStepStatus
+
+
+class RequestStatusUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    request_status: JobStatus
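
Switching from bare tuple aliases to NamedTuples is source-compatible with the positional call sites updated above while giving downstream code named fields; for example:

from typing import NamedTuple, Optional

class BatchPollingResult(NamedTuple):
    llm_batch_id: str
    request_status: str  # JobStatus in the real code; str here keeps the sketch standalone
    batch_response: Optional[object]

# Positional construction, as at the updated call sites, still works...
result = BatchPollingResult("batch-123", "completed", None)

# ...and consumers gain named access while tuple unpacking keeps working.
print(result.request_status)   # "completed"
batch_id, status, _ = result   # unchanged tuple behavior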
letta/llm_api/anthropic_client.py CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Union
 
 import anthropic
 from anthropic import AsyncStream
-from anthropic.types import Message as AnthropicMessage
+from anthropic.types.beta import BetaMessage as AnthropicMessage
 from anthropic.types.beta import BetaRawMessageStreamEvent
 from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
 from anthropic.types.beta.messages import BetaMessageBatch
@@ -304,6 +304,8 @@ class AnthropicClient(LLMClientBase):
 
         return super().handle_llm_error(e)
 
+    # TODO: Input messages doesn't get used here
+    # TODO: Clean up this interface
     def convert_response_to_chat_completion(
         self,
         response_data: dict,
letta/llm_api/llm_api_tools.py CHANGED
@@ -247,6 +247,13 @@ def create(
             use_structured_output=False,  # NOTE: not supported atm for xAI
         )
 
+        # Specific bug for the mini models (as of Apr 14, 2025)
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
+        if "grok-3-mini-" in llm_config.model:
+            data.presence_penalty = None
+            data.frequency_penalty = None
+
         if stream:  # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -322,7 +329,6 @@ def create(
 
         # Force tool calling
         tool_call = None
-        llm_config.put_inner_thoughts_in_kwargs = True
         if functions is None:
             # Special case for summarization path
             tools = None
letta/llm_api/openai.py CHANGED
@@ -136,6 +136,7 @@ def build_openai_chat_completions_request(
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
             temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+            reasoning_effort=llm_config.reasoning_effort,
         )
     else:
         data = ChatCompletionRequest(
@@ -146,6 +147,7 @@ def build_openai_chat_completions_request(
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
             temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+            reasoning_effort=llm_config.reasoning_effort,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
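
For context, reasoning_effort is the chat-completions parameter OpenAI exposes for its reasoning models; a minimal direct-API sketch (assumes the openai package, an OPENAI_API_KEY in the environment, and a reasoning-capable model):

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# reasoning_effort ("low" / "medium" / "high") is only accepted by reasoning
# models; the diff simply forwards llm_config.reasoning_effort into the request.
resp = client.chat.completions.create(
    model="o3-mini",
    reasoning_effort="medium",
    messages=[{"role": "user", "content": "One-line summary of advisory locks."}],
)
print(resp.choices[0].message.content)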
letta/orm/agent.py CHANGED
@@ -7,12 +7,11 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 from letta.orm.block import Block
 from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn
 from letta.orm.identity import Identity
-from letta.orm.message import Message
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.organization import Organization
 from letta.orm.sqlalchemy_base import SqlalchemyBase
 from letta.schemas.agent import AgentState as PydanticAgentState
-from letta.schemas.agent import AgentType
+from letta.schemas.agent import AgentType, get_prompt_template_for_agent_type
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
@@ -91,13 +90,6 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         back_populates="agents",
         doc="Blocks forming the core memory of the agent.",
     )
-    messages: Mapped[List["Message"]] = relationship(
-        "Message",
-        back_populates="agent",
-        lazy="selectin",
-        cascade="all, delete-orphan",  # Ensure messages are deleted when the agent is deleted
-        passive_deletes=True,
-    )
     tags: Mapped[List["AgentsTags"]] = relationship(
         "AgentsTags",
         back_populates="agent",
@@ -105,25 +97,6 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         lazy="selectin",
         doc="Tags associated with the agent.",
     )
-    source_passages: Mapped[List["SourcePassage"]] = relationship(
-        "SourcePassage",
-        secondary="sources_agents",  # The join table for Agent -> Source
-        primaryjoin="Agent.id == sources_agents.c.agent_id",
-        secondaryjoin="and_(SourcePassage.source_id == sources_agents.c.source_id)",
-        lazy="selectin",
-        order_by="SourcePassage.created_at.desc()",
-        viewonly=True,  # Ensures SQLAlchemy doesn't attempt to manage this relationship
-        doc="All passages derived from sources associated with this agent.",
-    )
-    agent_passages: Mapped[List["AgentPassage"]] = relationship(
-        "AgentPassage",
-        back_populates="agent",
-        lazy="selectin",
-        order_by="AgentPassage.created_at.desc()",
-        cascade="all, delete-orphan",
-        viewonly=True,  # Ensures SQLAlchemy doesn't attempt to manage this relationship
-        doc="All passages derived created by this agent.",
-    )
     identities: Mapped[List["Identity"]] = relationship(
         "Identity",
         secondary="identities_agents",
@@ -202,7 +175,10 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         "tags": lambda: [t.tag for t in self.tags],
         "tools": lambda: self.tools,
         "sources": lambda: [s.to_pydantic() for s in self.sources],
-        "memory": lambda: Memory(blocks=[b.to_pydantic() for b in self.core_memory]),
+        "memory": lambda: Memory(
+            blocks=[b.to_pydantic() for b in self.core_memory],
+            prompt_template=get_prompt_template_for_agent_type(self.agent_type),
+        ),
         "identity_ids": lambda: [i.id for i in self.identities],
         "multi_agent_group": lambda: self.multi_agent_group,
         "tool_exec_environment_variables": lambda: self.tool_exec_environment_variables,
letta/orm/base.py CHANGED
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Optional
 
 from sqlalchemy import Boolean, DateTime, String, func, text
@@ -25,7 +25,7 @@ class CommonSqlalchemyMetaMixins(Base):
             timestamp (Optional[datetime]): The timestamp to set.
                 If None, uses the current UTC time.
         """
-        self.updated_at = timestamp or datetime.utcnow()
+        self.updated_at = timestamp or datetime.now(timezone.utc)
 
     def _set_created_and_updated_by_fields(self, actor_id: str) -> None:
         """Populate created_by_id and last_updated_by_id based on actor."""
letta/orm/enums.py CHANGED
@@ -16,6 +16,7 @@ class ToolType(str, Enum):
 class JobType(str, Enum):
     JOB = "job"
     RUN = "run"
+    BATCH = "batch"
 
 
 class ToolSourceType(str, Enum):
letta/orm/job.py CHANGED
@@ -39,6 +39,11 @@ class Job(SqlalchemyBase, UserMixin):
         JSON, nullable=True, doc="The request configuration for the job, stored as JSON."
     )
 
+    # callback related columns
+    callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
+    callback_sent_at: Mapped[Optional[datetime]] = mapped_column(nullable=True, doc="Timestamp when the callback was last attempted.")
+    callback_status_code: Mapped[Optional[int]] = mapped_column(nullable=True, doc="HTTP status code returned by the callback endpoint.")
+
     # relationships
     user: Mapped["User"] = relationship("User", back_populates="jobs")
     job_messages: Mapped[List["JobMessage"]] = relationship("JobMessage", back_populates="job", cascade="all, delete-orphan")
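
Taken together, the three new columns give each job a callback audit trail: where to POST, when it was attempted, and what came back. A sketch of how a completion hook might populate them (the helper is illustrative, not the letta implementation):

from datetime import datetime, timezone

import httpx  # assumption: any HTTP client would do

def fire_job_callback(job) -> None:
    """Hypothetical helper: POST completion info to job.callback_url and
    record the attempt on the new columns."""
    if not job.callback_url:
        return
    payload = {"job_id": job.id, "status": str(job.status)}
    job.callback_sent_at = datetime.now(timezone.utc)  # timestamp of the attempt
    try:
        resp = httpx.post(job.callback_url, json=payload, timeout=5.0)
        job.callback_status_code = resp.status_code  # what the endpoint returned
    except httpx.HTTPError:
        job.callback_status_code = None  # attempted, but no HTTP response received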