letta-nightly 0.6.53.dev20250418104238__py3-none-any.whl → 0.6.54.dev20250419194309__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +6 -31
- letta/agents/letta_agent.py +1 -0
- letta/agents/letta_agent_batch.py +369 -18
- letta/constants.py +15 -4
- letta/functions/function_sets/base.py +168 -21
- letta/groups/sleeptime_multi_agent.py +3 -3
- letta/helpers/converters.py +1 -1
- letta/helpers/message_helper.py +1 -0
- letta/jobs/llm_batch_job_polling.py +39 -10
- letta/jobs/scheduler.py +54 -13
- letta/jobs/types.py +26 -6
- letta/llm_api/anthropic_client.py +3 -1
- letta/llm_api/llm_api_tools.py +7 -1
- letta/llm_api/openai.py +2 -0
- letta/orm/agent.py +5 -29
- letta/orm/base.py +2 -2
- letta/orm/enums.py +1 -0
- letta/orm/job.py +5 -0
- letta/orm/llm_batch_items.py +2 -2
- letta/orm/llm_batch_job.py +5 -2
- letta/orm/message.py +12 -4
- letta/orm/passage.py +0 -6
- letta/orm/sqlalchemy_base.py +0 -3
- letta/personas/examples/sleeptime_doc_persona.txt +2 -0
- letta/prompts/system/sleeptime.txt +20 -11
- letta/prompts/system/sleeptime_doc_ingest.txt +35 -0
- letta/schemas/agent.py +24 -1
- letta/schemas/enums.py +3 -1
- letta/schemas/job.py +39 -0
- letta/schemas/letta_message.py +24 -7
- letta/schemas/letta_request.py +7 -2
- letta/schemas/letta_response.py +3 -1
- letta/schemas/llm_batch_job.py +4 -3
- letta/schemas/llm_config.py +6 -2
- letta/schemas/message.py +11 -1
- letta/schemas/providers.py +10 -58
- letta/serialize_schemas/marshmallow_agent.py +25 -22
- letta/serialize_schemas/marshmallow_message.py +1 -1
- letta/server/db.py +75 -49
- letta/server/rest_api/app.py +1 -0
- letta/server/rest_api/interface.py +7 -2
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +33 -6
- letta/server/rest_api/routers/v1/messages.py +132 -0
- letta/server/rest_api/routers/v1/sources.py +21 -2
- letta/server/rest_api/utils.py +23 -10
- letta/server/server.py +67 -21
- letta/services/agent_manager.py +44 -21
- letta/services/group_manager.py +2 -2
- letta/services/helpers/agent_manager_helper.py +5 -3
- letta/services/job_manager.py +34 -5
- letta/services/llm_batch_manager.py +200 -57
- letta/services/message_manager.py +23 -1
- letta/services/passage_manager.py +2 -2
- letta/services/tool_executor/tool_execution_manager.py +13 -3
- letta/services/tool_executor/tool_execution_sandbox.py +0 -1
- letta/services/tool_executor/tool_executor.py +48 -9
- letta/services/tool_sandbox/base.py +24 -6
- letta/services/tool_sandbox/e2b_sandbox.py +25 -5
- letta/services/tool_sandbox/local_sandbox.py +23 -7
- letta/settings.py +2 -2
- {letta_nightly-0.6.53.dev20250418104238.dist-info → letta_nightly-0.6.54.dev20250419194309.dist-info}/METADATA +2 -1
- {letta_nightly-0.6.53.dev20250418104238.dist-info → letta_nightly-0.6.54.dev20250419194309.dist-info}/RECORD +67 -65
- letta/sleeptime_agent.py +0 -61
- {letta_nightly-0.6.53.dev20250418104238.dist-info → letta_nightly-0.6.54.dev20250419194309.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.53.dev20250418104238.dist-info → letta_nightly-0.6.54.dev20250419194309.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.53.dev20250418104238.dist-info → letta_nightly-0.6.54.dev20250419194309.dist-info}/entry_points.txt +0 -0
letta/functions/function_sets/base.py
CHANGED
@@ -1,6 +1,7 @@
 from typing import Optional
 
 from letta.agent import Agent
+from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING
 
 
 def send_message(self: "Agent", message: str) -> Optional[str]:
@@ -195,40 +196,186 @@ def finish_rethinking_memory(agent_state: "AgentState") -> None: # type: ignore
     return None
 
 
-
+## Attempted v2 of sleep-time function set, meant to work better across all types
+
+SNIPPET_LINES: int = 4
+
+
+# Based off of: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/tools/edit.py?ref=musings.yasyf.com#L154
+def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str: Optional[str] = None) -> str: # type: ignore
     """
-
+    The memory_replace command allows you to replace a specific string in a memory block with a new string. This is used for making precise edits.
 
     Args:
-
+        label (str): Section of the memory to be edited, identified by its label.
+        old_str (str): The text to replace (must match exactly, including whitespace and indentation).
+        new_str (Optional[str]): The new text to insert in place of the old text. Omit this argument to delete the old_str.
 
     Returns:
-
+        str: The success message
     """
-
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", old_str)):
+        raise ValueError(
+            "old_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in old_str:
+        raise ValueError(
+            "old_str contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+    if bool(re.search(r"\nLine \d+: ", new_str)):
+        raise ValueError(
+            "new_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    old_str = str(old_str).expandtabs()
+    new_str = str(new_str).expandtabs()
+    current_value = str(agent_state.memory.get_block(label).value).expandtabs()
+
+    # Check if old_str is unique in the block
+    occurences = current_value.count(old_str)
+    if occurences == 0:
+        raise ValueError(f"No replacement was performed, old_str `{old_str}` did not appear verbatim in memory block with label `{label}`.")
+    elif occurences > 1:
+        content_value_lines = current_value.split("\n")
+        lines = [idx + 1 for idx, line in enumerate(content_value_lines) if old_str in line]
+        raise ValueError(
+            f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique."
+        )
+
+    # Replace old_str with new_str
+    new_value = current_value.replace(str(old_str), str(new_str))
+
+    # Write the new content to the block
+    agent_state.memory.update_block_value(label=label, value=new_value)
 
+    # Create a snippet of the edited section
+    SNIPPET_LINES = 3
+    replacement_line = current_value.split(old_str)[0].count("\n")
+    start_line = max(0, replacement_line - SNIPPET_LINES)
+    end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
+    snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1])
 
-
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet, f"a snippet of {path}", start_line + 1
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    # return None
+    return success_msg
+
+
+def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_line: int = -1) -> Optional[str]: # type: ignore
     """
-
+    The memory_insert command allows you to insert text at a specific location in a memory block.
 
     Args:
-
-
-
-
+        label (str): Section of the memory to be edited, identified by its label.
+        new_str (str): The text to insert.
+        insert_line (int): The line number after which to insert the text (0 for beginning of file). Defaults to -1 (end of the file).
+
+    Returns:
+        Optional[str]: None is always returned as this function does not produce a response.
+    """
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", new_str)):
+        raise ValueError(
+            "new_str contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in new_str:
+        raise ValueError(
+            "new_str contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    current_value = str(agent_state.memory.get_block(label).value).expandtabs()
+    new_str = str(new_str).expandtabs()
+    current_value_lines = current_value.split("\n")
+    n_lines = len(current_value_lines)
+
+    # Check if we're in range, from 0 (pre-line), to 1 (first line), to n_lines (last line)
+    if insert_line < 0 or insert_line > n_lines:
+        raise ValueError(
+            f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the memory block: {[0, n_lines]}, or -1 to append to the end of the memory block."
+        )
+
+    # Insert the new string as a line
+    new_str_lines = new_str.split("\n")
+    new_value_lines = current_value_lines[:insert_line] + new_str_lines + current_value_lines[insert_line:]
+    snippet_lines = (
+        current_value_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
+        + new_str_lines
+        + current_value_lines[insert_line : insert_line + SNIPPET_LINES]
+    )
+
+    # Collate into the new value to update
+    new_value = "\n".join(new_value_lines)
+    snippet = "\n".join(snippet_lines)
+
+    # Write into the block
+    agent_state.memory.update_block_value(label=label, value=new_value)
+
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet,
+    #     "a snippet of the edited file",
+    #     max(1, insert_line - SNIPPET_LINES + 1),
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    return success_msg
+
+
+def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> None:
+    """
+    The memory_rethink command allows you to completely rewrite the contents of a memory block. Use this tool to make large sweeping changes (e.g. when you want to condense or reorganize the memory blocks), do NOT use this tool to make small precise edits (e.g. add or remove a line, replace a specific string, etc).
+
+    Args:
+        label (str): The memory block to be rewritten, identified by its label.
+        new_memory (str): The new memory contents with information integrated from existing memory blocks and the conversation context.
 
     Returns:
         None: None is always returned as this function does not produce a response.
     """
-
-
-    if
-
-
-
-
-
-
-
+    import re
+
+    if bool(re.search(r"\nLine \d+: ", new_memory)):
+        raise ValueError(
+            "new_memory contains a line number prefix, which is not allowed. Do not include line numbers when calling memory tools (line numbers are for display purposes only)."
+        )
+    if CORE_MEMORY_LINE_NUMBER_WARNING in new_memory:
+        raise ValueError(
+            "new_memory contains a line number warning, which is not allowed. Do not include line number information when calling memory tools (line numbers are for display purposes only)."
+        )
+
+    if agent_state.memory.get_block(label) is None:
+        agent_state.memory.create_block(label=label, value=new_memory)
+
+    agent_state.memory.update_block_value(label=label, value=new_memory)
+
+    # Prepare the success message
+    success_msg = f"The core memory block with label `{label}` has been edited. "
+    # success_msg += self._make_output(
+    #     snippet, f"a snippet of {path}", start_line + 1
+    # )
+    # success_msg += f"A snippet of core memory block `{label}`:\n{snippet}\n"
+    success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the memory block again if necessary."
+
+    # return None
+    return success_msg
+
+
+def memory_finish_edits(agent_state: "AgentState") -> None: # type: ignore
+    """
+    Call the memory_finish_edits command when you are finished making edits (integrating all new information) into the memory blocks. This function is called when the agent is done rethinking the memory.
+
+    Returns:
+        Optional[str]: None is always returned as this function does not produce a response.
+    """
     return None
letta/groups/sleeptime_multi_agent.py
CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 import threading
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import List, Optional
 
 from letta.agent import Agent, AgentState
@@ -154,7 +154,7 @@ class SleeptimeMultiAgent(Agent):
             )
             job_update = JobUpdate(
                 status=JobStatus.completed,
-                completed_at=datetime.utcnow(),
+                completed_at=datetime.now(timezone.utc),
                 metadata={
                     "result": result.model_dump(mode="json"),
                     "agent_id": participant_agent.agent_state.id,
@@ -165,7 +165,7 @@ class SleeptimeMultiAgent(Agent):
         except Exception as e:
             job_update = JobUpdate(
                 status=JobStatus.failed,
-                completed_at=datetime.utcnow(),
+                completed_at=datetime.now(timezone.utc),
                 metadata={"error": str(e)},
             )
             self.job_manager.update_job_by_id(job_id=run_id, job_update=job_update, actor=self.user)
letta/helpers/converters.py
CHANGED
@@ -154,7 +154,7 @@ def deserialize_tool_calls(data: Optional[List[Dict]]) -> List[OpenAIToolCall]:
     calls = []
     for item in data:
         func_data = item.pop("function", None)
-        tool_call_function = OpenAIFunction(**func_data)
+        tool_call_function = OpenAIFunction(**func_data)
         calls.append(OpenAIToolCall(function=tool_call_function, **item))
 
     return calls
letta/jobs/llm_batch_job_polling.py
CHANGED
@@ -2,11 +2,14 @@ import asyncio
 import datetime
 from typing import List
 
+from letta.agents.letta_agent_batch import LettaAgentBatch
 from letta.jobs.helpers import map_anthropic_batch_job_status_to_job_status, map_anthropic_individual_batch_item_status_to_job_status
-from letta.jobs.types import
+from letta.jobs.types import BatchPollingResult, ItemUpdateInfo
 from letta.log import get_logger
 from letta.schemas.enums import JobStatus, ProviderType
+from letta.schemas.letta_response import LettaBatchResponse
 from letta.schemas.llm_batch_job import LLMBatchJob
+from letta.schemas.user import User
 from letta.server.server import SyncServer
 
 logger = get_logger(__name__)
@@ -49,14 +52,14 @@ async def fetch_batch_status(server: SyncServer, batch_job: LLMBatchJob) -> Batc
         response = await server.anthropic_async_client.beta.messages.batches.retrieve(batch_id_str)
         new_status = map_anthropic_batch_job_status_to_job_status(response.processing_status)
         logger.debug(f"[Poll BatchJob] Batch {batch_job.id}: provider={response.processing_status} → internal={new_status}")
-        return (batch_job.id, new_status, response)
+        return BatchPollingResult(batch_job.id, new_status, response)
     except Exception as e:
-        logger.
+        logger.error(f"[Poll BatchJob] Batch {batch_job.id}: failed to retrieve {batch_id_str}: {e}")
         # We treat a retrieval error as still running to try again next cycle
-        return (batch_job.id, JobStatus.running, None)
+        return BatchPollingResult(batch_job.id, JobStatus.running, None)
 
 
-async def fetch_batch_items(server: SyncServer, batch_id:
+async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: str) -> List[ItemUpdateInfo]:
     """
     Fetch individual item results for a completed batch.
 
@@ -73,7 +76,7 @@ async def fetch_batch_items(server: SyncServer, batch_id: BatchId, batch_resp_id
         async for item_result in server.anthropic_async_client.beta.messages.batches.results(batch_resp_id):
             # Here, custom_id should be the agent_id
             item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
-            updates.append((batch_id, item_result.custom_id, item_status, item_result))
+            updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
         logger.info(f"[Poll BatchJob] Fetched {len(updates)} item updates for batch {batch_id}.")
     except Exception as e:
         logger.error(f"[Poll BatchJob] Error fetching item updates for batch {batch_id}: {e}")
@@ -102,7 +105,7 @@ async def poll_batch_updates(server: SyncServer, batch_jobs: List[LLMBatchJob],
     results: List[BatchPollingResult] = await asyncio.gather(*coros)
 
     # Update the server with batch status changes
-    server.batch_manager.
+    server.batch_manager.bulk_update_llm_batch_statuses(updates=results)
     logger.info(f"[Poll BatchJob] Bulk-updated {len(results)} LLM batch(es) in the DB at job level.")
 
     return results
@@ -156,7 +159,7 @@ async def process_completed_batches(
     return item_updates
 
 
-async def poll_running_llm_batches(server: "SyncServer") -> None:
+async def poll_running_llm_batches(server: "SyncServer") -> List[LettaBatchResponse]:
     """
     Cron job to poll all running LLM batch jobs and update their polling responses in bulk.
 
@@ -176,7 +179,7 @@ async def poll_running_llm_batches(server: "SyncServer") -> None:
 
     try:
         # 1. Retrieve running batch jobs
-        batches = server.batch_manager.
+        batches = server.batch_manager.list_running_llm_batches()
         metrics.total_batches = len(batches)
 
         # TODO: Expand to more providers
@@ -193,7 +196,33 @@ async def poll_running_llm_batches(server: "SyncServer") -> None:
         # 6. Bulk update all items for newly completed batch(es)
         if item_updates:
             metrics.updated_items_count = len(item_updates)
-            server.batch_manager.
+            server.batch_manager.bulk_update_batch_llm_items_results_by_agent(item_updates)
+
+            # ─── Kick off post‑processing for each batch that just completed ───
+            completed = [r for r in batch_results if r.request_status == JobStatus.completed]
+
+            async def _resume(batch_row: LLMBatchJob) -> LettaBatchResponse:
+                actor: User = server.user_manager.get_user_by_id(batch_row.created_by_id)
+                runner = LettaAgentBatch(
+                    message_manager=server.message_manager,
+                    agent_manager=server.agent_manager,
+                    block_manager=server.block_manager,
+                    passage_manager=server.passage_manager,
+                    batch_manager=server.batch_manager,
+                    sandbox_config_manager=server.sandbox_config_manager,
+                    job_manager=server.job_manager,
+                    actor=actor,
+                )
+                return await runner.resume_step_after_request(
+                    letta_batch_id=batch_row.letta_batch_job_id,
+                    llm_batch_id=batch_row.id,
+                )
+
+            # launch them all at once
+            tasks = [_resume(server.batch_manager.get_llm_batch_job_by_id(bid)) for bid, *_ in completed]
+            new_batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
+
+            return new_batch_responses
         else:
             logger.info("[Poll BatchJob] No item-level updates needed.")
 
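The resume fan-out above uses asyncio.gather(..., return_exceptions=True), so one failed resume cannot cancel or mask the others; the awaited list mixes LettaBatchResponse values with exception objects, and callers must check each entry. A small self-contained sketch of consuming such a list (names here are illustrative, not from the diff):

import asyncio

async def resume(batch_id: str) -> str:
    if batch_id == "bad":
        raise RuntimeError(f"resume failed for {batch_id}")
    return f"resumed {batch_id}"

async def main() -> None:
    batch_ids = ["a", "bad", "c"]
    results = await asyncio.gather(*(resume(b) for b in batch_ids), return_exceptions=True)
    for batch_id, result in zip(batch_ids, results):
        if isinstance(result, Exception):
            print(f"{batch_id}: error: {result}")  # surfaced, not raised
        else:
            print(f"{batch_id}: {result}")

asyncio.run(main())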
letta/jobs/scheduler.py
CHANGED
@@ -4,27 +4,68 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
 
 from letta.jobs.llm_batch_job_polling import poll_running_llm_batches
+from letta.log import get_logger
+from letta.server.db import db_context
 from letta.server.server import SyncServer
 from letta.settings import settings
 
 scheduler = AsyncIOScheduler()
+logger = get_logger(__name__)
+STARTUP_LOCK_KEY = 0x12345678ABCDEF00
+
+_startup_lock_conn = None
+_startup_lock_cur = None
 
 
 def start_cron_jobs(server: SyncServer):
-
-
-
-
-
-
-
-
-
-
-
-
+    global _startup_lock_conn, _startup_lock_cur
+
+    if not settings.enable_batch_job_polling:
+        return
+
+    with db_context() as session:
+        engine = session.get_bind()
+
+    raw = engine.raw_connection()
+    cur = raw.cursor()
+    cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
+    got = cur.fetchone()[0]
+    if not got:
+        cur.close()
+        raw.close()
+        logger.info("Batch‐poller lock already held – not starting scheduler in this worker")
+        return
+
+    _startup_lock_conn, _startup_lock_cur = raw, cur
+    jitter_seconds = 10
+    trigger = IntervalTrigger(
+        seconds=settings.poll_running_llm_batches_interval_seconds,
+        jitter=jitter_seconds,
+    )
+
+    scheduler.add_job(
+        poll_running_llm_batches,
+        args=[server],
+        trigger=trigger,
+        next_run_time=datetime.datetime.now(datetime.timezone.utc),
+        id="poll_llm_batches",
+        name="Poll LLM API batch jobs",
+        replace_existing=True,
+    )
+    scheduler.start()
+    logger.info("Started batch‐polling scheduler in this worker")
 
 
 def shutdown_cron_scheduler():
-
+    global _startup_lock_conn, _startup_lock_cur
+
+    if settings.enable_batch_job_polling and scheduler.running:
         scheduler.shutdown()
+
+    if _startup_lock_cur is not None:
+        _startup_lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
+        _startup_lock_conn.commit()
+        _startup_lock_cur.close()
+        _startup_lock_conn.close()
+        _startup_lock_cur = None
+        _startup_lock_conn = None
letta/jobs/types.py
CHANGED
@@ -1,10 +1,30 @@
-from typing import
+from typing import NamedTuple, Optional
 
 from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchIndividualResponse
 
-from letta.schemas.enums import JobStatus
+from letta.schemas.enums import AgentStepStatus, JobStatus
 
-
-
-
-
+
+class BatchPollingResult(NamedTuple):
+    llm_batch_id: str
+    request_status: JobStatus
+    batch_response: Optional[BetaMessageBatch]
+
+
+class ItemUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    request_status: JobStatus
+    batch_request_result: Optional[BetaMessageBatchIndividualResponse]
+
+
+class StepStatusUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    step_status: AgentStepStatus
+
+
+class RequestStatusUpdateInfo(NamedTuple):
+    llm_batch_id: str
+    agent_id: str
+    request_status: JobStatus
letta/llm_api/anthropic_client.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Union
 
 import anthropic
 from anthropic import AsyncStream
-from anthropic.types import
+from anthropic.types.beta import BetaMessage as AnthropicMessage
 from anthropic.types.beta import BetaRawMessageStreamEvent
 from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
 from anthropic.types.beta.messages import BetaMessageBatch
@@ -304,6 +304,8 @@ class AnthropicClient(LLMClientBase):
 
         return super().handle_llm_error(e)
 
+    # TODO: Input messages doesn't get used here
+    # TODO: Clean up this interface
     def convert_response_to_chat_completion(
         self,
         response_data: dict,
letta/llm_api/llm_api_tools.py
CHANGED
@@ -247,6 +247,13 @@ def create(
             use_structured_output=False, # NOTE: not supported atm for xAI
         )
 
+        # Specific bug for the mini models (as of Apr 14, 2025)
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: presencePenalty'}
+        # 400 - {'code': 'Client specified an invalid argument', 'error': 'Argument not supported on this model: frequencyPenalty'}
+        if "grok-3-mini-" in llm_config.model:
+            data.presence_penalty = None
+            data.frequency_penalty = None
+
         if stream: # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -322,7 +329,6 @@ def create(
 
         # Force tool calling
         tool_call = None
-        llm_config.put_inner_thoughts_in_kwargs = True
         if functions is None:
             # Special case for summarization path
             tools = None
letta/llm_api/openai.py
CHANGED
@@ -136,6 +136,7 @@ def build_openai_chat_completions_request(
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
             temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+            reasoning_effort=llm_config.reasoning_effort,
         )
     else:
         data = ChatCompletionRequest(
@@ -146,6 +147,7 @@ def build_openai_chat_completions_request(
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
             temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+            reasoning_effort=llm_config.reasoning_effort,
        )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
letta/orm/agent.py
CHANGED
@@ -7,12 +7,11 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 from letta.orm.block import Block
 from letta.orm.custom_columns import EmbeddingConfigColumn, LLMConfigColumn, ToolRulesColumn
 from letta.orm.identity import Identity
-from letta.orm.message import Message
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.organization import Organization
 from letta.orm.sqlalchemy_base import SqlalchemyBase
 from letta.schemas.agent import AgentState as PydanticAgentState
-from letta.schemas.agent import AgentType
+from letta.schemas.agent import AgentType, get_prompt_template_for_agent_type
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
@@ -91,13 +90,6 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         back_populates="agents",
         doc="Blocks forming the core memory of the agent.",
     )
-    messages: Mapped[List["Message"]] = relationship(
-        "Message",
-        back_populates="agent",
-        lazy="selectin",
-        cascade="all, delete-orphan", # Ensure messages are deleted when the agent is deleted
-        passive_deletes=True,
-    )
     tags: Mapped[List["AgentsTags"]] = relationship(
         "AgentsTags",
         back_populates="agent",
@@ -105,25 +97,6 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         lazy="selectin",
         doc="Tags associated with the agent.",
     )
-    source_passages: Mapped[List["SourcePassage"]] = relationship(
-        "SourcePassage",
-        secondary="sources_agents", # The join table for Agent -> Source
-        primaryjoin="Agent.id == sources_agents.c.agent_id",
-        secondaryjoin="and_(SourcePassage.source_id == sources_agents.c.source_id)",
-        lazy="selectin",
-        order_by="SourcePassage.created_at.desc()",
-        viewonly=True, # Ensures SQLAlchemy doesn't attempt to manage this relationship
-        doc="All passages derived from sources associated with this agent.",
-    )
-    agent_passages: Mapped[List["AgentPassage"]] = relationship(
-        "AgentPassage",
-        back_populates="agent",
-        lazy="selectin",
-        order_by="AgentPassage.created_at.desc()",
-        cascade="all, delete-orphan",
-        viewonly=True, # Ensures SQLAlchemy doesn't attempt to manage this relationship
-        doc="All passages derived created by this agent.",
-    )
     identities: Mapped[List["Identity"]] = relationship(
         "Identity",
         secondary="identities_agents",
@@ -202,7 +175,10 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "tags": lambda: [t.tag for t in self.tags],
             "tools": lambda: self.tools,
             "sources": lambda: [s.to_pydantic() for s in self.sources],
-            "memory": lambda: Memory(
+            "memory": lambda: Memory(
+                blocks=[b.to_pydantic() for b in self.core_memory],
+                prompt_template=get_prompt_template_for_agent_type(self.agent_type),
+            ),
             "identity_ids": lambda: [i.id for i in self.identities],
             "multi_agent_group": lambda: self.multi_agent_group,
             "tool_exec_environment_variables": lambda: self.tool_exec_environment_variables,
letta/orm/base.py
CHANGED
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Optional
 
 from sqlalchemy import Boolean, DateTime, String, func, text
@@ -25,7 +25,7 @@ class CommonSqlalchemyMetaMixins(Base):
             timestamp (Optional[datetime]): The timestamp to set.
                 If None, uses the current UTC time.
         """
-        self.updated_at = timestamp or datetime.utcnow()
+        self.updated_at = timestamp or datetime.now(timezone.utc)
 
     def _set_created_and_updated_by_fields(self, actor_id: str) -> None:
         """Populate created_by_id and last_updated_by_id based on actor."""
letta/orm/enums.py
CHANGED
letta/orm/job.py
CHANGED
@@ -39,6 +39,11 @@ class Job(SqlalchemyBase, UserMixin):
         JSON, nullable=True, doc="The request configuration for the job, stored as JSON."
     )
 
+    # callback related columns
+    callback_url: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="When set, POST to this URL after job completion.")
+    callback_sent_at: Mapped[Optional[datetime]] = mapped_column(nullable=True, doc="Timestamp when the callback was last attempted.")
+    callback_status_code: Mapped[Optional[int]] = mapped_column(nullable=True, doc="HTTP status code returned by the callback endpoint.")
+
     # relationships
     user: Mapped["User"] = relationship("User", back_populates="jobs")
     job_messages: Mapped[List["JobMessage"]] = relationship("JobMessage", back_populates="job", cascade="all, delete-orphan")