letta-nightly 0.7.8.dev20250502104219__py3-none-any.whl → 0.7.9.dev20250502222710__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +2 -2
- letta/agents/helpers.py +58 -1
- letta/agents/letta_agent.py +13 -3
- letta/agents/letta_agent_batch.py +33 -17
- letta/agents/voice_agent.py +1 -2
- letta/agents/voice_sleeptime_agent.py +75 -320
- letta/functions/function_sets/multi_agent.py +1 -1
- letta/functions/function_sets/voice.py +20 -32
- letta/functions/helpers.py +7 -7
- letta/helpers/datetime_helpers.py +6 -0
- letta/helpers/message_helper.py +19 -18
- letta/jobs/scheduler.py +233 -49
- letta/llm_api/google_ai_client.py +13 -4
- letta/llm_api/google_vertex_client.py +5 -1
- letta/llm_api/openai.py +10 -2
- letta/llm_api/openai_client.py +14 -2
- letta/orm/message.py +4 -0
- letta/prompts/system/voice_sleeptime.txt +2 -3
- letta/schemas/letta_message.py +1 -0
- letta/schemas/letta_request.py +8 -1
- letta/schemas/letta_response.py +5 -0
- letta/schemas/llm_batch_job.py +6 -4
- letta/schemas/llm_config.py +9 -0
- letta/schemas/message.py +23 -2
- letta/schemas/providers.py +3 -1
- letta/server/rest_api/app.py +15 -7
- letta/server/rest_api/routers/v1/agents.py +3 -0
- letta/server/rest_api/routers/v1/messages.py +46 -1
- letta/server/rest_api/routers/v1/steps.py +1 -1
- letta/server/rest_api/utils.py +25 -6
- letta/server/server.py +11 -3
- letta/services/llm_batch_manager.py +60 -1
- letta/services/message_manager.py +1 -0
- letta/services/summarizer/summarizer.py +42 -36
- letta/settings.py +1 -0
- letta/tracing.py +5 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/METADATA +2 -2
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/RECORD +41 -41
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/entry_points.txt +0 -0
letta/helpers/message_helper.py
CHANGED
@@ -5,57 +5,58 @@ from letta.schemas.message import Message, MessageCreate
 
 
 def convert_message_creates_to_messages(
-
+    message_creates: list[MessageCreate],
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> list[Message]:
     return [
         _convert_message_create_to_message(
-
+            message_create=create,
             agent_id=agent_id,
             wrap_user_message=wrap_user_message,
             wrap_system_message=wrap_system_message,
         )
-        for
+        for create in message_creates
     ]
 
 
 def _convert_message_create_to_message(
-
+    message_create: MessageCreate,
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> Message:
     """Converts a MessageCreate object into a Message object, applying wrapping if needed."""
     # TODO: This seems like extra boilerplate with little benefit
-    assert isinstance(
+    assert isinstance(message_create, MessageCreate)
 
     # Extract message content
-    if isinstance(
-        message_content =
-    elif
-        message_content =
+    if isinstance(message_create.content, str):
+        message_content = message_create.content
+    elif message_create.content and len(message_create.content) > 0 and isinstance(message_create.content[0], TextContent):
+        message_content = message_create.content[0].text
     else:
        raise ValueError("Message content is empty or invalid")
 
     # Apply wrapping if needed
-    if
-        raise ValueError(f"Invalid message role: {
-    elif
+    if message_create.role not in {MessageRole.user, MessageRole.system}:
+        raise ValueError(f"Invalid message role: {message_create.role}")
+    elif message_create.role == MessageRole.user and wrap_user_message:
        message_content = system.package_user_message(user_message=message_content)
-    elif
+    elif message_create.role == MessageRole.system and wrap_system_message:
        message_content = system.package_system_message(system_message=message_content)
 
    return Message(
        agent_id=agent_id,
-        role=
+        role=message_create.role,
        content=[TextContent(text=message_content)] if message_content else [],
-        name=
+        name=message_create.name,
        model=None, # assigned later?
        tool_calls=None, # irrelevant
        tool_call_id=None,
-        otid=
-        sender_id=
-        group_id=
+        otid=message_create.otid,
+        sender_id=message_create.sender_id,
+        group_id=message_create.group_id,
+        batch_item_id=message_create.batch_item_id,
    )
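For orientation, a minimal sketch of how the reworked helper would be called (field names follow this diff; the import paths and example values are assumptions, not taken from the package):

from letta.helpers.message_helper import convert_message_creates_to_messages
from letta.schemas.enums import MessageRole  # import path assumed
from letta.schemas.message import MessageCreate

# Two inbound messages for one agent; batch_item_id is the new optional field threaded through to Message.
creates = [
    MessageCreate(role=MessageRole.user, content="What did we decide yesterday?"),
    MessageCreate(role=MessageRole.system, content="The user prefers short answers.", batch_item_id="batch_item-example"),
]

# Wrapping packages user/system text with Letta's system formatting before the Message objects are persisted.
messages = convert_message_creates_to_messages(
    message_creates=creates,
    agent_id="agent-example",
    wrap_user_message=True,
    wrap_system_message=True,
)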
letta/jobs/scheduler.py
CHANGED
@@ -1,4 +1,6 @@
+import asyncio
 import datetime
+from typing import Optional
 
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
@@ -9,63 +11,245 @@ from letta.server.db import db_context
 from letta.server.server import SyncServer
 from letta.settings import settings
 
+# --- Global State ---
 scheduler = AsyncIOScheduler()
 logger = get_logger(__name__)
-
+ADVISORY_LOCK_KEY = 0x12345678ABCDEF00
 
-
-
+_advisory_lock_conn = None # Holds the raw DB connection if leader
+_advisory_lock_cur = None # Holds the cursor for the lock connection if leader
+_lock_retry_task: Optional[asyncio.Task] = None # Background task handle for non-leaders
+_is_scheduler_leader = False # Flag indicating if this instance runs the scheduler
 
 
-def
-
+async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:
+    """Attempts to acquire lock, starts scheduler if successful."""
+    global _advisory_lock_conn, _advisory_lock_cur, _is_scheduler_leader, scheduler
+
+    if _is_scheduler_leader:
+        return True  # Already leading
+
+    raw_conn = None
+    cur = None
+    acquired_lock = False
+    try:
+        # Use a temporary connection context for the attempt initially
+        with db_context() as session:
+            engine = session.get_bind()
+            # Get raw connection - MUST be kept open if lock is acquired
+            raw_conn = engine.raw_connection()
+            cur = raw_conn.cursor()
+
+            cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+            acquired_lock = cur.fetchone()[0]
+
+            if not acquired_lock:
+                cur.close()
+                raw_conn.close()
+                logger.info("Scheduler lock held by another instance.")
+                return False
+
+            # --- Lock Acquired ---
+            logger.info("Acquired scheduler lock.")
+            _advisory_lock_conn = raw_conn  # Keep connection for lock duration
+            _advisory_lock_cur = cur  # Keep cursor for lock duration
+            raw_conn = None  # Prevent closing in finally block
+            cur = None  # Prevent closing in finally block
+
+            trigger = IntervalTrigger(
+                seconds=settings.poll_running_llm_batches_interval_seconds,
+                jitter=10, # Jitter for the job execution
+            )
+            scheduler.add_job(
+                poll_running_llm_batches,
+                args=[server],
+                trigger=trigger,
+                id="poll_llm_batches",
+                name="Poll LLM API batch jobs",
+                replace_existing=True,
+                next_run_time=datetime.datetime.now(datetime.timezone.utc),
+            )
+
+            if not scheduler.running:
+                scheduler.start()
+            elif scheduler.state == 2:  # PAUSED
+                scheduler.resume()
+
+            _is_scheduler_leader = True
+            return True
+
+    except Exception as e:
+        logger.error(f"Error during lock acquisition/scheduler start: {e}", exc_info=True)
+        if acquired_lock:  # If lock was acquired before error, try to release
+            logger.warning("Attempting to release lock due to error during startup.")
+            try:
+                # Use the cursor/connection we were about to store
+                _advisory_lock_cur = cur
+                _advisory_lock_conn = raw_conn
+                await _release_advisory_lock()  # Attempt cleanup
+            except Exception as unlock_err:
+                logger.error(f"Failed to release lock during error handling: {unlock_err}", exc_info=True)
+            finally:
+                # Ensure globals are cleared after failed attempt
+                _advisory_lock_cur = None
+                _advisory_lock_conn = None
+        _is_scheduler_leader = False
+
+        # Ensure scheduler is stopped if we failed partially
+        if scheduler.running:
+            try:
+                scheduler.shutdown(wait=False)
+            except:
+                pass  # Best effort
+        return False
+    finally:
+        # Clean up temporary resources if lock wasn't acquired or error occurred before storing
+        if cur:
+            try:
+                cur.close()
+            except:
+                pass
+        if raw_conn:
+            try:
+                raw_conn.close()
+            except:
+                pass
+
+
+async def _background_lock_retry_loop(server: SyncServer):
+    """Periodically attempts to acquire the lock if not initially acquired."""
+    global _lock_retry_task, _is_scheduler_leader
+    logger.info("Starting background task to periodically check for scheduler lock.")
+
+    while True:
+        if _is_scheduler_leader:  # Should be cancelled first, but safety check
+            break
+        try:
+            wait_time = settings.poll_lock_retry_interval_seconds
+            await asyncio.sleep(wait_time)
+
+            # Re-check state before attempting lock
+            if _is_scheduler_leader or _lock_retry_task is None:
+                break  # Stop if became leader or task was cancelled
+
+            acquired = await _try_acquire_lock_and_start_scheduler(server)
+            if acquired:
+                logger.info("Background task acquired lock and started scheduler.")
+                _lock_retry_task = None  # Clear self handle
+                break  # Exit loop, we are now the leader
+
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task cancelled.")
+            break
+        except Exception as e:
+            logger.error(f"Error in background lock retry loop: {e}", exc_info=True)
+            # Avoid tight loop on persistent errors
+            await asyncio.sleep(settings.poll_lock_retry_interval_seconds)
+
+
+async def _release_advisory_lock():
+    """Releases the advisory lock using the stored connection."""
+    global _advisory_lock_conn, _advisory_lock_cur
+
+    lock_cur = _advisory_lock_cur
+    lock_conn = _advisory_lock_conn
+    _advisory_lock_cur = None  # Clear global immediately
+    _advisory_lock_conn = None  # Clear global immediately
+
+    if lock_cur is not None and lock_conn is not None:
+        logger.info(f"Attempting to release advisory lock {ADVISORY_LOCK_KEY}")
+        try:
+            if not lock_conn.closed:
+                if not lock_cur.closed:
+                    lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+                    lock_cur.fetchone()  # Consume result
+                    lock_conn.commit()
+                    logger.info(f"Executed pg_advisory_unlock for lock {ADVISORY_LOCK_KEY}")
+                else:
+                    logger.warning("Advisory lock cursor closed before unlock.")
+            else:
+                logger.warning("Advisory lock connection closed before unlock.")
+        except Exception as e:
+            logger.error(f"Error executing pg_advisory_unlock: {e}", exc_info=True)
+        finally:
+            # Ensure resources are closed regardless of unlock success
+            try:
+                if lock_cur and not lock_cur.closed:
+                    lock_cur.close()
+            except Exception as e:
+                logger.error(f"Error closing advisory lock cursor: {e}", exc_info=True)
+            try:
+                if lock_conn and not lock_conn.closed:
+                    lock_conn.close()
+                    logger.info("Closed database connection that held advisory lock.")
+            except Exception as e:
+                logger.error(f"Error closing advisory lock connection: {e}", exc_info=True)
+    else:
+        logger.warning("Attempted to release lock, but connection/cursor not found.")
+
+
+async def start_scheduler_with_leader_election(server: SyncServer):
+    """
+    Call this function from your FastAPI startup event handler.
+    Attempts immediate lock acquisition, starts background retry if failed.
+    """
+    global _lock_retry_task, _is_scheduler_leader
 
     if not settings.enable_batch_job_polling:
+        logger.info("Batch job polling is disabled.")
         return
 
-
-
-
-    raw = engine.raw_connection()
-    cur = raw.cursor()
-    cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
-    got = cur.fetchone()[0]
-    if not got:
-        cur.close()
-        raw.close()
-        logger.info("Batch‐poller lock already held – not starting scheduler in this worker")
+    if _is_scheduler_leader:
+        logger.warning("Scheduler start requested, but already leader.")
        return
 
+    acquired_immediately = await _try_acquire_lock_and_start_scheduler(server)
+
+    if not acquired_immediately and _lock_retry_task is None:
+        # Failed initial attempt, start background retry task
+        loop = asyncio.get_running_loop()
+        _lock_retry_task = loop.create_task(_background_lock_retry_loop(server))
+
+
+async def shutdown_scheduler_and_release_lock():
+    """
+    Call this function from your FastAPI shutdown event handler.
+    Stops scheduler/releases lock if leader, cancels retry task otherwise.
+    """
+    global _is_scheduler_leader, _lock_retry_task, scheduler
+
+    # 1. Cancel retry task if running (for non-leaders)
+    if _lock_retry_task is not None:
+        logger.info("Shutting down: Cancelling background lock retry task.")
+        current_task = _lock_retry_task
+        _lock_retry_task = None  # Clear handle first
+        current_task.cancel()
+        try:
+            await current_task  # Wait for cancellation
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task successfully cancelled.")
+        except Exception as e:
+            logger.warning(f"Exception waiting for cancelled retry task: {e}", exc_info=True)
+
+    # 2. Shutdown scheduler and release lock if we were the leader
+    if _is_scheduler_leader:
+        logger.info("Shutting down: Leader instance stopping scheduler and releasing lock.")
+        if scheduler.running:
+            try:
+                scheduler.shutdown()  # wait=True by default
+                logger.info("APScheduler shut down.")
+            except Exception as e:
+                logger.error(f"Error shutting down APScheduler: {e}", exc_info=True)
+
+        await _release_advisory_lock()
+        _is_scheduler_leader = False  # Update state after cleanup
+    else:
+        logger.info("Shutting down: Non-leader instance.")
+
+    # Final cleanup check for scheduler state (belt and suspenders)
+    if scheduler.running:
+        logger.warning("Scheduler still running after shutdown logic completed? Forcing shutdown.")
+        try:
+            scheduler.shutdown(wait=False)
+        except:
+            pass
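The new public entry points are meant to be invoked from the web app's lifecycle hooks (their docstrings say as much, and letta/server/rest_api/app.py also changes in this release). A minimal sketch of that wiring under a generic FastAPI lifespan handler; the app and server construction here is illustrative, not the actual Letta REST app code:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from letta.jobs.scheduler import shutdown_scheduler_and_release_lock, start_scheduler_with_leader_election
from letta.server.server import SyncServer

server = SyncServer()  # illustrative; the real server is constructed inside the REST app

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Only the worker that wins the Postgres advisory lock becomes the leader and runs
    # the batch-polling job; every other worker keeps retrying in the background.
    await start_scheduler_with_leader_election(server)
    yield
    # Leader: stop APScheduler and release the advisory lock.
    # Non-leader: cancel the background retry task.
    await shutdown_scheduler_and_release_lock()

app = FastAPI(lifespan=lifespan)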
letta/llm_api/google_ai_client.py
CHANGED
@@ -13,6 +13,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.schemas.enums import ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -29,12 +30,20 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-
+        api_key = None
+        if llm_config.provider_name and llm_config.provider_name != ProviderType.google_ai.value:
+            from letta.services.provider_manager import ProviderManager
+
+            api_key = ProviderManager().get_override_key(llm_config.provider_name)
 
+        if not api_key:
+            api_key = model_settings.gemini_api_key
+
+        # print("[google_ai request]", json.dumps(request_data, indent=2))
         url, headers = get_gemini_endpoint_and_headers(
             base_url=str(llm_config.model_endpoint),
             model=llm_config.model,
-            api_key=str(
+            api_key=str(api_key),
             key_in_header=True,
             generate_content=True,
         )
@@ -122,8 +131,8 @@ class GoogleAIClient(LLMClientBase):
         for candidate in response_data["candidates"]:
             content = candidate["content"]
 
-            if "role" not in content:
-                # This means the response is malformed
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
                 # NOTE: must be a ValueError to trigger a retry
                 raise ValueError(f"Error in response data from LLM: {response_data}")
             role = content["role"]
letta/llm_api/google_vertex_client.py
CHANGED
@@ -110,7 +110,11 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content
 
-            role
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                raise ValueError(f"Error in response data from LLM: {response_data}")
+            role = content["role"]
             assert role == "model", f"Unknown role in response: {role}"
 
             parts = content.parts
letta/llm_api/openai.py
CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
-from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
+from letta.llm_api.openai_client import accepts_developer_role, supports_parallel_tool_calling, supports_temperature_param
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -114,8 +114,16 @@ def build_openai_chat_completions_request(
         put_inner_thoughts_first=put_inner_thoughts_first,
     )
 
+    use_developer_message = accepts_developer_role(llm_config.model)
+
     openai_message_list = [
-        cast_message_to_subtype(
+        cast_message_to_subtype(
+            m.to_openai_dict(
+                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+                use_developer_message=use_developer_message,
+            )
+        )
+        for m in messages
     ]
 
     if llm_config.model:
letta/llm_api/openai_client.py
CHANGED
@@ -40,7 +40,19 @@ def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
     # NOTE: needs to be updated with new model releases
-
+    is_reasoning = model.startswith("o1") or model.startswith("o3")
+    return is_reasoning
+
+
+def accepts_developer_role(model: str) -> bool:
+    """Checks if the model accepts the 'developer' role. Note that not all reasoning models accept this role.
+
+    See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
+    """
+    if is_openai_reasoning_model(model):
+        return True
+    else:
+        return False
 
 
 def supports_temperature_param(model: str) -> bool:
@@ -102,7 +114,7 @@ class OpenAIClient(LLMClientBase):
             put_inner_thoughts_first=True,
         )
 
-        use_developer_message =
+        use_developer_message = accepts_developer_role(llm_config.model)
 
        openai_message_list = [
            cast_message_to_subtype(
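A quick illustration of what the new helper returns (the model names below are examples only, not drawn from this diff):

from letta.llm_api.openai_client import accepts_developer_role

accepts_developer_role("o1-2024-12-17")  # True: o1/o3 models get the 'developer' role
accepts_developer_role("gpt-4o-mini")    # False: other models keep the 'system' role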
letta/orm/message.py
CHANGED
@@ -44,6 +44,10 @@ class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
     sender_id: Mapped[Optional[str]] = mapped_column(
         nullable=True, doc="The id of the sender of the message, can be an identity id or agent id"
     )
+    batch_item_id: Mapped[Optional[str]] = mapped_column(
+        nullable=True,
+        doc="The id of the LLMBatchItem that this message is associated with",
+    )
 
     # Monotonically increasing sequence for efficient/correct listing
     sequence_id: Mapped[int] = mapped_column(
letta/prompts/system/voice_sleeptime.txt
CHANGED
@@ -53,7 +53,7 @@ Example output:
 
 **Phase 2: Refine User Memory using `rethink_user_memory` and `finish_rethinking_memory`**
 
-After the `store_memories` tool call is processed,
+After the `store_memories` tool call is processed, consider the current content of the `human` memory block (the read-write block storing details about the user).
 - Your goal is to refine this block by integrating information from the **ENTIRE** conversation transcript (both `Older` and `Newer` sections) with the existing memory content.
 
 - Refinement Principles:
@@ -67,8 +67,7 @@ After the `store_memories` tool call is processed, you will be presented with th
 - Tool Usage:
   - Use the `rethink_user_memory(new_memory: string)` tool iteratively. Each call MUST submit the complete, rewritten version of the `human` memory block as you refine it.
   - Continue calling `rethink_user_memory` until you are satisfied that the memory block is accurate, comprehensive, organized, and up-to-date according to the principles above.
-  - Once the `human` block is fully polished, call the `finish_rethinking_memory
+  - Once the `human` block is fully polished, call the `finish_rethinking_memory` tool exactly once to signal completion.
 
 Output Requirements:
 - You MUST ONLY output tool calls in the specified sequence: First `store_memories` (once), then one or more `rethink_user_memory` calls, and finally `finish_rethinking_memory` (once).
-- Do not output any other text or explanations outside of the required JSON tool call format.
letta/schemas/letta_message.py
CHANGED
@@ -48,6 +48,7 @@ class LettaMessage(BaseModel):
     message_type: MessageType = Field(..., description="The type of the message.")
     otid: Optional[str] = None
     sender_id: Optional[str] = None
+    step_id: Optional[str] = None
 
     @field_serializer("date")
     def serialize_datetime(self, dt: datetime, _info):
letta/schemas/letta_request.py
CHANGED
@@ -35,4 +35,11 @@ class LettaBatchRequest(LettaRequest):
 
 class CreateBatch(BaseModel):
     requests: List[LettaBatchRequest] = Field(..., description="List of requests to be processed in batch.")
-    callback_url: Optional[HttpUrl] = Field(
+    callback_url: Optional[HttpUrl] = Field(
+        None,
+        description="Optional URL to call via POST when the batch completes. The callback payload will be a JSON object with the following fields: "
+        "{'job_id': string, 'status': string, 'completed_at': string}. "
+        "Where 'job_id' is the unique batch job identifier, "
+        "'status' is the final batch status (e.g., 'completed', 'failed'), and "
+        "'completed_at' is an ISO 8601 timestamp indicating when the batch job completed.",
+    )
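Since the callback payload documented above is a flat JSON object, the receiving side can be very small. A hypothetical sketch of an endpoint on the callback_url side (the app and route path are illustrative, not part of Letta):

from datetime import datetime

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class BatchCallback(BaseModel):
    job_id: str             # unique batch job identifier
    status: str             # final batch status, e.g. "completed" or "failed"
    completed_at: datetime  # ISO 8601 completion timestamp

@app.post("/letta/batch-callback")
async def on_batch_done(payload: BatchCallback):
    # React to the finished batch, e.g. fetch its messages via the Letta API.
    print(f"Batch {payload.job_id} finished with status {payload.status} at {payload.completed_at}")
    return {"ok": True}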
letta/schemas/letta_response.py
CHANGED
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
 from letta.helpers.json_helpers import json_dumps
 from letta.schemas.enums import JobStatus, MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage, LettaMessageUnion
+from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
 
 # TODO: consider moving into own file
@@ -175,3 +176,7 @@ class LettaBatchResponse(BaseModel):
     agent_count: int = Field(..., description="The number of agents in the batch request.")
     last_polled_at: datetime = Field(..., description="The timestamp when the batch was last polled for updates.")
     created_at: datetime = Field(..., description="The timestamp when the batch request was created.")
+
+
+class LettaBatchMessages(BaseModel):
+    messages: List[Message]
letta/schemas/llm_batch_job.py
CHANGED
@@ -10,16 +10,18 @@ from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.llm_config import LLMConfig
 
 
-class
+class LLMBatchItemBase(OrmMetadataBase, validate_assignment=True):
+    __id_prefix__ = "batch_item"
+
+
+class LLMBatchItem(LLMBatchItemBase, validate_assignment=True):
     """
     Represents a single agent's LLM request within a batch.
 
     This object captures the configuration, execution status, and eventual result of one agent's request within a larger LLM batch job.
     """
 
-
-
-    id: Optional[str] = Field(None, description="The id of the batch item. Assigned by the database.")
+    id: str = LLMBatchItemBase.generate_id_field()
     llm_batch_id: str = Field(..., description="The id of the parent LLM batch job this item belongs to.")
     agent_id: str = Field(..., description="The id of the agent associated with this LLM request.")
 
letta/schemas/llm_config.py
CHANGED
@@ -164,6 +164,15 @@ class LLMConfig(BaseModel):
                 model_wrapper=None,
                 context_window=128000,
             )
+        elif model_name == "gpt-4.1":
+            return cls(
+                model="gpt-4.1",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=256000,
+                max_tokens=8192,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",