letta-nightly 0.7.8.dev20250502104219__py3-none-any.whl → 0.7.9.dev20250502222710__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Files changed (41)
  1. letta/__init__.py +2 -2
  2. letta/agents/helpers.py +58 -1
  3. letta/agents/letta_agent.py +13 -3
  4. letta/agents/letta_agent_batch.py +33 -17
  5. letta/agents/voice_agent.py +1 -2
  6. letta/agents/voice_sleeptime_agent.py +75 -320
  7. letta/functions/function_sets/multi_agent.py +1 -1
  8. letta/functions/function_sets/voice.py +20 -32
  9. letta/functions/helpers.py +7 -7
  10. letta/helpers/datetime_helpers.py +6 -0
  11. letta/helpers/message_helper.py +19 -18
  12. letta/jobs/scheduler.py +233 -49
  13. letta/llm_api/google_ai_client.py +13 -4
  14. letta/llm_api/google_vertex_client.py +5 -1
  15. letta/llm_api/openai.py +10 -2
  16. letta/llm_api/openai_client.py +14 -2
  17. letta/orm/message.py +4 -0
  18. letta/prompts/system/voice_sleeptime.txt +2 -3
  19. letta/schemas/letta_message.py +1 -0
  20. letta/schemas/letta_request.py +8 -1
  21. letta/schemas/letta_response.py +5 -0
  22. letta/schemas/llm_batch_job.py +6 -4
  23. letta/schemas/llm_config.py +9 -0
  24. letta/schemas/message.py +23 -2
  25. letta/schemas/providers.py +3 -1
  26. letta/server/rest_api/app.py +15 -7
  27. letta/server/rest_api/routers/v1/agents.py +3 -0
  28. letta/server/rest_api/routers/v1/messages.py +46 -1
  29. letta/server/rest_api/routers/v1/steps.py +1 -1
  30. letta/server/rest_api/utils.py +25 -6
  31. letta/server/server.py +11 -3
  32. letta/services/llm_batch_manager.py +60 -1
  33. letta/services/message_manager.py +1 -0
  34. letta/services/summarizer/summarizer.py +42 -36
  35. letta/settings.py +1 -0
  36. letta/tracing.py +5 -0
  37. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/METADATA +2 -2
  38. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/RECORD +41 -41
  39. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/LICENSE +0 -0
  40. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/WHEEL +0 -0
  41. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/entry_points.txt +0 -0
letta/agents/helpers.py CHANGED
@@ -5,57 +5,58 @@ from letta.schemas.message import Message, MessageCreate


 def convert_message_creates_to_messages(
-    messages: list[MessageCreate],
+    message_creates: list[MessageCreate],
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> list[Message]:
     return [
         _convert_message_create_to_message(
-            message=message,
+            message_create=create,
             agent_id=agent_id,
             wrap_user_message=wrap_user_message,
             wrap_system_message=wrap_system_message,
         )
-        for message in messages
+        for create in message_creates
     ]


 def _convert_message_create_to_message(
-    message: MessageCreate,
+    message_create: MessageCreate,
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> Message:
     """Converts a MessageCreate object into a Message object, applying wrapping if needed."""
     # TODO: This seems like extra boilerplate with little benefit
-    assert isinstance(message, MessageCreate)
+    assert isinstance(message_create, MessageCreate)

     # Extract message content
-    if isinstance(message.content, str):
-        message_content = message.content
-    elif message.content and len(message.content) > 0 and isinstance(message.content[0], TextContent):
-        message_content = message.content[0].text
+    if isinstance(message_create.content, str):
+        message_content = message_create.content
+    elif message_create.content and len(message_create.content) > 0 and isinstance(message_create.content[0], TextContent):
+        message_content = message_create.content[0].text
     else:
         raise ValueError("Message content is empty or invalid")

     # Apply wrapping if needed
-    if message.role not in {MessageRole.user, MessageRole.system}:
-        raise ValueError(f"Invalid message role: {message.role}")
-    elif message.role == MessageRole.user and wrap_user_message:
+    if message_create.role not in {MessageRole.user, MessageRole.system}:
+        raise ValueError(f"Invalid message role: {message_create.role}")
+    elif message_create.role == MessageRole.user and wrap_user_message:
         message_content = system.package_user_message(user_message=message_content)
-    elif message.role == MessageRole.system and wrap_system_message:
+    elif message_create.role == MessageRole.system and wrap_system_message:
         message_content = system.package_system_message(system_message=message_content)

     return Message(
         agent_id=agent_id,
-        role=message.role,
+        role=message_create.role,
         content=[TextContent(text=message_content)] if message_content else [],
-        name=message.name,
+        name=message_create.name,
         model=None,  # assigned later?
         tool_calls=None,  # irrelevant
         tool_call_id=None,
-        otid=message.otid,
-        sender_id=message.sender_id,
-        group_id=message.group_id,
+        otid=message_create.otid,
+        sender_id=message_create.sender_id,
+        group_id=message_create.group_id,
+        batch_item_id=message_create.batch_item_id,
     )
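
For orientation, a minimal usage sketch of the renamed helper, based only on what is visible in this hunk (the MessageCreate constructor defaults, the MessageRole import location, and the agent id format are assumptions):

    from letta.agents.helpers import convert_message_creates_to_messages
    from letta.schemas.enums import MessageRole
    from letta.schemas.message import MessageCreate

    # One MessageCreate per incoming user/system message; the new batch_item_id
    # field (like otid/sender_id/group_id) is an optional passthrough.
    creates = [MessageCreate(role=MessageRole.user, content="What's on my calendar today?")]

    messages = convert_message_creates_to_messages(
        message_creates=creates,
        agent_id="agent-123",      # illustrative agent id
        wrap_user_message=True,    # wraps content via system.package_user_message
        wrap_system_message=True,
    )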
letta/jobs/scheduler.py CHANGED
@@ -1,4 +1,6 @@
+import asyncio
 import datetime
+from typing import Optional

 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
@@ -9,63 +11,245 @@ from letta.server.db import db_context
 from letta.server.server import SyncServer
 from letta.settings import settings

+# --- Global State ---
 scheduler = AsyncIOScheduler()
 logger = get_logger(__name__)
-STARTUP_LOCK_KEY = 0x12345678ABCDEF00
+ADVISORY_LOCK_KEY = 0x12345678ABCDEF00

-_startup_lock_conn = None
-_startup_lock_cur = None
+_advisory_lock_conn = None  # Holds the raw DB connection if leader
+_advisory_lock_cur = None  # Holds the cursor for the lock connection if leader
+_lock_retry_task: Optional[asyncio.Task] = None  # Background task handle for non-leaders
+_is_scheduler_leader = False  # Flag indicating if this instance runs the scheduler


-def start_cron_jobs(server: SyncServer):
-    global _startup_lock_conn, _startup_lock_cur
+async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:
+    """Attempts to acquire lock, starts scheduler if successful."""
+    global _advisory_lock_conn, _advisory_lock_cur, _is_scheduler_leader, scheduler
+
+    if _is_scheduler_leader:
+        return True  # Already leading
+
+    raw_conn = None
+    cur = None
+    acquired_lock = False
+    try:
+        # Use a temporary connection context for the attempt initially
+        with db_context() as session:
+            engine = session.get_bind()
+            # Get raw connection - MUST be kept open if lock is acquired
+            raw_conn = engine.raw_connection()
+            cur = raw_conn.cursor()
+
+            cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+            acquired_lock = cur.fetchone()[0]
+
+            if not acquired_lock:
+                cur.close()
+                raw_conn.close()
+                logger.info("Scheduler lock held by another instance.")
+                return False
+
+        # --- Lock Acquired ---
+        logger.info("Acquired scheduler lock.")
+        _advisory_lock_conn = raw_conn  # Keep connection for lock duration
+        _advisory_lock_cur = cur  # Keep cursor for lock duration
+        raw_conn = None  # Prevent closing in finally block
+        cur = None  # Prevent closing in finally block
+
+        trigger = IntervalTrigger(
+            seconds=settings.poll_running_llm_batches_interval_seconds,
+            jitter=10,  # Jitter for the job execution
+        )
+        scheduler.add_job(
+            poll_running_llm_batches,
+            args=[server],
+            trigger=trigger,
+            id="poll_llm_batches",
+            name="Poll LLM API batch jobs",
+            replace_existing=True,
+            next_run_time=datetime.datetime.now(datetime.timezone.utc),
+        )
+
+        if not scheduler.running:
+            scheduler.start()
+        elif scheduler.state == 2:  # PAUSED
+            scheduler.resume()
+
+        _is_scheduler_leader = True
+        return True
+
+    except Exception as e:
+        logger.error(f"Error during lock acquisition/scheduler start: {e}", exc_info=True)
+        if acquired_lock:  # If lock was acquired before error, try to release
+            logger.warning("Attempting to release lock due to error during startup.")
+            try:
+                # Use the cursor/connection we were about to store
+                _advisory_lock_cur = cur
+                _advisory_lock_conn = raw_conn
+                await _release_advisory_lock()  # Attempt cleanup
+            except Exception as unlock_err:
+                logger.error(f"Failed to release lock during error handling: {unlock_err}", exc_info=True)
+            finally:
+                # Ensure globals are cleared after failed attempt
+                _advisory_lock_cur = None
+                _advisory_lock_conn = None
+                _is_scheduler_leader = False
+
+        # Ensure scheduler is stopped if we failed partially
+        if scheduler.running:
+            try:
+                scheduler.shutdown(wait=False)
+            except:
+                pass  # Best effort
+        return False
+    finally:
+        # Clean up temporary resources if lock wasn't acquired or error occurred before storing
+        if cur:
+            try:
+                cur.close()
+            except:
+                pass
+        if raw_conn:
+            try:
+                raw_conn.close()
+            except:
+                pass
+
+
+async def _background_lock_retry_loop(server: SyncServer):
+    """Periodically attempts to acquire the lock if not initially acquired."""
+    global _lock_retry_task, _is_scheduler_leader
+    logger.info("Starting background task to periodically check for scheduler lock.")
+
+    while True:
+        if _is_scheduler_leader:  # Should be cancelled first, but safety check
+            break
+        try:
+            wait_time = settings.poll_lock_retry_interval_seconds
+            await asyncio.sleep(wait_time)
+
+            # Re-check state before attempting lock
+            if _is_scheduler_leader or _lock_retry_task is None:
+                break  # Stop if became leader or task was cancelled
+
+            acquired = await _try_acquire_lock_and_start_scheduler(server)
+            if acquired:
+                logger.info("Background task acquired lock and started scheduler.")
+                _lock_retry_task = None  # Clear self handle
+                break  # Exit loop, we are now the leader
+
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task cancelled.")
+            break
+        except Exception as e:
+            logger.error(f"Error in background lock retry loop: {e}", exc_info=True)
+            # Avoid tight loop on persistent errors
+            await asyncio.sleep(settings.poll_lock_retry_interval_seconds)
+
+
+async def _release_advisory_lock():
+    """Releases the advisory lock using the stored connection."""
+    global _advisory_lock_conn, _advisory_lock_cur
+
+    lock_cur = _advisory_lock_cur
+    lock_conn = _advisory_lock_conn
+    _advisory_lock_cur = None  # Clear global immediately
+    _advisory_lock_conn = None  # Clear global immediately
+
+    if lock_cur is not None and lock_conn is not None:
+        logger.info(f"Attempting to release advisory lock {ADVISORY_LOCK_KEY}")
+        try:
+            if not lock_conn.closed:
+                if not lock_cur.closed:
+                    lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+                    lock_cur.fetchone()  # Consume result
+                    lock_conn.commit()
+                    logger.info(f"Executed pg_advisory_unlock for lock {ADVISORY_LOCK_KEY}")
+                else:
+                    logger.warning("Advisory lock cursor closed before unlock.")
+            else:
+                logger.warning("Advisory lock connection closed before unlock.")
+        except Exception as e:
+            logger.error(f"Error executing pg_advisory_unlock: {e}", exc_info=True)
+        finally:
+            # Ensure resources are closed regardless of unlock success
+            try:
+                if lock_cur and not lock_cur.closed:
+                    lock_cur.close()
+            except Exception as e:
+                logger.error(f"Error closing advisory lock cursor: {e}", exc_info=True)
+            try:
+                if lock_conn and not lock_conn.closed:
+                    lock_conn.close()
+                    logger.info("Closed database connection that held advisory lock.")
+            except Exception as e:
+                logger.error(f"Error closing advisory lock connection: {e}", exc_info=True)
+    else:
+        logger.warning("Attempted to release lock, but connection/cursor not found.")
+
+
+async def start_scheduler_with_leader_election(server: SyncServer):
+    """
+    Call this function from your FastAPI startup event handler.
+    Attempts immediate lock acquisition, starts background retry if failed.
+    """
+    global _lock_retry_task, _is_scheduler_leader

     if not settings.enable_batch_job_polling:
+        logger.info("Batch job polling is disabled.")
         return

-    with db_context() as session:
-        engine = session.get_bind()
-
-        raw = engine.raw_connection()
-        cur = raw.cursor()
-        cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
-        got = cur.fetchone()[0]
-        if not got:
-            cur.close()
-            raw.close()
-            logger.info("Batch‐poller lock already held – not starting scheduler in this worker")
+    if _is_scheduler_leader:
+        logger.warning("Scheduler start requested, but already leader.")
         return

-        _startup_lock_conn, _startup_lock_cur = raw, cur
-        jitter_seconds = 10
-        trigger = IntervalTrigger(
-            seconds=settings.poll_running_llm_batches_interval_seconds,
-            jitter=jitter_seconds,
-        )
-
-        scheduler.add_job(
-            poll_running_llm_batches,
-            args=[server],
-            trigger=trigger,
-            next_run_time=datetime.datetime.now(datetime.timezone.utc),
-            id="poll_llm_batches",
-            name="Poll LLM API batch jobs",
-            replace_existing=True,
-        )
-        scheduler.start()
-        logger.info("Started batch‐polling scheduler in this worker")
-
-
-def shutdown_cron_scheduler():
-    global _startup_lock_conn, _startup_lock_cur
-
-    if settings.enable_batch_job_polling and scheduler.running:
-        scheduler.shutdown()
-
-    if _startup_lock_cur is not None:
-        _startup_lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
-        _startup_lock_conn.commit()
-        _startup_lock_cur.close()
-        _startup_lock_conn.close()
-        _startup_lock_cur = None
-        _startup_lock_conn = None
+    acquired_immediately = await _try_acquire_lock_and_start_scheduler(server)
+
+    if not acquired_immediately and _lock_retry_task is None:
+        # Failed initial attempt, start background retry task
+        loop = asyncio.get_running_loop()
+        _lock_retry_task = loop.create_task(_background_lock_retry_loop(server))
+
+
+async def shutdown_scheduler_and_release_lock():
+    """
+    Call this function from your FastAPI shutdown event handler.
+    Stops scheduler/releases lock if leader, cancels retry task otherwise.
+    """
+    global _is_scheduler_leader, _lock_retry_task, scheduler
+
+    # 1. Cancel retry task if running (for non-leaders)
+    if _lock_retry_task is not None:
+        logger.info("Shutting down: Cancelling background lock retry task.")
+        current_task = _lock_retry_task
+        _lock_retry_task = None  # Clear handle first
+        current_task.cancel()
+        try:
+            await current_task  # Wait for cancellation
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task successfully cancelled.")
+        except Exception as e:
+            logger.warning(f"Exception waiting for cancelled retry task: {e}", exc_info=True)
+
+    # 2. Shutdown scheduler and release lock if we were the leader
+    if _is_scheduler_leader:
+        logger.info("Shutting down: Leader instance stopping scheduler and releasing lock.")
+        if scheduler.running:
+            try:
+                scheduler.shutdown()  # wait=True by default
+                logger.info("APScheduler shut down.")
+            except Exception as e:
+                logger.error(f"Error shutting down APScheduler: {e}", exc_info=True)
+
+        await _release_advisory_lock()
+        _is_scheduler_leader = False  # Update state after cleanup
+    else:
+        logger.info("Shutting down: Non-leader instance.")
+
+    # Final cleanup check for scheduler state (belt and suspenders)
+    if scheduler.running:
+        logger.warning("Scheduler still running after shutdown logic completed? Forcing shutdown.")
+        try:
+            scheduler.shutdown(wait=False)
+        except:
+            pass
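
The two new coroutines are meant to be called from the API server's startup and shutdown hooks (their docstrings say as much); the actual wiring lives in letta/server/rest_api/app.py, which is not part of this excerpt. A minimal sketch of such wiring, assuming a FastAPI lifespan handler and an already-constructed SyncServer:

    from contextlib import asynccontextmanager

    from fastapi import FastAPI

    from letta.jobs.scheduler import (
        shutdown_scheduler_and_release_lock,
        start_scheduler_with_leader_election,
    )

    server = ...  # the application's existing SyncServer instance (construction not shown here)

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # Only the worker that wins pg_try_advisory_lock runs the batch poller;
        # the others keep retrying in the background in case the leader exits.
        await start_scheduler_with_leader_election(server)
        try:
            yield
        finally:
            # Leader: stop APScheduler and release the advisory lock.
            # Non-leader: just cancel the background retry task.
            await shutdown_scheduler_and_release_lock()

    app = FastAPI(lifespan=lifespan)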
letta/llm_api/google_ai_client.py CHANGED
@@ -13,6 +13,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.schemas.enums import ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -29,12 +30,20 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        # print("[google_ai request]", json.dumps(request_data, indent=2))
+        api_key = None
+        if llm_config.provider_name and llm_config.provider_name != ProviderType.google_ai.value:
+            from letta.services.provider_manager import ProviderManager
+
+            api_key = ProviderManager().get_override_key(llm_config.provider_name)

+        if not api_key:
+            api_key = model_settings.gemini_api_key
+
+        # print("[google_ai request]", json.dumps(request_data, indent=2))
         url, headers = get_gemini_endpoint_and_headers(
             base_url=str(llm_config.model_endpoint),
             model=llm_config.model,
-            api_key=str(model_settings.gemini_api_key),
+            api_key=str(api_key),
             key_in_header=True,
             generate_content=True,
         )
@@ -122,8 +131,8 @@ class GoogleAIClient(LLMClientBase):
         for candidate in response_data["candidates"]:
             content = candidate["content"]

-            if "role" not in content:
-                # This means the response is malformed
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
                 # NOTE: must be a ValueError to trigger a retry
                 raise ValueError(f"Error in response data from LLM: {response_data}")
             role = content["role"]
letta/llm_api/google_vertex_client.py CHANGED
@@ -110,7 +110,11 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content

-            role = content.role
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                raise ValueError(f"Error in response data from LLM: {response_data}")
+            role = content["role"]
             assert role == "model", f"Unknown role in response: {role}"

             parts = content.parts
letta/llm_api/openai.py CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
-from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
+from letta.llm_api.openai_client import accepts_developer_role, supports_parallel_tool_calling, supports_temperature_param
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -114,8 +114,16 @@ def build_openai_chat_completions_request(
         put_inner_thoughts_first=put_inner_thoughts_first,
     )

+    use_developer_message = accepts_developer_role(llm_config.model)
+
     openai_message_list = [
-        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)) for m in messages
+        cast_message_to_subtype(
+            m.to_openai_dict(
+                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+                use_developer_message=use_developer_message,
+            )
+        )
+        for m in messages
     ]

     if llm_config.model:
letta/llm_api/openai_client.py CHANGED
@@ -40,7 +40,19 @@ def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""

     # NOTE: needs to be updated with new model releases
-    return model.startswith("o1") or model.startswith("o3")
+    is_reasoning = model.startswith("o1") or model.startswith("o3")
+    return is_reasoning
+
+
+def accepts_developer_role(model: str) -> bool:
+    """Checks if the model accepts the 'developer' role. Note that not all reasoning models accept this role.
+
+    See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
+    """
+    if is_openai_reasoning_model(model):
+        return True
+    else:
+        return False


 def supports_temperature_param(model: str) -> bool:
@@ -102,7 +114,7 @@ class OpenAIClient(LLMClientBase):
             put_inner_thoughts_first=True,
         )

-        use_developer_message = is_openai_reasoning_model(llm_config.model)
+        use_developer_message = accepts_developer_role(llm_config.model)

         openai_message_list = [
             cast_message_to_subtype(
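
As written, accepts_developer_role simply mirrors is_openai_reasoning_model, so the behavior introduced by this hunk amounts to the following (a sketch of expected values under the prefix check above, not an exhaustive model list):

    from letta.llm_api.openai_client import accepts_developer_role

    # Only o1/o3-prefixed models are treated as accepting the 'developer' role;
    # every other model keeps sending the plain 'system' role.
    assert accepts_developer_role("o1") is True
    assert accepts_developer_role("o3-mini") is True
    assert accepts_developer_role("gpt-4o-mini") is False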
letta/orm/message.py CHANGED
@@ -44,6 +44,10 @@ class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
     sender_id: Mapped[Optional[str]] = mapped_column(
         nullable=True, doc="The id of the sender of the message, can be an identity id or agent id"
     )
+    batch_item_id: Mapped[Optional[str]] = mapped_column(
+        nullable=True,
+        doc="The id of the LLMBatchItem that this message is associated with",
+    )

     # Monotonically increasing sequence for efficient/correct listing
     sequence_id: Mapped[int] = mapped_column(
letta/prompts/system/voice_sleeptime.txt CHANGED
@@ -53,7 +53,7 @@ Example output:

 **Phase 2: Refine User Memory using `rethink_user_memory` and `finish_rethinking_memory`**

-After the `store_memories` tool call is processed, you will be presented with the current content of the `human` memory block (the read-write block storing details about the user).
+After the `store_memories` tool call is processed, consider the current content of the `human` memory block (the read-write block storing details about the user).
 - Your goal is to refine this block by integrating information from the **ENTIRE** conversation transcript (both `Older` and `Newer` sections) with the existing memory content.

 - Refinement Principles:
@@ -67,8 +67,7 @@ After the `store_memories` tool call is processed, you will be presented with th
 - Tool Usage:
   - Use the `rethink_user_memory(new_memory: string)` tool iteratively. Each call MUST submit the complete, rewritten version of the `human` memory block as you refine it.
   - Continue calling `rethink_user_memory` until you are satisfied that the memory block is accurate, comprehensive, organized, and up-to-date according to the principles above.
-  - Once the `human` block is fully polished, call the `finish_rethinking_memory()` tool exactly once to signal completion.
+  - Once the `human` block is fully polished, call the `finish_rethinking_memory` tool exactly once to signal completion.

 Output Requirements:
 - You MUST ONLY output tool calls in the specified sequence: First `store_memories` (once), then one or more `rethink_user_memory` calls, and finally `finish_rethinking_memory` (once).
-- Do not output any other text or explanations outside of the required JSON tool call format.
letta/schemas/letta_message.py CHANGED
@@ -48,6 +48,7 @@ class LettaMessage(BaseModel):
     message_type: MessageType = Field(..., description="The type of the message.")
     otid: Optional[str] = None
     sender_id: Optional[str] = None
+    step_id: Optional[str] = None

     @field_serializer("date")
     def serialize_datetime(self, dt: datetime, _info):
letta/schemas/letta_request.py CHANGED
@@ -35,4 +35,11 @@ class LettaBatchRequest(LettaRequest):

 class CreateBatch(BaseModel):
     requests: List[LettaBatchRequest] = Field(..., description="List of requests to be processed in batch.")
-    callback_url: Optional[HttpUrl] = Field(None, description="Optional URL to call via POST when the batch completes.")
+    callback_url: Optional[HttpUrl] = Field(
+        None,
+        description="Optional URL to call via POST when the batch completes. The callback payload will be a JSON object with the following fields: "
+        "{'job_id': string, 'status': string, 'completed_at': string}. "
+        "Where 'job_id' is the unique batch job identifier, "
+        "'status' is the final batch status (e.g., 'completed', 'failed'), and "
+        "'completed_at' is an ISO 8601 timestamp indicating when the batch job completed.",
+    )
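
Going by the description string above, a callback receiver should expect a POST body shaped roughly like this (values are illustrative, not taken from a real job):

    # Illustrative callback payload; field names come from the description above.
    example_callback_payload = {
        "job_id": "batch-xxxxxxxx",              # unique batch job identifier (illustrative id)
        "status": "completed",                   # final status, e.g. 'completed' or 'failed'
        "completed_at": "2025-05-02T22:27:10Z",  # ISO 8601 completion timestamp (illustrative)
    }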
letta/schemas/letta_response.py CHANGED
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
 from letta.helpers.json_helpers import json_dumps
 from letta.schemas.enums import JobStatus, MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage, LettaMessageUnion
+from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics

 # TODO: consider moving into own file
@@ -175,3 +176,7 @@ class LettaBatchResponse(BaseModel):
     agent_count: int = Field(..., description="The number of agents in the batch request.")
     last_polled_at: datetime = Field(..., description="The timestamp when the batch was last polled for updates.")
     created_at: datetime = Field(..., description="The timestamp when the batch request was created.")
+
+
+class LettaBatchMessages(BaseModel):
+    messages: List[Message]
letta/schemas/llm_batch_job.py CHANGED
@@ -10,16 +10,18 @@ from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.llm_config import LLMConfig


-class LLMBatchItem(OrmMetadataBase, validate_assignment=True):
+class LLMBatchItemBase(OrmMetadataBase, validate_assignment=True):
+    __id_prefix__ = "batch_item"
+
+
+class LLMBatchItem(LLMBatchItemBase, validate_assignment=True):
     """
     Represents a single agent's LLM request within a batch.

     This object captures the configuration, execution status, and eventual result of one agent's request within a larger LLM batch job.
     """

-    __id_prefix__ = "batch_item"
-
-    id: Optional[str] = Field(None, description="The id of the batch item. Assigned by the database.")
+    id: str = LLMBatchItemBase.generate_id_field()
     llm_batch_id: str = Field(..., description="The id of the parent LLM batch job this item belongs to.")
     agent_id: str = Field(..., description="The id of the agent associated with this LLM request.")
letta/schemas/llm_config.py CHANGED
@@ -164,6 +164,15 @@ class LLMConfig(BaseModel):
                 model_wrapper=None,
                 context_window=128000,
             )
+        elif model_name == "gpt-4.1":
+            return cls(
+                model="gpt-4.1",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=256000,
+                max_tokens=8192,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",