letta-nightly 0.7.8.dev20250502104219__py3-none-any.whl → 0.7.9.dev20250502222710__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +2 -2
- letta/agents/helpers.py +58 -1
- letta/agents/letta_agent.py +13 -3
- letta/agents/letta_agent_batch.py +33 -17
- letta/agents/voice_agent.py +1 -2
- letta/agents/voice_sleeptime_agent.py +75 -320
- letta/functions/function_sets/multi_agent.py +1 -1
- letta/functions/function_sets/voice.py +20 -32
- letta/functions/helpers.py +7 -7
- letta/helpers/datetime_helpers.py +6 -0
- letta/helpers/message_helper.py +19 -18
- letta/jobs/scheduler.py +233 -49
- letta/llm_api/google_ai_client.py +13 -4
- letta/llm_api/google_vertex_client.py +5 -1
- letta/llm_api/openai.py +10 -2
- letta/llm_api/openai_client.py +14 -2
- letta/orm/message.py +4 -0
- letta/prompts/system/voice_sleeptime.txt +2 -3
- letta/schemas/letta_message.py +1 -0
- letta/schemas/letta_request.py +8 -1
- letta/schemas/letta_response.py +5 -0
- letta/schemas/llm_batch_job.py +6 -4
- letta/schemas/llm_config.py +9 -0
- letta/schemas/message.py +23 -2
- letta/schemas/providers.py +3 -1
- letta/server/rest_api/app.py +15 -7
- letta/server/rest_api/routers/v1/agents.py +3 -0
- letta/server/rest_api/routers/v1/messages.py +46 -1
- letta/server/rest_api/routers/v1/steps.py +1 -1
- letta/server/rest_api/utils.py +25 -6
- letta/server/server.py +11 -3
- letta/services/llm_batch_manager.py +60 -1
- letta/services/message_manager.py +1 -0
- letta/services/summarizer/summarizer.py +42 -36
- letta/settings.py +1 -0
- letta/tracing.py +5 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/METADATA +2 -2
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/RECORD +41 -41
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/entry_points.txt +0 -0
letta/helpers/message_helper.py
CHANGED
@@ -5,57 +5,58 @@ from letta.schemas.message import Message, MessageCreate
 
 
 def convert_message_creates_to_messages(
-
+    message_creates: list[MessageCreate],
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> list[Message]:
     return [
         _convert_message_create_to_message(
-
+            message_create=create,
             agent_id=agent_id,
             wrap_user_message=wrap_user_message,
             wrap_system_message=wrap_system_message,
         )
-        for
+        for create in message_creates
     ]
 
 
 def _convert_message_create_to_message(
-
+    message_create: MessageCreate,
     agent_id: str,
     wrap_user_message: bool = True,
     wrap_system_message: bool = True,
 ) -> Message:
     """Converts a MessageCreate object into a Message object, applying wrapping if needed."""
     # TODO: This seems like extra boilerplate with little benefit
-    assert isinstance(
+    assert isinstance(message_create, MessageCreate)
 
     # Extract message content
-    if isinstance(
-        message_content =
-    elif
-        message_content =
+    if isinstance(message_create.content, str):
+        message_content = message_create.content
+    elif message_create.content and len(message_create.content) > 0 and isinstance(message_create.content[0], TextContent):
+        message_content = message_create.content[0].text
     else:
        raise ValueError("Message content is empty or invalid")
 
     # Apply wrapping if needed
-    if
-        raise ValueError(f"Invalid message role: {
-    elif
+    if message_create.role not in {MessageRole.user, MessageRole.system}:
+        raise ValueError(f"Invalid message role: {message_create.role}")
+    elif message_create.role == MessageRole.user and wrap_user_message:
        message_content = system.package_user_message(user_message=message_content)
-    elif
+    elif message_create.role == MessageRole.system and wrap_system_message:
        message_content = system.package_system_message(system_message=message_content)
 
    return Message(
        agent_id=agent_id,
-        role=
+        role=message_create.role,
        content=[TextContent(text=message_content)] if message_content else [],
-        name=
+        name=message_create.name,
        model=None, # assigned later?
        tool_calls=None, # irrelevant
        tool_call_id=None,
-        otid=
-        sender_id=
-        group_id=
+        otid=message_create.otid,
+        sender_id=message_create.sender_id,
+        group_id=message_create.group_id,
+        batch_item_id=message_create.batch_item_id,
    )
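For orientation, a minimal sketch of how the reworked helper would be called (field names follow this diff; the import paths and example values are assumptions, not taken from the package):

from letta.helpers.message_helper import convert_message_creates_to_messages
from letta.schemas.enums import MessageRole  # import path assumed
from letta.schemas.message import MessageCreate

# Two inbound messages for one agent; batch_item_id is the new optional field threaded through to Message.
creates = [
    MessageCreate(role=MessageRole.user, content="What did we decide yesterday?"),
    MessageCreate(role=MessageRole.system, content="The user prefers short answers.", batch_item_id="batch_item-example"),
]

# Wrapping packages user/system text with Letta's system formatting before the Message objects are persisted.
messages = convert_message_creates_to_messages(
    message_creates=creates,
    agent_id="agent-example",
    wrap_user_message=True,
    wrap_system_message=True,
)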
letta/jobs/scheduler.py
CHANGED
@@ -1,4 +1,6 @@
+import asyncio
 import datetime
+from typing import Optional
 
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
@@ -9,63 +11,245 @@ from letta.server.db import db_context
 from letta.server.server import SyncServer
 from letta.settings import settings
 
+# --- Global State ---
 scheduler = AsyncIOScheduler()
 logger = get_logger(__name__)
-
+ADVISORY_LOCK_KEY = 0x12345678ABCDEF00
 
-
-
+_advisory_lock_conn = None # Holds the raw DB connection if leader
+_advisory_lock_cur = None # Holds the cursor for the lock connection if leader
+_lock_retry_task: Optional[asyncio.Task] = None # Background task handle for non-leaders
+_is_scheduler_leader = False # Flag indicating if this instance runs the scheduler
 
 
-def
-
+async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:
+    """Attempts to acquire lock, starts scheduler if successful."""
+    global _advisory_lock_conn, _advisory_lock_cur, _is_scheduler_leader, scheduler
+
+    if _is_scheduler_leader:
+        return True  # Already leading
+
+    raw_conn = None
+    cur = None
+    acquired_lock = False
+    try:
+        # Use a temporary connection context for the attempt initially
+        with db_context() as session:
+            engine = session.get_bind()
+            # Get raw connection - MUST be kept open if lock is acquired
+            raw_conn = engine.raw_connection()
+            cur = raw_conn.cursor()
+
+            cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+            acquired_lock = cur.fetchone()[0]
+
+            if not acquired_lock:
+                cur.close()
+                raw_conn.close()
+                logger.info("Scheduler lock held by another instance.")
+                return False
+
+            # --- Lock Acquired ---
+            logger.info("Acquired scheduler lock.")
+            _advisory_lock_conn = raw_conn  # Keep connection for lock duration
+            _advisory_lock_cur = cur  # Keep cursor for lock duration
+            raw_conn = None  # Prevent closing in finally block
+            cur = None  # Prevent closing in finally block
+
+            trigger = IntervalTrigger(
+                seconds=settings.poll_running_llm_batches_interval_seconds,
+                jitter=10, # Jitter for the job execution
+            )
+            scheduler.add_job(
+                poll_running_llm_batches,
+                args=[server],
+                trigger=trigger,
+                id="poll_llm_batches",
+                name="Poll LLM API batch jobs",
+                replace_existing=True,
+                next_run_time=datetime.datetime.now(datetime.timezone.utc),
+            )
+
+            if not scheduler.running:
+                scheduler.start()
+            elif scheduler.state == 2:  # PAUSED
+                scheduler.resume()
+
+            _is_scheduler_leader = True
+            return True
+
+    except Exception as e:
+        logger.error(f"Error during lock acquisition/scheduler start: {e}", exc_info=True)
+        if acquired_lock:  # If lock was acquired before error, try to release
+            logger.warning("Attempting to release lock due to error during startup.")
+            try:
+                # Use the cursor/connection we were about to store
+                _advisory_lock_cur = cur
+                _advisory_lock_conn = raw_conn
+                await _release_advisory_lock()  # Attempt cleanup
+            except Exception as unlock_err:
+                logger.error(f"Failed to release lock during error handling: {unlock_err}", exc_info=True)
+            finally:
+                # Ensure globals are cleared after failed attempt
+                _advisory_lock_cur = None
+                _advisory_lock_conn = None
+        _is_scheduler_leader = False
+
+        # Ensure scheduler is stopped if we failed partially
+        if scheduler.running:
+            try:
+                scheduler.shutdown(wait=False)
+            except:
+                pass  # Best effort
+        return False
+    finally:
+        # Clean up temporary resources if lock wasn't acquired or error occurred before storing
+        if cur:
+            try:
+                cur.close()
+            except:
+                pass
+        if raw_conn:
+            try:
+                raw_conn.close()
+            except:
+                pass
+
+
+async def _background_lock_retry_loop(server: SyncServer):
+    """Periodically attempts to acquire the lock if not initially acquired."""
+    global _lock_retry_task, _is_scheduler_leader
+    logger.info("Starting background task to periodically check for scheduler lock.")
+
+    while True:
+        if _is_scheduler_leader:  # Should be cancelled first, but safety check
+            break
+        try:
+            wait_time = settings.poll_lock_retry_interval_seconds
+            await asyncio.sleep(wait_time)
+
+            # Re-check state before attempting lock
+            if _is_scheduler_leader or _lock_retry_task is None:
+                break  # Stop if became leader or task was cancelled
+
+            acquired = await _try_acquire_lock_and_start_scheduler(server)
+            if acquired:
+                logger.info("Background task acquired lock and started scheduler.")
+                _lock_retry_task = None  # Clear self handle
+                break  # Exit loop, we are now the leader
+
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task cancelled.")
+            break
+        except Exception as e:
+            logger.error(f"Error in background lock retry loop: {e}", exc_info=True)
+            # Avoid tight loop on persistent errors
+            await asyncio.sleep(settings.poll_lock_retry_interval_seconds)
+
+
+async def _release_advisory_lock():
+    """Releases the advisory lock using the stored connection."""
+    global _advisory_lock_conn, _advisory_lock_cur
+
+    lock_cur = _advisory_lock_cur
+    lock_conn = _advisory_lock_conn
+    _advisory_lock_cur = None  # Clear global immediately
+    _advisory_lock_conn = None  # Clear global immediately
+
+    if lock_cur is not None and lock_conn is not None:
+        logger.info(f"Attempting to release advisory lock {ADVISORY_LOCK_KEY}")
+        try:
+            if not lock_conn.closed:
+                if not lock_cur.closed:
+                    lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
+                    lock_cur.fetchone()  # Consume result
+                    lock_conn.commit()
+                    logger.info(f"Executed pg_advisory_unlock for lock {ADVISORY_LOCK_KEY}")
+                else:
+                    logger.warning("Advisory lock cursor closed before unlock.")
+            else:
+                logger.warning("Advisory lock connection closed before unlock.")
+        except Exception as e:
+            logger.error(f"Error executing pg_advisory_unlock: {e}", exc_info=True)
+        finally:
+            # Ensure resources are closed regardless of unlock success
+            try:
+                if lock_cur and not lock_cur.closed:
+                    lock_cur.close()
+            except Exception as e:
+                logger.error(f"Error closing advisory lock cursor: {e}", exc_info=True)
+            try:
+                if lock_conn and not lock_conn.closed:
+                    lock_conn.close()
+                    logger.info("Closed database connection that held advisory lock.")
+            except Exception as e:
+                logger.error(f"Error closing advisory lock connection: {e}", exc_info=True)
+    else:
+        logger.warning("Attempted to release lock, but connection/cursor not found.")
+
+
+async def start_scheduler_with_leader_election(server: SyncServer):
+    """
+    Call this function from your FastAPI startup event handler.
+    Attempts immediate lock acquisition, starts background retry if failed.
+    """
+    global _lock_retry_task, _is_scheduler_leader
 
     if not settings.enable_batch_job_polling:
+        logger.info("Batch job polling is disabled.")
         return
 
-
-
-
-    raw = engine.raw_connection()
-    cur = raw.cursor()
-    cur.execute("SELECT pg_try_advisory_lock(CAST(%s AS bigint))", (STARTUP_LOCK_KEY,))
-    got = cur.fetchone()[0]
-    if not got:
-        cur.close()
-        raw.close()
-        logger.info("Batch‐poller lock already held – not starting scheduler in this worker")
+    if _is_scheduler_leader:
+        logger.warning("Scheduler start requested, but already leader.")
        return
 
+    acquired_immediately = await _try_acquire_lock_and_start_scheduler(server)
+
+    if not acquired_immediately and _lock_retry_task is None:
+        # Failed initial attempt, start background retry task
+        loop = asyncio.get_running_loop()
+        _lock_retry_task = loop.create_task(_background_lock_retry_loop(server))
+
+
+async def shutdown_scheduler_and_release_lock():
+    """
+    Call this function from your FastAPI shutdown event handler.
+    Stops scheduler/releases lock if leader, cancels retry task otherwise.
+    """
+    global _is_scheduler_leader, _lock_retry_task, scheduler
+
+    # 1. Cancel retry task if running (for non-leaders)
+    if _lock_retry_task is not None:
+        logger.info("Shutting down: Cancelling background lock retry task.")
+        current_task = _lock_retry_task
+        _lock_retry_task = None  # Clear handle first
+        current_task.cancel()
+        try:
+            await current_task  # Wait for cancellation
+        except asyncio.CancelledError:
+            logger.info("Background lock retry task successfully cancelled.")
+        except Exception as e:
+            logger.warning(f"Exception waiting for cancelled retry task: {e}", exc_info=True)
+
+    # 2. Shutdown scheduler and release lock if we were the leader
+    if _is_scheduler_leader:
+        logger.info("Shutting down: Leader instance stopping scheduler and releasing lock.")
+        if scheduler.running:
+            try:
+                scheduler.shutdown()  # wait=True by default
+                logger.info("APScheduler shut down.")
+            except Exception as e:
+                logger.error(f"Error shutting down APScheduler: {e}", exc_info=True)
+
+        await _release_advisory_lock()
+        _is_scheduler_leader = False  # Update state after cleanup
+    else:
+        logger.info("Shutting down: Non-leader instance.")
+
+    # Final cleanup check for scheduler state (belt and suspenders)
+    if scheduler.running:
+        logger.warning("Scheduler still running after shutdown logic completed? Forcing shutdown.")
+        try:
+            scheduler.shutdown(wait=False)
+        except:
+            pass
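The new public entry points are meant to be invoked from the web app's lifecycle hooks (their docstrings say as much, and letta/server/rest_api/app.py also changes in this release). A minimal sketch of that wiring under a generic FastAPI lifespan handler; the app and server construction here is illustrative, not the actual Letta REST app code:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from letta.jobs.scheduler import shutdown_scheduler_and_release_lock, start_scheduler_with_leader_election
from letta.server.server import SyncServer

server = SyncServer()  # illustrative; the real server is constructed inside the REST app

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Only the worker that wins the Postgres advisory lock becomes the leader and runs
    # the batch-polling job; every other worker keeps retrying in the background.
    await start_scheduler_with_leader_election(server)
    yield
    # Leader: stop APScheduler and release the advisory lock.
    # Non-leader: cancel the background retry task.
    await shutdown_scheduler_and_release_lock()

app = FastAPI(lifespan=lifespan)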
letta/llm_api/google_ai_client.py
CHANGED
@@ -13,6 +13,7 @@ from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
+from letta.schemas.enums import ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -29,12 +30,20 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-
+        api_key = None
+        if llm_config.provider_name and llm_config.provider_name != ProviderType.google_ai.value:
+            from letta.services.provider_manager import ProviderManager
+
+            api_key = ProviderManager().get_override_key(llm_config.provider_name)
 
+        if not api_key:
+            api_key = model_settings.gemini_api_key
+
+        # print("[google_ai request]", json.dumps(request_data, indent=2))
         url, headers = get_gemini_endpoint_and_headers(
             base_url=str(llm_config.model_endpoint),
             model=llm_config.model,
-            api_key=str(
+            api_key=str(api_key),
             key_in_header=True,
             generate_content=True,
         )
@@ -122,8 +131,8 @@ class GoogleAIClient(LLMClientBase):
         for candidate in response_data["candidates"]:
             content = candidate["content"]
 
-            if "role" not in content:
-                # This means the response is malformed
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
                 # NOTE: must be a ValueError to trigger a retry
                 raise ValueError(f"Error in response data from LLM: {response_data}")
             role = content["role"]
letta/llm_api/google_vertex_client.py
CHANGED
@@ -110,7 +110,11 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content
 
-            role
+            if "role" not in content or not content["role"]:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                raise ValueError(f"Error in response data from LLM: {response_data}")
+            role = content["role"]
             assert role == "model", f"Unknown role in response: {role}"
 
             parts = content.parts
letta/llm_api/openai.py
CHANGED
@@ -7,7 +7,7 @@ from openai import OpenAI
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
-from letta.llm_api.openai_client import supports_parallel_tool_calling, supports_temperature_param
+from letta.llm_api.openai_client import accepts_developer_role, supports_parallel_tool_calling, supports_temperature_param
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
@@ -114,8 +114,16 @@ def build_openai_chat_completions_request(
         put_inner_thoughts_first=put_inner_thoughts_first,
     )
 
+    use_developer_message = accepts_developer_role(llm_config.model)
+
     openai_message_list = [
-        cast_message_to_subtype(
+        cast_message_to_subtype(
+            m.to_openai_dict(
+                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+                use_developer_message=use_developer_message,
+            )
+        )
+        for m in messages
     ]
 
     if llm_config.model:
letta/llm_api/openai_client.py
CHANGED
@@ -40,7 +40,19 @@ def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
     # NOTE: needs to be updated with new model releases
-
+    is_reasoning = model.startswith("o1") or model.startswith("o3")
+    return is_reasoning
+
+
+def accepts_developer_role(model: str) -> bool:
+    """Checks if the model accepts the 'developer' role. Note that not all reasoning models accept this role.
+
+    See: https://community.openai.com/t/developer-role-not-accepted-for-o1-o1-mini-o3-mini/1110750/7
+    """
+    if is_openai_reasoning_model(model):
+        return True
+    else:
+        return False
 
 
 def supports_temperature_param(model: str) -> bool:
@@ -102,7 +114,7 @@ class OpenAIClient(LLMClientBase):
             put_inner_thoughts_first=True,
         )
 
-        use_developer_message =
+        use_developer_message = accepts_developer_role(llm_config.model)
 
        openai_message_list = [
            cast_message_to_subtype(
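A quick illustration of what the new helper returns (the model names below are examples only, not drawn from this diff):

from letta.llm_api.openai_client import accepts_developer_role

accepts_developer_role("o1-2024-12-17")  # True: o1/o3 models get the 'developer' role
accepts_developer_role("gpt-4o-mini")    # False: other models keep the 'system' role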
letta/orm/message.py
CHANGED
@@ -44,6 +44,10 @@ class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
     sender_id: Mapped[Optional[str]] = mapped_column(
         nullable=True, doc="The id of the sender of the message, can be an identity id or agent id"
     )
+    batch_item_id: Mapped[Optional[str]] = mapped_column(
+        nullable=True,
+        doc="The id of the LLMBatchItem that this message is associated with",
+    )
 
     # Monotonically increasing sequence for efficient/correct listing
     sequence_id: Mapped[int] = mapped_column(
letta/prompts/system/voice_sleeptime.txt
CHANGED
@@ -53,7 +53,7 @@ Example output:
 
 **Phase 2: Refine User Memory using `rethink_user_memory` and `finish_rethinking_memory`**
 
-After the `store_memories` tool call is processed,
+After the `store_memories` tool call is processed, consider the current content of the `human` memory block (the read-write block storing details about the user).
 - Your goal is to refine this block by integrating information from the **ENTIRE** conversation transcript (both `Older` and `Newer` sections) with the existing memory content.
 
 - Refinement Principles:
@@ -67,8 +67,7 @@ After the `store_memories` tool call is processed, you will be presented with th
 - Tool Usage:
   - Use the `rethink_user_memory(new_memory: string)` tool iteratively. Each call MUST submit the complete, rewritten version of the `human` memory block as you refine it.
   - Continue calling `rethink_user_memory` until you are satisfied that the memory block is accurate, comprehensive, organized, and up-to-date according to the principles above.
-  - Once the `human` block is fully polished, call the `finish_rethinking_memory
+  - Once the `human` block is fully polished, call the `finish_rethinking_memory` tool exactly once to signal completion.
 
 Output Requirements:
 - You MUST ONLY output tool calls in the specified sequence: First `store_memories` (once), then one or more `rethink_user_memory` calls, and finally `finish_rethinking_memory` (once).
-- Do not output any other text or explanations outside of the required JSON tool call format.
letta/schemas/letta_message.py
CHANGED
@@ -48,6 +48,7 @@ class LettaMessage(BaseModel):
     message_type: MessageType = Field(..., description="The type of the message.")
     otid: Optional[str] = None
     sender_id: Optional[str] = None
+    step_id: Optional[str] = None
 
     @field_serializer("date")
     def serialize_datetime(self, dt: datetime, _info):
letta/schemas/letta_request.py
CHANGED
@@ -35,4 +35,11 @@ class LettaBatchRequest(LettaRequest):
 
 class CreateBatch(BaseModel):
     requests: List[LettaBatchRequest] = Field(..., description="List of requests to be processed in batch.")
-    callback_url: Optional[HttpUrl] = Field(
+    callback_url: Optional[HttpUrl] = Field(
+        None,
+        description="Optional URL to call via POST when the batch completes. The callback payload will be a JSON object with the following fields: "
+        "{'job_id': string, 'status': string, 'completed_at': string}. "
+        "Where 'job_id' is the unique batch job identifier, "
+        "'status' is the final batch status (e.g., 'completed', 'failed'), and "
+        "'completed_at' is an ISO 8601 timestamp indicating when the batch job completed.",
+    )
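Since the callback payload documented above is a flat JSON object, the receiving side can be very small. A hypothetical sketch of an endpoint on the callback_url side (the app and route path are illustrative, not part of Letta):

from datetime import datetime

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class BatchCallback(BaseModel):
    job_id: str             # unique batch job identifier
    status: str             # final batch status, e.g. "completed" or "failed"
    completed_at: datetime  # ISO 8601 completion timestamp

@app.post("/letta/batch-callback")
async def on_batch_done(payload: BatchCallback):
    # React to the finished batch, e.g. fetch its messages via the Letta API.
    print(f"Batch {payload.job_id} finished with status {payload.status} at {payload.completed_at}")
    return {"ok": True}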
letta/schemas/letta_response.py
CHANGED
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
 from letta.helpers.json_helpers import json_dumps
 from letta.schemas.enums import JobStatus, MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage, LettaMessageUnion
+from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
 
 # TODO: consider moving into own file
@@ -175,3 +176,7 @@ class LettaBatchResponse(BaseModel):
     agent_count: int = Field(..., description="The number of agents in the batch request.")
     last_polled_at: datetime = Field(..., description="The timestamp when the batch was last polled for updates.")
     created_at: datetime = Field(..., description="The timestamp when the batch request was created.")
+
+
+class LettaBatchMessages(BaseModel):
+    messages: List[Message]
letta/schemas/llm_batch_job.py
CHANGED
@@ -10,16 +10,18 @@ from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.llm_config import LLMConfig
 
 
-class
+class LLMBatchItemBase(OrmMetadataBase, validate_assignment=True):
+    __id_prefix__ = "batch_item"
+
+
+class LLMBatchItem(LLMBatchItemBase, validate_assignment=True):
     """
     Represents a single agent's LLM request within a batch.
 
     This object captures the configuration, execution status, and eventual result of one agent's request within a larger LLM batch job.
     """
 
-
-
-    id: Optional[str] = Field(None, description="The id of the batch item. Assigned by the database.")
+    id: str = LLMBatchItemBase.generate_id_field()
     llm_batch_id: str = Field(..., description="The id of the parent LLM batch job this item belongs to.")
     agent_id: str = Field(..., description="The id of the agent associated with this LLM request.")
 
letta/schemas/llm_config.py
CHANGED
@@ -164,6 +164,15 @@ class LLMConfig(BaseModel):
                 model_wrapper=None,
                 context_window=128000,
             )
+        elif model_name == "gpt-4.1":
+            return cls(
+                model="gpt-4.1",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=256000,
+                max_tokens=8192,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",