letta-nightly 0.6.49.dev20250408104230__py3-none-any.whl → 0.6.50.dev20250409043626__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +8 -1
- letta/functions/function_sets/base.py +4 -1
- letta/functions/helpers.py +16 -2
- letta/jobs/__init__.py +0 -0
- letta/jobs/helpers.py +25 -0
- letta/jobs/llm_batch_job_polling.py +204 -0
- letta/jobs/scheduler.py +28 -0
- letta/jobs/types.py +10 -0
- letta/llm_api/anthropic.py +8 -3
- letta/llm_api/anthropic_client.py +5 -4
- letta/llm_api/llm_api_tools.py +2 -0
- letta/llm_api/openai_client.py +3 -1
- letta/memory.py +20 -4
- letta/orm/message.py +21 -5
- letta/schemas/enums.py +1 -0
- letta/schemas/llm_config.py +8 -4
- letta/schemas/message.py +8 -7
- letta/server/rest_api/app.py +11 -0
- letta/server/rest_api/chat_completions_interface.py +1 -0
- letta/server/rest_api/routers/v1/agents.py +16 -3
- letta/server/server.py +5 -1
- letta/services/agent_manager.py +34 -28
- letta/services/helpers/agent_manager_helper.py +3 -1
- letta/services/llm_batch_manager.py +97 -6
- letta/services/tool_sandbox/local_sandbox.py +2 -1
- letta/settings.py +4 -0
- letta/streaming_interface.py +2 -0
- {letta_nightly-0.6.49.dev20250408104230.dist-info → letta_nightly-0.6.50.dev20250409043626.dist-info}/METADATA +5 -4
- {letta_nightly-0.6.49.dev20250408104230.dist-info → letta_nightly-0.6.50.dev20250409043626.dist-info}/RECORD +33 -28
- {letta_nightly-0.6.49.dev20250408104230.dist-info → letta_nightly-0.6.50.dev20250409043626.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.49.dev20250408104230.dist-info → letta_nightly-0.6.50.dev20250409043626.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.49.dev20250408104230.dist-info → letta_nightly-0.6.50.dev20250409043626.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -376,7 +376,6 @@ class Agent(BaseAgent):
                 else:
                     raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
                 log_telemetry(self.logger, "_handle_ai_response finish")
-                return response

             except ValueError as ve:
                 if attempt >= empty_response_retry_limit:
@@ -393,6 +392,14 @@ class Agent(BaseAgent):
                 log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
                 raise e

+            # check if we are going over the context window: this allows for articifial constraints
+            if response.usage.total_tokens > self.agent_state.llm_config.context_window:
+                # trigger summarization
+                log_telemetry(self.logger, "_get_ai_reply summarize_messages_inplace")
+                self.summarize_messages_inplace()
+            # return the response
+            return response
+
         log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
         raise Exception("Retries exhausted and no valid response received.")

letta/functions/function_sets/base.py
CHANGED

@@ -225,7 +225,10 @@ def core_memory_insert(agent_state: "AgentState", target_block_label: str, new_m
     current_value_list = current_value.split("\n")
     if line_number is None:
         line_number = len(current_value_list)
-
+    if replace:
+        current_value_list[line_number] = new_memory
+    else:
+        current_value_list.insert(line_number, new_memory)
     new_value = "\n".join(current_value_list)
     agent_state.memory.update_block_value(label=target_block_label, value=new_value)
     return None
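For context, a minimal standalone sketch (not letta code; edit_lines is a hypothetical helper) of the insert-vs-replace semantics the new replace branch introduces:

def edit_lines(current_value: str, new_memory: str, line_number=None, replace=False) -> str:
    # Mirrors the logic above: split into lines, then either overwrite or insert at line_number.
    lines = current_value.split("\n")
    if line_number is None:
        line_number = len(lines)
    if replace:
        lines[line_number] = new_memory
    else:
        lines.insert(line_number, new_memory)
    return "\n".join(lines)

print(edit_lines("a\nb\nc", "X", line_number=1))                # "a\nX\nb\nc"
print(edit_lines("a\nb\nc", "X", line_number=1, replace=True))  # "a\nX\nc"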
letta/functions/helpers.py
CHANGED
@@ -629,8 +629,22 @@ def _get_field_type(field_schema: Dict[str, Any], nested_models: Dict[str, Type[
         if nested_models and ref_type in nested_models:
             return nested_models[ref_type]
     elif "additionalProperties" in field_schema:
-
-
+        # TODO: This is totally GPT generated and I'm not sure it works
+        # TODO: This is done to quickly patch some tests, we should nuke this whole pathway asap
+        ap = field_schema["additionalProperties"]
+
+        if ap is True:
+            return dict
+        elif ap is False:
+            raise ValueError("additionalProperties=false is not supported.")
+        else:
+            # Try resolving nested type
+            nested_type = _get_field_type(ap, nested_models)
+            # If nested_type is Any, fall back to `dict`, or raise, depending on how strict you want to be
+            if nested_type == Any:
+                return dict
+            return Dict[str, nested_type]
+
         return dict
     elif field_schema.get("$ref") is not None:
         ref_type = field_schema["$ref"].split("/")[-1]
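A rough illustration (a plain-Python stand-in for this one branch, not the letta helper itself) of how an additionalProperties schema fragment is expected to resolve after this change:

from typing import Any, Dict

def resolve_additional_properties(field_schema: Dict[str, Any]):
    # Simplified stand-in for the branch above: True -> dict, False -> error,
    # otherwise look at the value schema and build Dict[str, <value type>].
    ap = field_schema["additionalProperties"]
    if ap is True:
        return dict
    if ap is False:
        raise ValueError("additionalProperties=false is not supported.")
    value_type = str if ap.get("type") == "string" else Any
    return dict if value_type is Any else Dict[str, value_type]

print(resolve_additional_properties({"additionalProperties": {"type": "string"}}))  # typing.Dict[str, str]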
letta/jobs/__init__.py
ADDED
File without changes
letta/jobs/helpers.py
ADDED
@@ -0,0 +1,25 @@
+from anthropic.types.beta.messages import (
+    BetaMessageBatchCanceledResult,
+    BetaMessageBatchIndividualResponse,
+    BetaMessageBatchSucceededResult,
+)
+
+from letta.schemas.enums import JobStatus
+
+
+def map_anthropic_batch_job_status_to_job_status(anthropic_status: str) -> JobStatus:
+    mapping = {
+        "in_progress": JobStatus.running,
+        "canceling": JobStatus.cancelled,
+        "ended": JobStatus.completed,
+    }
+    return mapping.get(anthropic_status, JobStatus.pending)  # fallback just in case
+
+
+def map_anthropic_individual_batch_item_status_to_job_status(individual_item: BetaMessageBatchIndividualResponse) -> JobStatus:
+    if isinstance(individual_item.result, BetaMessageBatchSucceededResult):
+        return JobStatus.completed
+    elif isinstance(individual_item.result, BetaMessageBatchCanceledResult):
+        return JobStatus.cancelled
+    else:
+        return JobStatus.failed
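Assuming the new module is importable as letta.jobs.helpers, a quick sanity check of the batch-level mapping; note that unknown provider statuses fall back to pending rather than raising:

from letta.jobs.helpers import map_anthropic_batch_job_status_to_job_status
from letta.schemas.enums import JobStatus

assert map_anthropic_batch_job_status_to_job_status("in_progress") == JobStatus.running
assert map_anthropic_batch_job_status_to_job_status("canceling") == JobStatus.cancelled
assert map_anthropic_batch_job_status_to_job_status("ended") == JobStatus.completed
assert map_anthropic_batch_job_status_to_job_status("unexpected") == JobStatus.pending  # fallback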
letta/jobs/llm_batch_job_polling.py
ADDED

@@ -0,0 +1,204 @@
+import asyncio
+import datetime
+from typing import List
+
+from letta.jobs.helpers import map_anthropic_batch_job_status_to_job_status, map_anthropic_individual_batch_item_status_to_job_status
+from letta.jobs.types import BatchId, BatchPollingResult, ItemUpdateInfo
+from letta.log import get_logger
+from letta.schemas.enums import JobStatus, ProviderType
+from letta.schemas.llm_batch_job import LLMBatchJob
+from letta.server.server import SyncServer
+
+logger = get_logger(__name__)
+
+
+class BatchPollingMetrics:
+    """Class to track metrics for batch polling operations."""
+
+    def __init__(self):
+        self.start_time = datetime.datetime.now()
+        self.total_batches = 0
+        self.anthropic_batches = 0
+        self.running_count = 0
+        self.completed_count = 0
+        self.updated_items_count = 0
+
+    def log_summary(self):
+        """Log a summary of the metrics collected during polling."""
+        elapsed = (datetime.datetime.now() - self.start_time).total_seconds()
+        logger.info(f"[Poll BatchJob] Finished poll_running_llm_batches job in {elapsed:.2f}s")
+        logger.info(f"[Poll BatchJob] Found {self.total_batches} running batches total.")
+        logger.info(f"[Poll BatchJob] Found {self.anthropic_batches} Anthropic batch(es) to poll.")
+        logger.info(f"[Poll BatchJob] Final results: {self.completed_count} completed, {self.running_count} still running.")
+        logger.info(f"[Poll BatchJob] Updated {self.updated_items_count} items for newly completed batch(es).")
+
+
+async def fetch_batch_status(server: SyncServer, batch_job: LLMBatchJob) -> BatchPollingResult:
+    """
+    Fetch the current status of a single batch job from the provider.
+
+    Args:
+        server: The SyncServer instance
+        batch_job: The batch job to check status for
+
+    Returns:
+        A tuple containing (batch_id, new_status, polling_response)
+    """
+    batch_id_str = batch_job.create_batch_response.id
+    try:
+        response = await server.anthropic_async_client.beta.messages.batches.retrieve(batch_id_str)
+        new_status = map_anthropic_batch_job_status_to_job_status(response.processing_status)
+        logger.debug(f"[Poll BatchJob] Batch {batch_job.id}: provider={response.processing_status} → internal={new_status}")
+        return (batch_job.id, new_status, response)
+    except Exception as e:
+        logger.warning(f"[Poll BatchJob] Batch {batch_job.id}: failed to retrieve {batch_id_str}: {e}")
+        # We treat a retrieval error as still running to try again next cycle
+        return (batch_job.id, JobStatus.running, None)
+
+
+async def fetch_batch_items(server: SyncServer, batch_id: BatchId, batch_resp_id: str) -> List[ItemUpdateInfo]:
+    """
+    Fetch individual item results for a completed batch.
+
+    Args:
+        server: The SyncServer instance
+        batch_id: The internal batch ID
+        batch_resp_id: The provider's batch response ID
+
+    Returns:
+        A list of item update information tuples
+    """
+    updates = []
+    try:
+        async for item_result in server.anthropic_async_client.beta.messages.batches.results(batch_resp_id):
+            # Here, custom_id should be the agent_id
+            item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
+            updates.append((batch_id, item_result.custom_id, item_status, item_result))
+        logger.info(f"[Poll BatchJob] Fetched {len(updates)} item updates for batch {batch_id}.")
+    except Exception as e:
+        logger.error(f"[Poll BatchJob] Error fetching item updates for batch {batch_id}: {e}")
+
+    return updates
+
+
+async def poll_batch_updates(server: SyncServer, batch_jobs: List[LLMBatchJob], metrics: BatchPollingMetrics) -> List[BatchPollingResult]:
+    """
+    Poll for updates to multiple batch jobs concurrently.
+
+    Args:
+        server: The SyncServer instance
+        batch_jobs: List of batch jobs to poll
+        metrics: Metrics collection object
+
+    Returns:
+        List of batch polling results
+    """
+    if not batch_jobs:
+        logger.info("[Poll BatchJob] No Anthropic batches to update; job complete.")
+        return []
+
+    # Create polling tasks for all batch jobs
+    coros = [fetch_batch_status(server, b) for b in batch_jobs]
+    results: List[BatchPollingResult] = await asyncio.gather(*coros)
+
+    # Update the server with batch status changes
+    server.batch_manager.bulk_update_batch_statuses(updates=results)
+    logger.info(f"[Poll BatchJob] Bulk-updated {len(results)} LLM batch(es) in the DB at job level.")
+
+    return results
+
+
+async def process_completed_batches(
+    server: SyncServer, batch_results: List[BatchPollingResult], metrics: BatchPollingMetrics
+) -> List[ItemUpdateInfo]:
+    """
+    Process batches that have completed and fetch their item results.
+
+    Args:
+        server: The SyncServer instance
+        batch_results: Results from polling batch statuses
+        metrics: Metrics collection object
+
+    Returns:
+        List of item updates to apply
+    """
+    item_update_tasks = []
+
+    # Process each top-level polling result
+    for batch_id, new_status, maybe_batch_resp in batch_results:
+        if not maybe_batch_resp:
+            if new_status == JobStatus.running:
+                metrics.running_count += 1
+            logger.warning(f"[Poll BatchJob] Batch {batch_id}: JobStatus was {new_status} and no batch response was found.")
+            continue

+        if new_status == JobStatus.completed:
+            metrics.completed_count += 1
+            batch_resp_id = maybe_batch_resp.id  # The Anthropic-assigned batch ID
+            # Queue an async call to fetch item results for this batch
+            item_update_tasks.append(fetch_batch_items(server, batch_id, batch_resp_id))
+        elif new_status == JobStatus.running:
+            metrics.running_count += 1
+
+    # Launch all item update tasks concurrently
+    concurrent_results = await asyncio.gather(*item_update_tasks, return_exceptions=True)
+
+    # Flatten and filter the results
+    item_updates = []
+    for result in concurrent_results:
+        if isinstance(result, Exception):
+            logger.error(f"[Poll BatchJob] A fetch_batch_items task failed with: {result}")
+        elif isinstance(result, list):
+            item_updates.extend(result)
+
+    logger.info(f"[Poll BatchJob] Collected a total of {len(item_updates)} item update(s) from completed batches.")
+
+    return item_updates
+
+
+async def poll_running_llm_batches(server: "SyncServer") -> None:
+    """
+    Cron job to poll all running LLM batch jobs and update their polling responses in bulk.
+
+    Steps:
+      1. Fetch currently running batch jobs
+      2. Filter Anthropic only
+      3. Retrieve updated top-level polling info concurrently
+      4. Bulk update LLMBatchJob statuses
+      5. For each completed batch, call .results(...) to get item-level results
+      6. Bulk update all matching LLMBatchItem records by (batch_id, agent_id)
+      7. Log telemetry about success/fail
+    """
+    # Initialize metrics tracking
+    metrics = BatchPollingMetrics()
+
+    logger.info("[Poll BatchJob] Starting poll_running_llm_batches job")
+
+    try:
+        # 1. Retrieve running batch jobs
+        batches = server.batch_manager.list_running_batches()
+        metrics.total_batches = len(batches)
+
+        # TODO: Expand to more providers
+        # 2. Filter for Anthropic jobs only
+        anthropic_batch_jobs = [b for b in batches if b.llm_provider == ProviderType.anthropic]
+        metrics.anthropic_batches = len(anthropic_batch_jobs)
+
+        # 3-4. Poll for batch updates and bulk update statuses
+        batch_results = await poll_batch_updates(server, anthropic_batch_jobs, metrics)
+
+        # 5. Process completed batches and fetch item results
+        item_updates = await process_completed_batches(server, batch_results, metrics)
+
+        # 6. Bulk update all items for newly completed batch(es)
+        if item_updates:
+            metrics.updated_items_count = len(item_updates)
+            server.batch_manager.bulk_update_batch_items_by_agent(item_updates)
+        else:
+            logger.info("[Poll BatchJob] No item-level updates needed.")
+
+    except Exception as e:
+        logger.exception("[Poll BatchJob] Unhandled error in poll_running_llm_batches", exc_info=e)
+    finally:
+        # 7. Log metrics summary
+        metrics.log_summary()
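A minimal sketch of running one polling pass by hand, outside the scheduler (assumes a SyncServer can be constructed with defaults in your deployment):

import asyncio

from letta.jobs.llm_batch_job_polling import poll_running_llm_batches
from letta.server.server import SyncServer

server = SyncServer()  # assumption: default construction is sufficient here
asyncio.run(poll_running_llm_batches(server))  # one fetch -> update -> log cycle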
letta/jobs/scheduler.py
ADDED
@@ -0,0 +1,28 @@
+import datetime
+
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+
+from letta.jobs.llm_batch_job_polling import poll_running_llm_batches
+from letta.server.server import SyncServer
+from letta.settings import settings
+
+scheduler = AsyncIOScheduler()
+
+
+def start_cron_jobs(server: SyncServer):
+    """Initialize cron jobs"""
+    scheduler.add_job(
+        poll_running_llm_batches,
+        args=[server],
+        trigger=IntervalTrigger(seconds=settings.poll_running_llm_batches_interval_seconds),
+        next_run_time=datetime.datetime.now(datetime.UTC),
+        id="poll_llm_batches",
+        name="Poll LLM API batch jobs and update status",
+        replace_existing=True,
+    )
+    scheduler.start()
+
+
+def shutdown_cron_scheduler():
+    scheduler.shutdown()
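A hedged sketch of how these hooks could be wired into application startup and shutdown; the lifespan wrapper and default SyncServer construction are assumptions, only the letta.jobs.scheduler imports come from the diff:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from letta.jobs.scheduler import shutdown_cron_scheduler, start_cron_jobs
from letta.server.server import SyncServer

@asynccontextmanager
async def lifespan(app: FastAPI):
    server = SyncServer()    # assumption: default construction
    start_cron_jobs(server)  # AsyncIOScheduler wants a running event loop, so start inside the app
    yield
    shutdown_cron_scheduler()

app = FastAPI(lifespan=lifespan)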
letta/jobs/types.py
ADDED
@@ -0,0 +1,10 @@
+from typing import Optional, Tuple
+
+from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchIndividualResponse
+
+from letta.schemas.enums import JobStatus
+
+BatchId = str
+AgentId = str
+BatchPollingResult = Tuple[BatchId, JobStatus, Optional[BetaMessageBatch]]
+ItemUpdateInfo = Tuple[BatchId, AgentId, JobStatus, BetaMessageBatchIndividualResponse]
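Illustrative values showing the tuple shapes the polling job passes around (the IDs below are made up):

from letta.jobs.types import BatchPollingResult
from letta.schemas.enums import JobStatus

# A batch that is still running and returned no provider payload yet:
still_running: BatchPollingResult = ("batch-job-id", JobStatus.running, None)
# An ItemUpdateInfo would look like ("batch-job-id", "agent-id", JobStatus.completed, <BetaMessageBatchIndividualResponse>).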
letta/llm_api/anthropic.py
CHANGED
@@ -25,6 +25,7 @@ from letta.llm_api.aws_bedrock import get_bedrock_client
 from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.log import get_logger
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
@@ -44,6 +45,8 @@ from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.tracing import log_event

+logger = get_logger(__name__)
+
 BASE_URL = "https://api.anthropic.com/v1"


@@ -620,9 +623,9 @@ def _prepare_anthropic_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
     # if true, prefix fill the generation with the thinking tag
-    prefix_fill: bool =
+    prefix_fill: bool = False,
     # if true, put COT inside the tool calls instead of inside the content
-    put_inner_thoughts_in_kwargs: bool =
+    put_inner_thoughts_in_kwargs: bool = True,
     bedrock: bool = False,
     # extended thinking related fields
     # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
@@ -634,7 +637,9 @@ def _prepare_anthropic_request(
         assert (
             max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
         ), "max tokens must be greater than thinking budget"
-
+        if put_inner_thoughts_in_kwargs:
+            logger.warning("Extended thinking not compatible with put_inner_thoughts_in_kwargs")
+            put_inner_thoughts_in_kwargs = False
         # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
         # Silently disable prefix_fill for now
         prefix_fill = False
letta/llm_api/anthropic_client.py
CHANGED

@@ -90,7 +90,7 @@ class AnthropicClient(LLMClientBase):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
-        tools: List[dict],
+        tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
     ) -> dict:
         # TODO: This needs to get cleaned up. The logic here is pretty confusing.
@@ -146,11 +146,12 @@ class AnthropicClient(LLMClientBase):
         tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None

         # Add tool choice
-
+        if tool_choice:
+            data["tool_choice"] = tool_choice

         # Add inner thoughts kwarg
         # TODO: Can probably make this more efficient
-        if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
+        if tools_for_request and len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -158,7 +159,7 @@ class AnthropicClient(LLMClientBase):
             )
             tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]

-        if len(tools_for_request) > 0:
+        if tools_for_request and len(tools_for_request) > 0:
             # TODO eventually enable parallel tool use
             data["tools"] = convert_tools_to_anthropic_format(tools_for_request)

letta/llm_api/llm_api_tools.py
CHANGED
@@ -322,6 +322,7 @@ def create(

         # Force tool calling
         tool_call = None
+        llm_config.put_inner_thoughts_in_kwargs = True
         if functions is None:
             # Special case for summarization path
             tools = None
@@ -356,6 +357,7 @@ def create(
         if stream:  # Client requested token streaming
             assert isinstance(stream_interface, (AgentChunkStreamingInterface, AgentRefreshStreamingInterface)), type(stream_interface)

+            stream_interface.inner_thoughts_in_kwargs = True
             response = anthropic_chat_completions_process_stream(
                 chat_completion_request=chat_completion_request,
                 put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
letta/llm_api/openai_client.py
CHANGED
@@ -78,9 +78,11 @@ class OpenAIClient(LLMClientBase):
         # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
         # TODO(matt) move into LLMConfig
         # TODO: This vllm checking is very brittle and is a patch at most
+        tool_choice = None
         if self.llm_config.model_endpoint == "https://inference.memgpt.ai" or (self.llm_config.handle and "vllm" in self.llm_config.handle):
             tool_choice = "auto"  # TODO change to "required" once proxy supports it
-
+        elif tools:
+            # only set if tools is non-Null
             tool_choice = "required"

         if force_tool_call is not None:
letta/memory.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Callable, Dict, List

 from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
 from letta.llm_api.llm_api_tools import create
+from letta.llm_api.llm_client import LLMClient
 from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole
@@ -9,6 +10,7 @@ from letta.schemas.letta_message_content import TextContent
 from letta.schemas.memory import Memory
 from letta.schemas.message import Message
 from letta.settings import summarizer_settings
+from letta.tracing import trace_method
 from letta.utils import count_tokens, printd


@@ -45,6 +47,7 @@ def _format_summary_history(message_history: List[Message]):
     return "\n".join([f"{m.role}: {get_message_text(m.content)}" for m in message_history])


+@trace_method
 def summarize_messages(
     agent_state: AgentState,
     message_sequence_to_summarize: List[Message],
@@ -74,12 +77,25 @@ def summarize_messages(
     # TODO: We need to eventually have a separate LLM config for the summarizer LLM
     llm_config_no_inner_thoughts = agent_state.llm_config.model_copy(deep=True)
     llm_config_no_inner_thoughts.put_inner_thoughts_in_kwargs = False
-    response = create(
+
+    llm_client = LLMClient.create(
         llm_config=llm_config_no_inner_thoughts,
-        user_id=agent_state.created_by_id,
-        messages=message_sequence,
-        stream=False,
+        put_inner_thoughts_first=False,
     )
+    # try to use new client, otherwise fallback to old flow
+    # TODO: we can just directly call the LLM here?
+    if llm_client:
+        response = llm_client.send_llm_request(
+            messages=message_sequence,
+            stream=False,
+        )
+    else:
+        response = create(
+            llm_config=llm_config_no_inner_thoughts,
+            user_id=agent_state.created_by_id,
+            messages=message_sequence,
+            stream=False,
+        )

     printd(f"summarize_messages gpt reply: {response.choices[0]}")
     reply = response.choices[0].message.content
letta/orm/message.py
CHANGED
@@ -1,8 +1,8 @@
 from typing import List, Optional

 from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall as OpenAIToolCall
-from sqlalchemy import BigInteger, ForeignKey, Index, Sequence
-from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy import BigInteger, ForeignKey, Index, Sequence, event, text
+from sqlalchemy.orm import Mapped, Session, mapped_column, relationship

 from letta.orm.custom_columns import MessageContentColumn, ToolCallColumn, ToolReturnColumn
 from letta.orm.mixins import AgentMixin, OrganizationMixin
@@ -11,6 +11,7 @@ from letta.schemas.letta_message_content import MessageContent
 from letta.schemas.letta_message_content import TextContent as PydanticTextContent
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.message import ToolReturn
+from letta.settings import settings


 class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
@@ -42,9 +43,7 @@ class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
     group_id: Mapped[Optional[str]] = mapped_column(nullable=True, doc="The multi-agent group that the message was sent in")

     # Monotonically increasing sequence for efficient/correct listing
-    sequence_id
-        BigInteger, Sequence("message_seq_id"), unique=True, nullable=False, doc="Global monotonically increasing ID"
-    )
+    sequence_id = mapped_column(BigInteger, Sequence("message_seq_id"), unique=True, nullable=False)

     # Relationships
     agent: Mapped["Agent"] = relationship("Agent", back_populates="messages", lazy="selectin")
@@ -67,3 +66,20 @@ class Message(SqlalchemyBase, OrganizationMixin, AgentMixin):
         if self.text and not model.content:
             model.content = [PydanticTextContent(text=self.text)]
         return model
+
+# listener
+
+
+@event.listens_for(Message, "before_insert")
+def set_sequence_id_for_sqlite(mapper, connection, target):
+    # TODO: Kind of hacky, used to detect if we are using sqlite or not
+    if not settings.pg_uri:
+        session = Session.object_session(target)
+
+        if not hasattr(session, "_sequence_id_counter"):
+            # Initialize counter for this flush
+            max_seq = connection.scalar(text("SELECT MAX(sequence_id) FROM messages"))
+            session._sequence_id_counter = max_seq or 0
+
+        session._sequence_id_counter += 1
+        target.sequence_id = session._sequence_id_counter
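An isolated sketch (hypothetical names, not letta code) of the per-session counter logic the SQLite listener uses: the DB maximum is read once per session, then each insert increments the cached counter so rows stay monotonic within a flush:

class _FakeSession:
    """Stands in for a SQLAlchemy Session carrying the cached counter attribute."""

def next_sequence_id(session, current_db_max: int) -> int:
    if not hasattr(session, "_sequence_id_counter"):
        session._sequence_id_counter = current_db_max or 0  # seeded once, like the MAX(sequence_id) query
    session._sequence_id_counter += 1
    return session._sequence_id_counter

s = _FakeSession()
print(next_sequence_id(s, 41))  # 42
print(next_sequence_id(s, 41))  # 43 -- the DB max is not re-read for the same session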
letta/schemas/enums.py
CHANGED
letta/schemas/llm_config.py
CHANGED
@@ -2,6 +2,10 @@ from typing import Literal, Optional

 from pydantic import BaseModel, ConfigDict, Field, model_validator

+from letta.log import get_logger
+
+logger = get_logger(__name__)
+

 class LLMConfig(BaseModel):
     """
@@ -88,14 +92,14 @@ class LLMConfig(BaseModel):
         return values

     @model_validator(mode="after")
-    def
+    def issue_warning_for_reasoning_constraints(self) -> "LLMConfig":
         if self.enable_reasoner:
             if self.max_reasoning_tokens is None:
-
+                logger.warning("max_reasoning_tokens must be set when enable_reasoner is True")
             if self.max_tokens is not None and self.max_reasoning_tokens >= self.max_tokens:
-
+                logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
             if self.put_inner_thoughts_in_kwargs:
-
+                logger.warning("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
         return self

     @classmethod
letta/schemas/message.py
CHANGED
@@ -37,6 +37,7 @@ from letta.schemas.letta_message_content import (
     get_letta_message_content_union_str_json_schema,
 )
 from letta.system import unpack_message
+from letta.utils import parse_json


 def add_inner_thoughts_to_tool_call(
@@ -47,7 +48,7 @@ def add_inner_thoughts_to_tool_call(
     """Add inner thoughts (arg + value) to a tool call"""
     try:
         # load the args list
-        func_args =
+        func_args = parse_json(tool_call.function.arguments)
         # create new ordered dict with inner thoughts first
         ordered_args = OrderedDict({inner_thoughts_key: inner_thoughts})
         # update with remaining args
@@ -293,7 +294,7 @@ class Message(BaseMessage):
             if use_assistant_message and tool_call.function.name == assistant_message_tool_name:
                 # We need to unpack the actual message contents from the function call
                 try:
-                    func_args =
+                    func_args = parse_json(tool_call.function.arguments)
                     message_string = func_args[assistant_message_tool_kwarg]
                 except KeyError:
                     raise ValueError(f"Function call {tool_call.function.name} missing {assistant_message_tool_kwarg} argument")
@@ -336,7 +337,7 @@ class Message(BaseMessage):
             raise ValueError(f"Invalid tool return (no text object on message): {self.content}")

         try:
-            function_return =
+            function_return = parse_json(text_content)
             status = function_return["status"]
             if status == "OK":
                 status_enum = "success"
@@ -760,7 +761,7 @@ class Message(BaseMessage):
                     inner_thoughts_key=INNER_THOUGHTS_KWARG,
                 ).model_dump()
             else:
-                tool_call_input =
+                tool_call_input = parse_json(tool_call.function.arguments)

             content.append(
                 {
@@ -846,7 +847,7 @@ class Message(BaseMessage):
                 function_args = tool_call.function.arguments
                 try:
                     # NOTE: Google AI wants actual JSON objects, not strings
-                    function_args =
+                    function_args = parse_json(function_args)
                 except:
                     raise UserWarning(f"Failed to parse JSON function args: {function_args}")
                     function_args = {"args": function_args}
@@ -881,7 +882,7 @@ class Message(BaseMessage):

             # NOTE: Google AI API wants the function response as JSON only, no string
             try:
-                function_response =
+                function_response = parse_json(text_content)
             except:
                 function_response = {"function_response": text_content}

@@ -970,7 +971,7 @@ class Message(BaseMessage):
             ]
             for tc in self.tool_calls:
                 function_name = tc.function["name"]
-                function_args =
+                function_args = parse_json(tc.function["arguments"])
                 function_args_str = ",".join([f"{k}={v}" for k, v in function_args.items()])
                 function_call_text = f"{function_name}({function_args_str})"
                 cohere_message.append(