letta-nightly 0.11.4.dev20250825104222__py3-none-any.whl → 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +9 -3
  3. letta/agents/base_agent.py +2 -2
  4. letta/agents/letta_agent.py +56 -45
  5. letta/agents/voice_agent.py +2 -2
  6. letta/data_sources/redis_client.py +146 -1
  7. letta/errors.py +4 -0
  8. letta/functions/function_sets/files.py +2 -2
  9. letta/functions/mcp_client/types.py +30 -6
  10. letta/functions/schema_generator.py +46 -1
  11. letta/functions/schema_validator.py +17 -2
  12. letta/functions/types.py +1 -1
  13. letta/helpers/tool_execution_helper.py +0 -2
  14. letta/llm_api/anthropic_client.py +27 -5
  15. letta/llm_api/deepseek_client.py +97 -0
  16. letta/llm_api/groq_client.py +79 -0
  17. letta/llm_api/helpers.py +0 -1
  18. letta/llm_api/llm_api_tools.py +2 -113
  19. letta/llm_api/llm_client.py +21 -0
  20. letta/llm_api/llm_client_base.py +11 -9
  21. letta/llm_api/openai_client.py +3 -0
  22. letta/llm_api/xai_client.py +85 -0
  23. letta/prompts/prompt_generator.py +190 -0
  24. letta/schemas/agent_file.py +17 -2
  25. letta/schemas/file.py +24 -1
  26. letta/schemas/job.py +2 -0
  27. letta/schemas/letta_message.py +2 -0
  28. letta/schemas/letta_request.py +22 -0
  29. letta/schemas/message.py +10 -1
  30. letta/schemas/providers/bedrock.py +1 -0
  31. letta/server/rest_api/redis_stream_manager.py +300 -0
  32. letta/server/rest_api/routers/v1/agents.py +129 -7
  33. letta/server/rest_api/routers/v1/folders.py +15 -5
  34. letta/server/rest_api/routers/v1/runs.py +101 -11
  35. letta/server/rest_api/routers/v1/sources.py +21 -53
  36. letta/server/rest_api/routers/v1/telemetry.py +14 -4
  37. letta/server/rest_api/routers/v1/tools.py +2 -2
  38. letta/server/rest_api/streaming_response.py +3 -24
  39. letta/server/server.py +0 -1
  40. letta/services/agent_manager.py +2 -2
  41. letta/services/agent_serialization_manager.py +129 -32
  42. letta/services/file_manager.py +111 -6
  43. letta/services/file_processor/file_processor.py +5 -2
  44. letta/services/files_agents_manager.py +60 -0
  45. letta/services/helpers/agent_manager_helper.py +4 -205
  46. letta/services/helpers/tool_parser_helper.py +6 -3
  47. letta/services/mcp/base_client.py +7 -1
  48. letta/services/mcp/sse_client.py +7 -2
  49. letta/services/mcp/stdio_client.py +5 -0
  50. letta/services/mcp/streamable_http_client.py +11 -2
  51. letta/services/mcp_manager.py +31 -30
  52. letta/services/source_manager.py +26 -1
  53. letta/services/summarizer/summarizer.py +21 -10
  54. letta/services/tool_executor/files_tool_executor.py +13 -9
  55. letta/services/tool_executor/mcp_tool_executor.py +3 -0
  56. letta/services/tool_executor/tool_execution_manager.py +13 -0
  57. letta/services/tool_manager.py +43 -20
  58. letta/settings.py +1 -0
  59. letta/utils.py +37 -0
  60. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/METADATA +2 -2
  61. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/RECORD +64 -63
  62. letta/functions/mcp_client/__init__.py +0 -0
  63. letta/functions/mcp_client/base_client.py +0 -156
  64. letta/functions/mcp_client/sse_client.py +0 -51
  65. letta/functions/mcp_client/stdio_client.py +0 -109
  66. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/LICENSE +0 -0
  67. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/WHEEL +0 -0
  68. {letta_nightly-0.11.4.dev20250825104222.dist-info → letta_nightly-0.11.5.dist-info}/entry_points.txt +0 -0
letta/prompts/prompt_generator.py ADDED
@@ -0,0 +1,190 @@
+ from datetime import datetime
+ from typing import List, Literal, Optional
+
+ from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
+ from letta.helpers import ToolRulesSolver
+ from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
+ from letta.otel.tracing import trace_method
+ from letta.schemas.memory import Memory
+
+
+ class PromptGenerator:
+
+     # TODO: This code is kind of wonky and deserves a rewrite
+     @trace_method
+     @staticmethod
+     def compile_memory_metadata_block(
+         memory_edit_timestamp: datetime,
+         timezone: str,
+         previous_message_count: int = 0,
+         archival_memory_size: Optional[int] = 0,
+     ) -> str:
+         """
+         Generate a memory metadata block for the agent's system prompt.
+
+         This creates a structured metadata section that informs the agent about
+         the current state of its memory systems, including timing information
+         and memory counts. This helps the agent understand what information
+         is available through its tools.
+
+         Args:
+             memory_edit_timestamp: When memory blocks were last modified
+             timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
+             previous_message_count: Number of messages in recall memory (conversation history)
+             archival_memory_size: Number of items in archival memory (long-term storage)
+
+         Returns:
+             A formatted string containing the memory metadata block with XML-style tags
+
+         Example Output:
+             <memory_metadata>
+             - The current time is: 2024-01-15 10:30 AM PST
+             - Memory blocks were last modified: 2024-01-15 09:00 AM PST
+             - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
+             - 156 total memories you created are stored in archival memory (use tools to access them)
+             </memory_metadata>
+         """
+         # Put the timestamp in the local timezone (mimicking get_local_time())
+         timestamp_str = format_datetime(memory_edit_timestamp, timezone)
+
+         # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
+         metadata_lines = [
+             "<memory_metadata>",
+             f"- The current time is: {get_local_time_fast(timezone)}",
+             f"- Memory blocks were last modified: {timestamp_str}",
+             f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
+         ]
+
+         # Only include archival memory line if there are archival memories
+         if archival_memory_size is not None and archival_memory_size > 0:
+             metadata_lines.append(
+                 f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
+             )
+
+         metadata_lines.append("</memory_metadata>")
+         memory_metadata_block = "\n".join(metadata_lines)
+         return memory_metadata_block
+
+     @staticmethod
+     def safe_format(template: str, variables: dict) -> str:
+         """
+         Safely formats a template string, preserving empty {} and {unknown_vars}
+         while substituting known variables.
+
+         If we simply use {} in format_map, it'll be treated as a positional field
+         """
+         # First escape any empty {} by doubling them
+         escaped = template.replace("{}", "{{}}")
+
+         # Now use format_map with our custom mapping
+         return escaped.format_map(PreserveMapping(variables))
+
+     @trace_method
+     @staticmethod
+     def get_system_message_from_compiled_memory(
+         system_prompt: str,
+         memory_with_sources: str,
+         in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+         timezone: str,
+         user_defined_variables: Optional[dict] = None,
+         append_icm_if_missing: bool = True,
+         template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+         previous_message_count: int = 0,
+         archival_memory_size: int = 0,
+     ) -> str:
+         """Prepare the final/full system message that will be fed into the LLM API
+
+         The base system message may be templated, in which case we need to render the variables.
+
+         The following are reserved variables:
+           - CORE_MEMORY: the in-context memory of the LLM
+         """
+         if user_defined_variables is not None:
+             # TODO eventually support the user defining their own variables to inject
+             raise NotImplementedError
+         else:
+             variables = {}
+
+         # Add the protected memory variable
+         if IN_CONTEXT_MEMORY_KEYWORD in variables:
+             raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
+         else:
+             # TODO should this all be put into the memory.__repr__ function?
+             memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
+                 memory_edit_timestamp=in_context_memory_last_edit,
+                 previous_message_count=previous_message_count,
+                 archival_memory_size=archival_memory_size,
+                 timezone=timezone,
+             )
+
+             full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
+
+             # Add to the variables list to inject
+             variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
+
+         if template_format == "f-string":
+             memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
+
+             # Catch the special case where the system prompt is unformatted
+             if append_icm_if_missing:
+                 if memory_variable_string not in system_prompt:
+                     # In this case, append it to the end to make sure memory is still injected
+                     # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                     system_prompt += "\n\n" + memory_variable_string
+
+             # render the variables using the built-in templater
+             try:
+                 if user_defined_variables:
+                     formatted_prompt = PromptGenerator.safe_format(system_prompt, variables)
+                 else:
+                     formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
+             except Exception as e:
+                 raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
+
+         else:
+             # TODO support for mustache and jinja2
+             raise NotImplementedError(template_format)
+
+         return formatted_prompt
+
+     @trace_method
+     @staticmethod
+     async def compile_system_message_async(
+         system_prompt: str,
+         in_context_memory: Memory,
+         in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+         timezone: str,
+         user_defined_variables: Optional[dict] = None,
+         append_icm_if_missing: bool = True,
+         template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+         previous_message_count: int = 0,
+         archival_memory_size: int = 0,
+         tool_rules_solver: Optional[ToolRulesSolver] = None,
+         sources: Optional[List] = None,
+         max_files_open: Optional[int] = None,
+     ) -> str:
+         tool_constraint_block = None
+         if tool_rules_solver is not None:
+             tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+
+         if user_defined_variables is not None:
+             # TODO eventually support the user defining their own variables to inject
+             raise NotImplementedError
+         else:
+             pass
+
+         memory_with_sources = await in_context_memory.compile_in_thread_async(
+             tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+         )
+
+         return PromptGenerator.get_system_message_from_compiled_memory(
+             system_prompt=system_prompt,
+             memory_with_sources=memory_with_sources,
+             in_context_memory_last_edit=in_context_memory_last_edit,
+             timezone=timezone,
+             user_defined_variables=user_defined_variables,
+             append_icm_if_missing=append_icm_if_missing,
+             template_format=template_format,
+             previous_message_count=previous_message_count,
+             archival_memory_size=archival_memory_size,
+         )
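A minimal usage sketch for the new PromptGenerator (illustrative, not part of the diff; values are made up). Note that safe_format references PreserveMapping without importing it in this file; presumably it resolves elsewhere in the package (letta/utils.py also gains 37 lines in this release) as a dict subclass whose __missing__ keeps unknown placeholders intact — that is an assumption, not shown in this diff.

```python
# Sketch, assuming a working letta install exposing these modules.
from datetime import datetime, timezone

from letta.prompts.prompt_generator import PromptGenerator

# Build the metadata block exactly as it will appear in the system prompt.
metadata = PromptGenerator.compile_memory_metadata_block(
    memory_edit_timestamp=datetime.now(timezone.utc),
    timezone="America/Los_Angeles",
    previous_message_count=42,
    archival_memory_size=156,
)
print(metadata)  # <memory_metadata> ... </memory_metadata>

# Render a templated system prompt. "CORE_MEMORY" is the reserved f-string
# variable (IN_CONTEXT_MEMORY_KEYWORD); if the placeholder is missing it is
# appended automatically because append_icm_if_missing defaults to True.
full_prompt = PromptGenerator.get_system_message_from_compiled_memory(
    system_prompt="You are a helpful agent.\n\n{CORE_MEMORY}",
    memory_with_sources="<memory_blocks>...</memory_blocks>",  # made-up compiled memory
    in_context_memory_last_edit=datetime.now(timezone.utc),
    timezone="America/Los_Angeles",
)
```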
letta/schemas/agent_file.py CHANGED
@@ -1,15 +1,17 @@
  from datetime import datetime
  from typing import Any, Dict, List, Optional

+ from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall as OpenAIToolCall
  from pydantic import BaseModel, Field

+ from letta.helpers.datetime_helpers import get_utc_time
  from letta.schemas.agent import AgentState, CreateAgent
  from letta.schemas.block import Block, CreateBlock
  from letta.schemas.enums import MessageRole
  from letta.schemas.file import FileAgent, FileAgentBase, FileMetadata, FileMetadataBase
  from letta.schemas.group import Group, GroupCreate
  from letta.schemas.mcp import MCPServer
- from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.message import Message, MessageCreate, ToolReturn
  from letta.schemas.source import Source, SourceCreate
  from letta.schemas.tool import Tool
  from letta.schemas.user import User
@@ -46,6 +48,15 @@ class MessageSchema(MessageCreate):
      role: MessageRole = Field(..., description="The role of the participant.")
      model: Optional[str] = Field(None, description="The model used to make the function call")
      agent_id: Optional[str] = Field(None, description="The unique identifier of the agent")
+     tool_calls: Optional[List[OpenAIToolCall]] = Field(
+         default=None, description="The list of tool calls requested. Only applicable for role assistant."
+     )
+     tool_call_id: Optional[str] = Field(default=None, description="The ID of the tool call. Only applicable for role tool.")
+     tool_returns: Optional[List[ToolReturn]] = Field(default=None, description="Tool execution return information for prior tool calls")
+     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
+
+     # TODO: Should we also duplicate the steps here?
+     # TODO: What about tool_return?

      @classmethod
      def from_message(cls, message: Message) -> "MessageSchema":
@@ -64,6 +75,10 @@ class MessageSchema(MessageCreate):
              group_id=message.group_id,
              model=message.model,
              agent_id=message.agent_id,
+             tool_calls=message.tool_calls,
+             tool_call_id=message.tool_call_id,
+             tool_returns=message.tool_returns,
+             created_at=message.created_at,
          )


@@ -114,7 +129,7 @@ class AgentSchema(CreateAgent):
              memory_blocks=[],  # TODO: Convert from agent_state.memory if needed
              tools=[],
              tool_ids=[tool.id for tool in agent_state.tools] if agent_state.tools else [],
-             source_ids=[],  # [source.id for source in agent_state.sources] if agent_state.sources else [],
+             source_ids=[source.id for source in agent_state.sources] if agent_state.sources else [],
              block_ids=[block.id for block in agent_state.memory.blocks],
              tool_rules=agent_state.tool_rules,
              tags=agent_state.tags,
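The practical effect of the MessageSchema change is that exported agent files now preserve tool interactions and creation times. A serialized message entry gains fields along these lines (illustrative values only, not from the diff):

```python
# Illustrative shape of a serialized MessageSchema entry after this change.
# Field names come from the schema above; every value here is made up.
example_entry = {
    "role": "assistant",
    "model": "gpt-4.1",
    "tool_calls": [
        {
            "id": "call_abc123",
            "type": "function",
            "function": {"name": "archival_memory_search", "arguments": '{"query": "project status"}'},
        }
    ],
    "tool_call_id": None,   # set on role "tool" messages instead
    "tool_returns": None,   # populated with returns from prior tool executions
    "created_at": "2025-08-25T10:30:00+00:00",
}
```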
letta/schemas/file.py CHANGED
@@ -1,6 +1,6 @@
  from datetime import datetime
  from enum import Enum
- from typing import Optional
+ from typing import List, Optional

  from pydantic import Field

@@ -108,3 +108,26 @@ class FileAgent(FileAgentBase):
          default_factory=datetime.utcnow,
          description="Row last-update timestamp (UTC).",
      )
+
+
+ class AgentFileAttachment(LettaBase):
+     """Response model for agent file attachments showing file status in agent context"""
+
+     id: str = Field(..., description="Unique identifier of the file-agent relationship")
+     file_id: str = Field(..., description="Unique identifier of the file")
+     file_name: str = Field(..., description="Name of the file")
+     folder_id: str = Field(..., description="Unique identifier of the folder/source")
+     folder_name: str = Field(..., description="Name of the folder/source")
+     is_open: bool = Field(..., description="Whether the file is currently open in the agent's context")
+     last_accessed_at: Optional[datetime] = Field(None, description="Timestamp of last access by the agent")
+     visible_content: Optional[str] = Field(None, description="Portion of the file visible to the agent if open")
+     start_line: Optional[int] = Field(None, description="Starting line number if file was opened with line range")
+     end_line: Optional[int] = Field(None, description="Ending line number if file was opened with line range")
+
+
+ class PaginatedAgentFiles(LettaBase):
+     """Paginated response for agent files"""
+
+     files: List[AgentFileAttachment] = Field(..., description="List of file attachments for the agent")
+     next_cursor: Optional[str] = Field(None, description="Cursor for fetching the next page (file-agent relationship ID)")
+     has_more: bool = Field(..., description="Whether more results exist after this page")
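A short sketch of constructing the new response models directly (IDs and content are made up; assumes a letta install where these classes are importable from letta.schemas.file):

```python
from letta.schemas.file import AgentFileAttachment, PaginatedAgentFiles

# One open file in the agent's context; only the required and relevant fields set.
attachment = AgentFileAttachment(
    id="file_agent-123",      # made-up file-agent relationship ID
    file_id="file-456",
    file_name="notes.md",
    folder_id="source-789",
    folder_name="project-docs",
    is_open=True,
    visible_content="# Notes\n...",
)

# The paginated wrapper uses the relationship ID as the page cursor.
page = PaginatedAgentFiles(files=[attachment], next_cursor="file_agent-123", has_more=False)
```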
letta/schemas/job.py CHANGED
@@ -4,6 +4,7 @@ from typing import List, Optional

  from pydantic import BaseModel, ConfigDict, Field

  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+ from letta.helpers.datetime_helpers import get_utc_time
  from letta.schemas.enums import JobStatus, JobType
  from letta.schemas.letta_base import OrmMetadataBase
  from letta.schemas.letta_message import MessageType
@@ -12,6 +13,7 @@ from letta.schemas.letta_message import MessageType
  class JobBase(OrmMetadataBase):
      __id_prefix__ = "job"
      status: JobStatus = Field(default=JobStatus.created, description="The status of the job.")
+     created_at: datetime = Field(default_factory=get_utc_time, description="The unix timestamp of when the job was created.")
      completed_at: Optional[datetime] = Field(None, description="The unix timestamp of when the job was completed.")
      metadata: Optional[dict] = Field(None, validation_alias="metadata_", description="The metadata of the job.")
      job_type: JobType = Field(default=JobType.JOB, description="The type of the job.")
letta/schemas/letta_message.py CHANGED
@@ -52,6 +52,8 @@ class LettaMessage(BaseModel):
      sender_id: str | None = None
      step_id: str | None = None
      is_err: bool | None = None
+     seq_id: int | None = None
+     run_id: str | None = None

      @field_serializer("date")
      def serialize_datetime(self, dt: datetime, _info):
letta/schemas/letta_request.py CHANGED
@@ -46,6 +46,10 @@ class LettaStreamingRequest(LettaRequest):
          default=False,
          description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
      )
+     background: bool = Field(
+         default=False,
+         description="Whether to process the request in the background.",
+     )


  class LettaAsyncRequest(LettaRequest):
@@ -66,3 +70,21 @@ class CreateBatch(BaseModel):
          "'status' is the final batch status (e.g., 'completed', 'failed'), and "
          "'completed_at' is an ISO 8601 timestamp indicating when the batch job completed.",
      )
+
+
+ class RetrieveStreamRequest(BaseModel):
+     starting_after: int = Field(
+         0, description="Sequence id to use as a cursor for pagination. Response will start streaming after this chunk sequence id"
+     )
+     include_pings: Optional[bool] = Field(
+         default=False,
+         description="Whether to include periodic keepalive ping messages in the stream to prevent connection timeouts.",
+     )
+     poll_interval: Optional[float] = Field(
+         default=0.1,
+         description="Seconds to wait between polls when no new data.",
+     )
+     batch_size: Optional[int] = Field(
+         default=100,
+         description="Number of entries to read per batch.",
+     )
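Taken together with the new seq_id and run_id fields on LettaMessage above, RetrieveStreamRequest gives clients a cursor for re-attaching to a background stream. A hypothetical client-side sketch follows; the endpoint path and port are assumptions based on the v1/runs.py changes in this release, not documented API:

```python
# Hypothetical resume-a-stream client; endpoint path, port, and run ID are made up.
import httpx

from letta.schemas.letta_request import RetrieveStreamRequest

req = RetrieveStreamRequest(starting_after=42, include_pings=False)

with httpx.stream(
    "POST",
    "http://localhost:8283/v1/runs/run-123/stream",
    json=req.model_dump(),
) as resp:
    for line in resp.iter_lines():
        # Each SSE data line carries a seq_id; persist the last one seen so a
        # dropped connection can resume with starting_after=<last seq_id>.
        if line.startswith("data: "):
            print(line)
```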
letta/schemas/message.py CHANGED
@@ -414,6 +414,8 @@ class Message(BaseMessage):
          except json.JSONDecodeError:
              raise ValueError(f"Failed to decode function return: {text_content}")

+         # if self.tool_call_id is None:
+         #     import pdb;pdb.set_trace()
          assert self.tool_call_id is not None

          return ToolReturnMessage(
@@ -844,7 +846,7 @@ class Message(BaseMessage):
          }
          content = []
          # COT / reasoning / thinking
-         if self.content is not None and len(self.content) > 1:
+         if self.content is not None and len(self.content) >= 1:
              for content_part in self.content:
                  if isinstance(content_part, ReasoningContent):
                      content.append(
@@ -861,6 +863,13 @@ class Message(BaseMessage):
                              "data": content_part.data,
                          }
                      )
+                 if isinstance(content_part, TextContent):
+                     content.append(
+                         {
+                             "type": "text",
+                             "text": content_part.text,
+                         }
+                     )
          elif text_content is not None:
              content.append(
                  {
letta/schemas/providers/bedrock.py CHANGED
@@ -18,6 +18,7 @@ logger = get_logger(__name__)
  class BedrockProvider(Provider):
      provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.")
      provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     access_key: str = Field(..., description="AWS secret access key for Bedrock.")
      region: str = Field(..., description="AWS region for Bedrock")

      async def bedrock_get_model_list_async(self) -> list[dict]:
letta/server/rest_api/redis_stream_manager.py ADDED
@@ -0,0 +1,300 @@
+ """Redis stream manager for reading and writing SSE chunks with batching and TTL."""
+
+ import asyncio
+ import json
+ import time
+ from collections import defaultdict
+ from typing import AsyncIterator, Dict, List, Optional
+
+ from letta.data_sources.redis_client import AsyncRedisClient
+ from letta.log import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class RedisSSEStreamWriter:
+     """
+     Efficiently writes SSE chunks to Redis streams with batching and TTL management.
+
+     Features:
+     - Batches writes using Redis pipelines for performance
+     - Automatically sets/refreshes TTL on streams
+     - Tracks sequential IDs for cursor-based recovery
+     - Handles flush on size or time thresholds
+     """
+
+     def __init__(
+         self,
+         redis_client: AsyncRedisClient,
+         flush_interval: float = 0.5,
+         flush_size: int = 50,
+         stream_ttl_seconds: int = 10800,  # 3 hours default
+         max_stream_length: int = 10000,  # Max entries per stream
+     ):
+         """
+         Initialize the Redis SSE stream writer.
+
+         Args:
+             redis_client: Redis client instance
+             flush_interval: Seconds between automatic flushes
+             flush_size: Number of chunks to buffer before flushing
+             stream_ttl_seconds: TTL for streams in seconds (default: 3 hours)
+             max_stream_length: Maximum entries per stream before trimming
+         """
+         self.redis = redis_client
+         self.flush_interval = flush_interval
+         self.flush_size = flush_size
+         self.stream_ttl = stream_ttl_seconds
+         self.max_stream_length = max_stream_length
+
+         # Buffer for batching: run_id -> list of chunks
+         self.buffer: Dict[str, List[Dict]] = defaultdict(list)
+         # Track sequence IDs per run
+         self.seq_counters: Dict[str, int] = defaultdict(lambda: 1)
+         # Track last flush time per run
+         self.last_flush: Dict[str, float] = defaultdict(float)
+
+         # Background flush task
+         self._flush_task = None
+         self._running = False
+
+     async def start(self):
+         """Start the background flush task."""
+         if not self._running:
+             self._running = True
+             self._flush_task = asyncio.create_task(self._periodic_flush())
+
+     async def stop(self):
+         """Stop the background flush task and flush remaining data."""
+         self._running = False
+         if self._flush_task:
+             self._flush_task.cancel()
+             try:
+                 await self._flush_task
+             except asyncio.CancelledError:
+                 pass
+
+         for run_id in list(self.buffer.keys()):
+             if self.buffer[run_id]:
+                 await self._flush_run(run_id)
+
+     async def write_chunk(
+         self,
+         run_id: str,
+         data: str,
+         is_complete: bool = False,
+     ) -> int:
+         """
+         Write an SSE chunk to the buffer for a specific run.
+
+         Args:
+             run_id: The run ID to write to
+             data: SSE-formatted chunk data
+             is_complete: Whether this is the final chunk
+
+         Returns:
+             The sequence ID assigned to this chunk
+         """
+         seq_id = self.seq_counters[run_id]
+         self.seq_counters[run_id] += 1
+
+         chunk = {
+             "seq_id": seq_id,
+             "data": data,
+             "timestamp": int(time.time() * 1000),
+         }
+
+         if is_complete:
+             chunk["complete"] = "true"
+
+         self.buffer[run_id].append(chunk)
+
+         should_flush = (
+             len(self.buffer[run_id]) >= self.flush_size or is_complete or (time.time() - self.last_flush[run_id]) > self.flush_interval
+         )
+
+         if should_flush:
+             await self._flush_run(run_id)
+
+         return seq_id
+
+     async def _flush_run(self, run_id: str):
+         """Flush buffered chunks for a specific run to Redis."""
+         if not self.buffer[run_id]:
+             return
+
+         chunks = self.buffer[run_id]
+         self.buffer[run_id] = []
+         stream_key = f"sse:run:{run_id}"
+
+         try:
+             client = await self.redis.get_client()
+
+             async with client.pipeline(transaction=False) as pipe:
+                 for chunk in chunks:
+                     pipe.xadd(stream_key, chunk, maxlen=self.max_stream_length, approximate=True)
+
+                 pipe.expire(stream_key, self.stream_ttl)
+
+                 await pipe.execute()
+
+             self.last_flush[run_id] = time.time()
+
+             logger.debug(
+                 f"Flushed {len(chunks)} chunks to Redis stream {stream_key}, " f"seq_ids {chunks[0]['seq_id']}-{chunks[-1]['seq_id']}"
+             )
+
+             if chunks[-1].get("complete") == "true":
+                 self._cleanup_run(run_id)
+
+         except Exception as e:
+             logger.error(f"Failed to flush chunks for run {run_id}: {e}")
+             # Put chunks back in buffer to retry
+             self.buffer[run_id] = chunks + self.buffer[run_id]
+             raise
+
+     async def _periodic_flush(self):
+         """Background task to periodically flush buffers."""
+         while self._running:
+             try:
+                 await asyncio.sleep(self.flush_interval)
+
+                 # Check each run for time-based flush
+                 current_time = time.time()
+                 runs_to_flush = [
+                     run_id
+                     for run_id, last_flush in self.last_flush.items()
+                     if (current_time - last_flush) > self.flush_interval and self.buffer[run_id]
+                 ]
+
+                 for run_id in runs_to_flush:
+                     await self._flush_run(run_id)
+
+             except asyncio.CancelledError:
+                 break
+             except Exception as e:
+                 logger.error(f"Error in periodic flush: {e}")
+
+     def _cleanup_run(self, run_id: str):
+         """Clean up tracking data for a completed run."""
+         self.buffer.pop(run_id, None)
+         self.seq_counters.pop(run_id, None)
+         self.last_flush.pop(run_id, None)
+
+     async def mark_complete(self, run_id: str):
+         """Mark a stream as complete and flush."""
+         # Add a [DONE] marker
+         await self.write_chunk(run_id, "data: [DONE]\n\n", is_complete=True)
+
+
+ async def create_background_stream_processor(
+     stream_generator,
+     redis_client: AsyncRedisClient,
+     run_id: str,
+     writer: Optional[RedisSSEStreamWriter] = None,
+ ) -> None:
+     """
+     Process a stream in the background and store chunks to Redis.
+
+     This function consumes the stream generator and writes all chunks
+     to Redis for later retrieval.
+
+     Args:
+         stream_generator: The async generator yielding SSE chunks
+         redis_client: Redis client instance
+         run_id: The run ID to store chunks under
+         writer: Optional pre-configured writer (creates new if not provided)
+     """
+     if writer is None:
+         writer = RedisSSEStreamWriter(redis_client)
+         await writer.start()
+         should_stop_writer = True
+     else:
+         should_stop_writer = False
+
+     try:
+         async for chunk in stream_generator:
+             if isinstance(chunk, tuple):
+                 chunk = chunk[0]
+
+             is_done = isinstance(chunk, str) and ("data: [DONE]" in chunk or "event: error" in chunk)
+
+             await writer.write_chunk(run_id=run_id, data=chunk, is_complete=is_done)
+
+             if is_done:
+                 break
+
+     except Exception as e:
+         logger.error(f"Error processing stream for run {run_id}: {e}")
+         # Write error chunk
+         error_chunk = {"error": {"message": str(e)}}
+         await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
+     finally:
+         if should_stop_writer:
+             await writer.stop()
+
+
+ async def redis_sse_stream_generator(
+     redis_client: AsyncRedisClient,
+     run_id: str,
+     starting_after: Optional[int] = None,
+     poll_interval: float = 0.1,
+     batch_size: int = 100,
+ ) -> AsyncIterator[str]:
+     """
+     Generate SSE events from Redis stream chunks.
+
+     This generator reads chunks stored in Redis streams and yields them as SSE events.
+     It supports cursor-based recovery by allowing you to start from a specific seq_id.
+
+     Args:
+         redis_client: Redis client instance
+         run_id: The run ID to read chunks for
+         starting_after: Sequential ID (integer) to start reading from (default: None for beginning)
+         poll_interval: Seconds to wait between polls when no new data (default: 0.1)
+         batch_size: Number of entries to read per batch (default: 100)
+
+     Yields:
+         SSE-formatted chunks from the Redis stream
+     """
+     stream_key = f"sse:run:{run_id}"
+     last_redis_id = "-"
+     cursor_seq_id = starting_after or 0
+
+     logger.debug(f"Starting redis_sse_stream_generator for run_id={run_id}, stream_key={stream_key}")
+
+     while True:
+         entries = await redis_client.xrange(stream_key, start=last_redis_id, count=batch_size)
+
+         if entries:
+             yielded_any = False
+             for entry_id, fields in entries:
+                 if entry_id == last_redis_id:
+                     continue
+
+                 chunk_seq_id = int(fields.get("seq_id", 0))
+                 if chunk_seq_id > cursor_seq_id:
+                     data = fields.get("data", "")
+                     if not data:
+                         logger.debug(f"No data found for chunk {chunk_seq_id} in run {run_id}")
+                         continue
+
+                     if '"run_id":null' in data:
+                         data = data.replace('"run_id":null', f'"run_id":"{run_id}"')
+
+                     if '"seq_id":null' in data:
+                         data = data.replace('"seq_id":null', f'"seq_id":{chunk_seq_id}')
+
+                     yield data
+                     yielded_any = True
+
+                     if fields.get("complete") == "true":
+                         return
+
+                 last_redis_id = entry_id
+
+             if not yielded_any and len(entries) > 1:
+                 continue
+
+         if not entries or (len(entries) == 1 and entries[0][0] == last_redis_id):
+             await asyncio.sleep(poll_interval)
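A minimal wiring sketch for the new Redis-backed background streaming, assuming an async context with an already-configured AsyncRedisClient; the run ID is made up:

```python
import asyncio

from letta.data_sources.redis_client import AsyncRedisClient
from letta.server.rest_api.redis_stream_manager import (
    RedisSSEStreamWriter,
    redis_sse_stream_generator,
)


async def demo(redis_client: AsyncRedisClient) -> None:
    run_id = "run-123"  # made-up run ID

    # Producer side: buffer a chunk, then mark the stream complete (which
    # appends the "data: [DONE]" marker and forces a flush).
    writer = RedisSSEStreamWriter(redis_client)
    await writer.start()
    await writer.write_chunk(run_id, 'data: {"seq_id":null,"run_id":null}\n\n')
    await writer.mark_complete(run_id)
    await writer.stop()

    # Consumer side: replay everything after seq_id 0. The generator
    # back-fills run_id/seq_id into stored payloads and returns once it
    # yields the chunk flagged complete.
    async for chunk in redis_sse_stream_generator(redis_client, run_id, starting_after=0):
        print(chunk, end="")

# To run: asyncio.run(demo(your_redis_client)), given a reachable Redis.
```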