PyPI - letta-nightly - Versions diffs - 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl - Mend

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic. Click here for more details.

Files changed (87)

letta/__init__.py +1 -1
letta/agent.py +47 -12
letta/agents/base_agent.py +7 -4
letta/agents/helpers.py +52 -0
letta/agents/letta_agent.py +105 -42
letta/agents/voice_agent.py +2 -2
letta/constants.py +13 -1
letta/errors.py +10 -3
letta/functions/function_sets/base.py +65 -0
letta/functions/interface.py +2 -2
letta/functions/mcp_client/base_client.py +18 -1
letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
letta/groups/helpers.py +113 -0
letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
letta/groups/sleeptime_multi_agent.py +259 -0
letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
letta/helpers/converters.py +109 -7
letta/helpers/message_helper.py +1 -0
letta/helpers/tool_rule_solver.py +40 -23
letta/interface.py +12 -5
letta/interfaces/anthropic_streaming_interface.py +329 -0
letta/llm_api/anthropic.py +12 -1
letta/llm_api/anthropic_client.py +65 -14
letta/llm_api/azure_openai.py +2 -2
letta/llm_api/google_ai_client.py +13 -2
letta/llm_api/google_constants.py +3 -0
letta/llm_api/google_vertex_client.py +2 -2
letta/llm_api/llm_api_tools.py +1 -1
letta/llm_api/llm_client.py +7 -0
letta/llm_api/llm_client_base.py +2 -7
letta/llm_api/openai.py +7 -1
letta/llm_api/openai_client.py +250 -0
letta/orm/__init__.py +4 -0
letta/orm/agent.py +6 -0
letta/orm/block.py +32 -2
letta/orm/block_history.py +46 -0
letta/orm/custom_columns.py +60 -0
letta/orm/enums.py +7 -0
letta/orm/group.py +6 -0
letta/orm/groups_blocks.py +13 -0
letta/orm/llm_batch_items.py +55 -0
letta/orm/llm_batch_job.py +48 -0
letta/orm/message.py +7 -1
letta/orm/organization.py +2 -0
letta/orm/sqlalchemy_base.py +18 -15
letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
letta/prompts/system/sleeptime.txt +26 -0
letta/schemas/agent.py +13 -1
letta/schemas/enums.py +17 -2
letta/schemas/group.py +14 -1
letta/schemas/letta_message.py +5 -3
letta/schemas/llm_batch_job.py +53 -0
letta/schemas/llm_config.py +14 -4
letta/schemas/message.py +44 -0
letta/schemas/tool.py +3 -0
letta/schemas/usage.py +1 -0
letta/server/db.py +2 -0
letta/server/rest_api/app.py +1 -1
letta/server/rest_api/chat_completions_interface.py +8 -3
letta/server/rest_api/interface.py +36 -7
letta/server/rest_api/routers/v1/agents.py +53 -39
letta/server/rest_api/routers/v1/runs.py +14 -2
letta/server/rest_api/utils.py +15 -4
letta/server/server.py +120 -71
letta/services/agent_manager.py +70 -6
letta/services/block_manager.py +190 -2
letta/services/group_manager.py +68 -0
letta/services/helpers/agent_manager_helper.py +6 -4
letta/services/llm_batch_manager.py +139 -0
letta/services/message_manager.py +17 -31
letta/services/tool_executor/tool_execution_sandbox.py +1 -3
letta/services/tool_executor/tool_executor.py +9 -20
letta/services/tool_manager.py +14 -3
letta/services/tool_sandbox/__init__.py +0 -0
letta/services/tool_sandbox/base.py +188 -0
letta/services/tool_sandbox/e2b_sandbox.py +116 -0
letta/services/tool_sandbox/local_sandbox.py +221 -0
letta/sleeptime_agent.py +61 -0
letta/streaming_interface.py +20 -10
letta/utils.py +4 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
letta/offline_memory_agent.py +0 -173
letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0

letta/helpers/tool_rule_solver.py CHANGED Viewed

@@ -38,29 +38,46 @@ class ToolRulesSolver(BaseModel):
     )
     tool_call_history: List[str] = Field(default_factory=list, description="History of tool calls, updated with each tool call.")
-    def __init__(self, tool_rules: List[BaseToolRule], **kwargs):
-        super().__init__(**kwargs)
-        # Separate the provided tool rules into init, standard, and terminal categories
-        for rule in tool_rules:
-            if rule.type == ToolRuleType.run_first:
-                assert isinstance(rule, InitToolRule)
-                self.init_tool_rules.append(rule)
-            elif rule.type == ToolRuleType.constrain_child_tools:
-                assert isinstance(rule, ChildToolRule)
-                self.child_based_tool_rules.append(rule)
-            elif rule.type == ToolRuleType.conditional:
-                assert isinstance(rule, ConditionalToolRule)
-                self.validate_conditional_tool(rule)
-                self.child_based_tool_rules.append(rule)
-            elif rule.type == ToolRuleType.exit_loop:
-                assert isinstance(rule, TerminalToolRule)
-                self.terminal_tool_rules.append(rule)
-            elif rule.type == ToolRuleType.continue_loop:
-                assert isinstance(rule, ContinueToolRule)
-                self.continue_tool_rules.append(rule)
-            elif rule.type == ToolRuleType.max_count_per_step:
-                assert isinstance(rule, MaxCountPerStepToolRule)
-                self.child_based_tool_rules.append(rule)
+    def __init__(
+        self,
+        tool_rules: Optional[List[BaseToolRule]] = None,
+        init_tool_rules: Optional[List[InitToolRule]] = None,
+        continue_tool_rules: Optional[List[ContinueToolRule]] = None,
+        child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
+        terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
+        tool_call_history: Optional[List[str]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            init_tool_rules=init_tool_rules or [],
+            continue_tool_rules=continue_tool_rules or [],
+            child_based_tool_rules=child_based_tool_rules or [],
+            terminal_tool_rules=terminal_tool_rules or [],
+            tool_call_history=tool_call_history or [],
+            **kwargs,
+        )
+        if tool_rules:
+            for rule in tool_rules:
+                if rule.type == ToolRuleType.run_first:
+                    assert isinstance(rule, InitToolRule)
+                    self.init_tool_rules.append(rule)
+                elif rule.type == ToolRuleType.constrain_child_tools:
+                    assert isinstance(rule, ChildToolRule)
+                    self.child_based_tool_rules.append(rule)
+                elif rule.type == ToolRuleType.conditional:
+                    assert isinstance(rule, ConditionalToolRule)
+                    self.validate_conditional_tool(rule)
+                    self.child_based_tool_rules.append(rule)
+                elif rule.type == ToolRuleType.exit_loop:
+                    assert isinstance(rule, TerminalToolRule)
+                    self.terminal_tool_rules.append(rule)
+                elif rule.type == ToolRuleType.continue_loop:
+                    assert isinstance(rule, ContinueToolRule)
+                    self.continue_tool_rules.append(rule)
+                elif rule.type == ToolRuleType.max_count_per_step:
+                    assert isinstance(rule, MaxCountPerStepToolRule)
+                    self.child_based_tool_rules.append(rule)
     def register_tool_call(self, tool_name: str):
         """Update the internal state to track tool call history."""

letta/interface.py CHANGED Viewed

@@ -30,7 +30,7 @@ class AgentInterface(ABC):
         raise NotImplementedError
     @abstractmethod
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         raise NotImplementedError
@@ -40,7 +40,7 @@ class AgentInterface(ABC):
         raise NotImplementedError
     @abstractmethod
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta calls a function"""
         raise NotImplementedError
@@ -79,7 +79,7 @@ class CLIInterface(AgentInterface):
             print(fstr.format(msg=msg))
     @staticmethod
-    def internal_monologue(msg: str, msg_obj: Optional[Message] = None):
+    def internal_monologue(msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         # ANSI escape code for italic is '\x1B[3m'
         fstr = f"\x1B[3m{Fore.LIGHTBLACK_EX}{INNER_THOUGHTS_CLI_SYMBOL} {{msg}}{Style.RESET_ALL}"
         if STRIP_UI:
@@ -108,7 +108,14 @@ class CLIInterface(AgentInterface):
         print(fstr.format(msg=msg))
     @staticmethod
-    def user_message(msg: str, msg_obj: Optional[Message] = None, raw: bool = False, dump: bool = False, debug: bool = DEBUG):
+    def user_message(
+        msg: str,
+        msg_obj: Optional[Message] = None,
+        raw: bool = False,
+        dump: bool = False,
+        debug: bool = DEBUG,
+        chunk_index: Optional[int] = None,
+    ):
         def print_user_message(icon, msg, printf=print):
             if STRIP_UI:
                 printf(f"{icon} {msg}")
@@ -154,7 +161,7 @@ class CLIInterface(AgentInterface):
             printd_user_message("🧑", msg_json)
     @staticmethod
-    def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG):
+    def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG, chunk_index: Optional[int] = None):
         def print_function_message(icon, msg, color=Fore.RED, printf=print):
             if STRIP_UI:
                 printf(f"⚡{icon} [function] {msg}")

letta/interfaces/anthropic_streaming_interface.py ADDED Viewed

@@ -0,0 +1,329 @@
+from datetime import datetime, timezone
+from enum import Enum
+from typing import AsyncGenerator, List, Union
+from anthropic import AsyncStream
+from anthropic.types.beta import (
+    BetaInputJSONDelta,
+    BetaRawContentBlockDeltaEvent,
+    BetaRawContentBlockStartEvent,
+    BetaRawContentBlockStopEvent,
+    BetaRawMessageDeltaEvent,
+    BetaRawMessageStartEvent,
+    BetaRawMessageStopEvent,
+    BetaRawMessageStreamEvent,
+    BetaRedactedThinkingBlock,
+    BetaSignatureDelta,
+    BetaTextBlock,
+    BetaTextDelta,
+    BetaThinkingBlock,
+    BetaThinkingDelta,
+    BetaToolUseBlock,
+)
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.log import get_logger
+from letta.schemas.letta_message import (
+    AssistantMessage,
+    HiddenReasoningMessage,
+    LettaMessage,
+    ReasoningMessage,
+    ToolCallDelta,
+    ToolCallMessage,
+)
+from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+logger = get_logger(__name__)
+# TODO: These modes aren't used right now - but can be useful we do multiple sequential tool calling within one Claude message
+class EventMode(Enum):
+    TEXT = "TEXT"
+    TOOL_USE = "TOOL_USE"
+    THINKING = "THINKING"
+    REDACTED_THINKING = "REDACTED_THINKING"
+class AnthropicStreamingInterface:
+    """
+    Encapsulates the logic for streaming responses from Anthropic.
+    This class handles parsing of partial tokens, pre-execution messages,
+    and detection of tool call events.
+    """
+    def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
+        self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
+        self.use_assistant_message = use_assistant_message
+        # Premake IDs for database writes
+        self.letta_assistant_message_id = Message.generate_id()
+        self.letta_tool_message_id = Message.generate_id()
+        self.anthropic_mode = None
+        self.message_id = None
+        self.accumulated_inner_thoughts = []
+        self.tool_call_id = None
+        self.tool_call_name = None
+        self.accumulated_tool_call_args = []
+        self.previous_parse = {}
+        # usage trackers
+        self.input_tokens = 0
+        self.output_tokens = 0
+        # reasoning object trackers
+        self.reasoning_messages = []
+        # Buffer to hold tool call messages until inner thoughts are complete
+        self.tool_call_buffer = []
+        self.inner_thoughts_complete = False
+        self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg
+    def get_tool_call_object(self) -> ToolCall:
+        """Useful for agent loop"""
+        return ToolCall(
+            id=self.tool_call_id, function=FunctionCall(arguments="".join(self.accumulated_tool_call_args), name=self.tool_call_name)
+        )
+    def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
+        """
+        Check if inner thoughts are complete in the current tool call arguments
+        by looking for a closing quote after the inner_thoughts field
+        """
+        if not self.put_inner_thoughts_in_kwarg:
+            # None of the things should have inner thoughts in kwargs
+            return True
+        else:
+            parsed = self.optimistic_json_parser.parse(combined_args)
+            # TODO: This will break on tools with 0 input
+            return len(parsed.keys()) > 1 and INNER_THOUGHTS_KWARG in parsed.keys()
+    async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
+        async with stream:
+            async for event in stream:
+                # TODO: Support BetaThinkingBlock, BetaRedactedThinkingBlock
+                if isinstance(event, BetaRawContentBlockStartEvent):
+                    content = event.content_block
+                    if isinstance(content, BetaTextBlock):
+                        self.anthropic_mode = EventMode.TEXT
+                        # TODO: Can capture citations, etc.
+                    elif isinstance(content, BetaToolUseBlock):
+                        self.anthropic_mode = EventMode.TOOL_USE
+                        self.tool_call_id = content.id
+                        self.tool_call_name = content.name
+                        self.inner_thoughts_complete = False
+                        if not self.use_assistant_message:
+                            # Buffer the initial tool call message instead of yielding immediately
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_tool_message_id,
+                                tool_call=ToolCallDelta(name=self.tool_call_name, tool_call_id=self.tool_call_id),
+                                date=datetime.now(timezone.utc).isoformat(),
+                            )
+                            self.tool_call_buffer.append(tool_call_msg)
+                    elif isinstance(content, BetaThinkingBlock):
+                        self.anthropic_mode = EventMode.THINKING
+                        # TODO: Can capture signature, etc.
+                    elif isinstance(content, BetaRedactedThinkingBlock):
+                        self.anthropic_mode = EventMode.REDACTED_THINKING
+                        hidden_reasoning_message = HiddenReasoningMessage(
+                            id=self.letta_assistant_message_id,
+                            state="redacted",
+                            hidden_reasoning=content.data,
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                        self.reasoning_messages.append(hidden_reasoning_message)
+                        yield hidden_reasoning_message
+                elif isinstance(event, BetaRawContentBlockDeltaEvent):
+                    delta = event.delta
+                    if isinstance(delta, BetaTextDelta):
+                        # Safety check
+                        if not self.anthropic_mode == EventMode.TEXT:
+                            raise RuntimeError(
+                                f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}"
+                            )
+                        # TODO: Strip out </thinking> more robustly, this is pretty hacky lol
+                        delta.text = delta.text.replace("</thinking>", "")
+                        self.accumulated_inner_thoughts.append(delta.text)
+                        reasoning_message = ReasoningMessage(
+                            id=self.letta_assistant_message_id,
+                            reasoning=self.accumulated_inner_thoughts[-1],
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                        self.reasoning_messages.append(reasoning_message)
+                        yield reasoning_message
+                    elif isinstance(delta, BetaInputJSONDelta):
+                        if not self.anthropic_mode == EventMode.TOOL_USE:
+                            raise RuntimeError(
+                                f"Streaming integrity failed - received BetaInputJSONDelta object while not in TOOL_USE EventMode: {delta}"
+                            )
+                        self.accumulated_tool_call_args.append(delta.partial_json)
+                        combined_args = "".join(self.accumulated_tool_call_args)
+                        current_parsed = self.optimistic_json_parser.parse(combined_args)
+                        # Start detecting a difference in inner thoughts
+                        previous_inner_thoughts = self.previous_parse.get(INNER_THOUGHTS_KWARG, "")
+                        current_inner_thoughts = current_parsed.get(INNER_THOUGHTS_KWARG, "")
+                        inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
+                        if inner_thoughts_diff:
+                            reasoning_message = ReasoningMessage(
+                                id=self.letta_assistant_message_id,
+                                reasoning=inner_thoughts_diff,
+                                date=datetime.now(timezone.utc).isoformat(),
+                            )
+                            self.reasoning_messages.append(reasoning_message)
+                            yield reasoning_message
+                        # Check if inner thoughts are complete - if so, flush the buffer
+                        if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(combined_args):
+                            self.inner_thoughts_complete = True
+                            # Flush all buffered tool call messages
+                            for buffered_msg in self.tool_call_buffer:
+                                yield buffered_msg
+                            self.tool_call_buffer = []
+                        # Start detecting special case of "send_message"
+                        if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
+                            previous_send_message = self.previous_parse.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
+                            current_send_message = current_parsed.get(DEFAULT_MESSAGE_TOOL_KWARG, "")
+                            send_message_diff = current_send_message[len(previous_send_message) :]
+                            # Only stream out if it's not an empty string
+                            if send_message_diff:
+                                yield AssistantMessage(
+                                    id=self.letta_assistant_message_id,
+                                    content=[TextContent(text=send_message_diff)],
+                                    date=datetime.now(timezone.utc).isoformat(),
+                                )
+                        else:
+                            # Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
+                            tool_call_msg = ToolCallMessage(
+                                id=self.letta_tool_message_id,
+                                tool_call=ToolCallDelta(arguments=delta.partial_json),
+                                date=datetime.now(timezone.utc).isoformat(),
+                            )
+                            if self.inner_thoughts_complete:
+                                yield tool_call_msg
+                            else:
+                                self.tool_call_buffer.append(tool_call_msg)
+                        # Set previous parse
+                        self.previous_parse = current_parsed
+                    elif isinstance(delta, BetaThinkingDelta):
+                        # Safety check
+                        if not self.anthropic_mode == EventMode.THINKING:
+                            raise RuntimeError(
+                                f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
+                            )
+                        reasoning_message = ReasoningMessage(
+                            id=self.letta_assistant_message_id,
+                            source="reasoner_model",
+                            reasoning=delta.thinking,
+                            date=datetime.now(timezone.utc).isoformat(),
+                        )
+                        self.reasoning_messages.append(reasoning_message)
+                        yield reasoning_message
+                    elif isinstance(delta, BetaSignatureDelta):
+                        # Safety check
+                        if not self.anthropic_mode == EventMode.THINKING:
+                            raise RuntimeError(
+                                f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
+                            )
+                        reasoning_message = ReasoningMessage(
+                            id=self.letta_assistant_message_id,
+                            source="reasoner_model",
+                            reasoning="",
+                            date=datetime.now(timezone.utc).isoformat(),
+                            signature=delta.signature,
+                        )
+                        self.reasoning_messages.append(reasoning_message)
+                        yield reasoning_message
+                elif isinstance(event, BetaRawMessageStartEvent):
+                    self.message_id = event.message.id
+                    self.input_tokens += event.message.usage.input_tokens
+                    self.output_tokens += event.message.usage.output_tokens
+                elif isinstance(event, BetaRawMessageDeltaEvent):
+                    self.output_tokens += event.usage.output_tokens
+                elif isinstance(event, BetaRawMessageStopEvent):
+                    # Don't do anything here! We don't want to stop the stream.
+                    pass
+                elif isinstance(event, BetaRawContentBlockStopEvent):
+                    # If we're exiting a tool use block and there are still buffered messages,
+                    # we should flush them now
+                    if self.anthropic_mode == EventMode.TOOL_USE and self.tool_call_buffer:
+                        for buffered_msg in self.tool_call_buffer:
+                            yield buffered_msg
+                        self.tool_call_buffer = []
+                    self.anthropic_mode = None
+    def get_reasoning_content(self) -> List[Union[TextContent, ReasoningContent, RedactedReasoningContent]]:
+        def _process_group(
+            group: List[Union[ReasoningMessage, HiddenReasoningMessage]], group_type: str
+        ) -> Union[TextContent, ReasoningContent, RedactedReasoningContent]:
+            if group_type == "reasoning":
+                reasoning_text = "".join(chunk.reasoning for chunk in group)
+                is_native = any(chunk.source == "reasoner_model" for chunk in group)
+                signature = next((chunk.signature for chunk in group if chunk.signature is not None), None)
+                if is_native:
+                    return ReasoningContent(is_native=is_native, reasoning=reasoning_text, signature=signature)
+                else:
+                    return TextContent(text=reasoning_text)
+            elif group_type == "redacted":
+                redacted_text = "".join(chunk.hidden_reasoning for chunk in group if chunk.hidden_reasoning is not None)
+                return RedactedReasoningContent(data=redacted_text)
+            else:
+                raise ValueError("Unexpected group type")
+        merged = []
+        current_group = []
+        current_group_type = None  # "reasoning" or "redacted"
+        for msg in self.reasoning_messages:
+            # Determine the type of the current message
+            if isinstance(msg, HiddenReasoningMessage):
+                msg_type = "redacted"
+            elif isinstance(msg, ReasoningMessage):
+                msg_type = "reasoning"
+            else:
+                raise ValueError("Unexpected message type")
+            # Initialize group type if not set
+            if current_group_type is None:
+                current_group_type = msg_type
+            # If the type changes, process the current group
+            if msg_type != current_group_type:
+                merged.append(_process_group(current_group, current_group_type))
+                current_group = []
+                current_group_type = msg_type
+            current_group.append(msg)
+        # Process the final group, if any.
+        if current_group:
+            merged.append(_process_group(current_group, current_group_type))
+        # Strip out XML from any text content fields
+        for content in merged:
+            if isinstance(content, TextContent) and content.text.endswith("</thinking>"):
+                cutoff = len(content.text) - len("</thinking>")
+                content.text = content.text[:cutoff]
+        return merged

letta/llm_api/anthropic.py CHANGED Viewed

@@ -930,6 +930,8 @@ def anthropic_chat_completions_process_stream(
         stream_interface.stream_start()
     completion_tokens = 0
+    prev_message_type = None
+    message_idx = 0
     try:
         for chunk_idx, chat_completion_chunk in enumerate(
             anthropic_chat_completions_request_stream(
@@ -945,7 +947,7 @@ def anthropic_chat_completions_process_stream(
             if stream_interface:
                 if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.process_chunk(
+                    message_type = stream_interface.process_chunk(
                         chat_completion_chunk,
                         message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                         message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
@@ -953,7 +955,11 @@ def anthropic_chat_completions_process_stream(
                         # TODO handle emitting redacted reasoning content (e.g. as concat?)
                         expect_reasoning_content=extended_thinking,
                         name=name,
+                        message_index=message_idx,
                     )
+                    if message_type != prev_message_type and message_type is not None:
+                        message_idx += 1
+                    prev_message_type = message_type
                 elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                     stream_interface.process_refresh(chat_completion_response)
                 else:
@@ -1107,4 +1113,9 @@ def anthropic_chat_completions_process_stream(
     log_event(name="llm_response_received", attributes=chat_completion_response.model_dump())
+    for choice in chat_completion_response.choices:
+        if choice.message.content is not None:
+            choice.message.content = choice.message.content.replace(f"<{inner_thoughts_xml_tag}>", "")
+            choice.message.content = choice.message.content.replace(f"</{inner_thoughts_xml_tag}>", "")
     return chat_completion_response

letta/llm_api/anthropic_client.py CHANGED Viewed

@@ -1,9 +1,14 @@
 import json
 import re
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union
 import anthropic
+from anthropic import AsyncStream
 from anthropic.types import Message as AnthropicMessage
+from anthropic.types.beta import BetaRawMessageStreamEvent
+from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
+from anthropic.types.beta.messages import BetaMessageBatch
+from anthropic.types.beta.messages.batch_create_params import Request
 from letta.errors import (
     ContextWindowExceededError,
@@ -28,6 +33,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.services.provider_manager import ProviderManager
+from letta.tracing import trace_method
 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -46,19 +52,49 @@ class AnthropicClient(LLMClientBase):
         response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()
+    @trace_method
+    async def stream_async(self, request_data: dict) -> AsyncStream[BetaRawMessageStreamEvent]:
+        client = self._get_anthropic_client(async_client=True)
+        request_data["stream"] = True
+        return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+    @trace_method
+    async def batch_async(self, requests: Dict[str, dict]) -> BetaMessageBatch:
+        """
+        Send a batch of requests to the Anthropic API asynchronously.
+        Args:
+            requests (Dict[str, dict]): A mapping from custom_id to request parameter dicts.
+        Returns:
+            List[dict]: A list of response dictionaries corresponding to each request.
+        """
+        client = self._get_anthropic_client(async_client=True)
+        anthropic_requests = [
+            Request(custom_id=custom_id, params=MessageCreateParamsNonStreaming(**params)) for custom_id, params in requests.items()
+        ]
+        batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)
+        return batch_response
+    @trace_method
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = ProviderManager().get_anthropic_override_key()
         if async_client:
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
         return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
+    @trace_method
     def build_request_data(
         self,
         messages: List[PydanticMessage],
         tools: List[dict],
-        tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
+        # TODO: This needs to get cleaned up. The logic here is pretty confusing.
+        # TODO: I really want to get rid of prefixing, it's a recipe for disaster code maintenance wise
         prefix_fill = True
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
@@ -74,11 +110,6 @@ class AnthropicClient(LLMClientBase):
         # Extended Thinking
         if self.llm_config.enable_reasoner:
-            assert (
-                self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
-            ), "max tokens must be greater than thinking budget"
-            assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
             data["thinking"] = {
                 "type": "enabled",
                 "budget_tokens": self.llm_config.max_reasoning_tokens,
@@ -90,15 +121,35 @@ class AnthropicClient(LLMClientBase):
             prefix_fill = False
         # Tools
-        tools_for_request = (
-            [Tool(function=f) for f in tools if f["name"] == force_tool_call]
-            if force_tool_call is not None
-            else [Tool(function=f) for f in tools]
-        )
-        if force_tool_call is not None:
-            self.llm_config.put_inner_thoughts_in_kwargs = True  # why do we do this ?
+        # For an overview on tool choice:
+        # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview
+        if not tools:
+            # Special case for summarization path
+            tools_for_request = None
+            tool_choice = None
+        elif force_tool_call is not None:
+            tool_choice = {"type": "tool", "name": force_tool_call}
+            tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            # need to have this setting to be able to put inner thoughts in kwargs
+            if not self.llm_config.put_inner_thoughts_in_kwargs:
+                logger.warning(
+                    f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
+                )
+                self.llm_config.put_inner_thoughts_in_kwargs = True
+        else:
+            if self.llm_config.put_inner_thoughts_in_kwargs:
+                # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
+                tool_choice = {"type": "any", "disable_parallel_tool_use": True}
+            else:
+                tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
+            tools_for_request = [Tool(function=f) for f in tools] if tools is not None else None
+        # Add tool choice
+        data["tool_choice"] = tool_choice
         # Add inner thoughts kwarg
+        # TODO: Can probably make this more efficient
         if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],

letta/llm_api/azure_openai.py CHANGED Viewed

@@ -36,8 +36,8 @@ def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_versio
     try:
         models_list = client.models.list()
-    except requests.RequestException as e:
-        raise RuntimeError(f"Failed to retrieve model list: {e}")
+    except Exception:
+        return []
     all_available_models = [model.to_dict() for model in models_list.data]

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

Potentially problematic release.

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl