letta-nightly 0.7.20.dev20250521104258__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +290 -3
  3. letta/agents/base_agent.py +0 -55
  4. letta/agents/helpers.py +5 -0
  5. letta/agents/letta_agent.py +314 -64
  6. letta/agents/letta_agent_batch.py +102 -55
  7. letta/agents/voice_agent.py +5 -5
  8. letta/client/client.py +9 -18
  9. letta/constants.py +55 -1
  10. letta/functions/function_sets/builtin.py +27 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  12. letta/interfaces/anthropic_streaming_interface.py +10 -1
  13. letta/interfaces/openai_streaming_interface.py +9 -2
  14. letta/llm_api/anthropic.py +21 -2
  15. letta/llm_api/anthropic_client.py +33 -6
  16. letta/llm_api/google_ai_client.py +136 -423
  17. letta/llm_api/google_vertex_client.py +173 -22
  18. letta/llm_api/llm_api_tools.py +27 -0
  19. letta/llm_api/llm_client.py +1 -1
  20. letta/llm_api/llm_client_base.py +32 -21
  21. letta/llm_api/openai.py +57 -0
  22. letta/llm_api/openai_client.py +7 -11
  23. letta/memory.py +0 -1
  24. letta/orm/__init__.py +1 -0
  25. letta/orm/enums.py +1 -0
  26. letta/orm/provider_trace.py +26 -0
  27. letta/orm/step.py +1 -0
  28. letta/schemas/provider_trace.py +43 -0
  29. letta/schemas/providers.py +210 -65
  30. letta/schemas/step.py +1 -0
  31. letta/schemas/tool.py +4 -0
  32. letta/server/db.py +37 -19
  33. letta/server/rest_api/routers/v1/__init__.py +2 -0
  34. letta/server/rest_api/routers/v1/agents.py +57 -34
  35. letta/server/rest_api/routers/v1/blocks.py +3 -3
  36. letta/server/rest_api/routers/v1/identities.py +24 -26
  37. letta/server/rest_api/routers/v1/jobs.py +3 -3
  38. letta/server/rest_api/routers/v1/llms.py +13 -8
  39. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
  40. letta/server/rest_api/routers/v1/tags.py +3 -3
  41. letta/server/rest_api/routers/v1/telemetry.py +18 -0
  42. letta/server/rest_api/routers/v1/tools.py +6 -6
  43. letta/server/rest_api/streaming_response.py +105 -0
  44. letta/server/rest_api/utils.py +4 -0
  45. letta/server/server.py +140 -1
  46. letta/services/agent_manager.py +251 -18
  47. letta/services/block_manager.py +52 -37
  48. letta/services/helpers/noop_helper.py +10 -0
  49. letta/services/identity_manager.py +43 -38
  50. letta/services/job_manager.py +29 -0
  51. letta/services/message_manager.py +111 -0
  52. letta/services/sandbox_config_manager.py +36 -0
  53. letta/services/step_manager.py +146 -0
  54. letta/services/telemetry_manager.py +58 -0
  55. letta/services/tool_executor/tool_execution_manager.py +49 -5
  56. letta/services/tool_executor/tool_execution_sandbox.py +47 -0
  57. letta/services/tool_executor/tool_executor.py +236 -7
  58. letta/services/tool_manager.py +160 -1
  59. letta/services/tool_sandbox/e2b_sandbox.py +65 -3
  60. letta/settings.py +10 -2
  61. letta/tracing.py +5 -5
  62. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
  63. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +66 -59
  64. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent_batch.py CHANGED
@@ -145,7 +145,7 @@ class LettaAgentBatch(BaseAgent):
         agent_mapping = {
             agent_state.id: agent_state
             for agent_state in await self.agent_manager.get_agents_by_ids_async(
-                agent_ids=[request.agent_id for request in batch_requests], actor=self.actor
+                agent_ids=[request.agent_id for request in batch_requests], include_relationships=["tools", "memory"], actor=self.actor
             )
         }

@@ -267,64 +267,121 @@ class LettaAgentBatch(BaseAgent):

     @trace_method
     async def _collect_resume_context(self, llm_batch_id: str) -> _ResumeContext:
-        # NOTE: We only continue for items with successful results
-        batch_items = await self.batch_manager.list_llm_batch_items_async(llm_batch_id=llm_batch_id, request_status=JobStatus.completed)
-
-        agent_ids = []
-        provider_results = {}
-        request_status_updates: List[RequestStatusUpdateInfo] = []
+        """
+        Collect context for resuming operations from completed batch items.

-        for item in batch_items:
-            aid = item.agent_id
-            agent_ids.append(aid)
-            provider_results[aid] = item.batch_request_result.result
+        Args:
+            llm_batch_id: The ID of the batch to collect context for

-        agent_states = await self.agent_manager.get_agents_by_ids_async(agent_ids, actor=self.actor)
-        agent_state_map = {agent.id: agent for agent in agent_states}
+        Returns:
+            _ResumeContext object containing all necessary data for resumption
+        """
+        # Fetch only completed batch items
+        batch_items = await self.batch_manager.list_llm_batch_items_async(llm_batch_id=llm_batch_id, request_status=JobStatus.completed)

-        name_map, args_map, cont_map = {}, {}, {}
-        for aid in agent_ids:
-            # status bookkeeping
-            pr = provider_results[aid]
-            status = (
-                JobStatus.completed
-                if isinstance(pr, BetaMessageBatchSucceededResult)
-                else (
-                    JobStatus.failed
-                    if isinstance(pr, BetaMessageBatchErroredResult)
-                    else JobStatus.cancelled if isinstance(pr, BetaMessageBatchCanceledResult) else JobStatus.expired
-                )
+        # Exit early if no items to process
+        if not batch_items:
+            return _ResumeContext(
+                batch_items=[],
+                agent_ids=[],
+                agent_state_map={},
+                provider_results={},
+                tool_call_name_map={},
+                tool_call_args_map={},
+                should_continue_map={},
+                request_status_updates=[],
             )
-            request_status_updates.append(RequestStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, request_status=status))

-            # translate provider‑specific response OpenAI‑style tool call (unchanged)
-            llm_client = LLMClient.create(
-                provider_type=item.llm_config.model_endpoint_type,
-                put_inner_thoughts_first=True,
-                actor=self.actor,
-            )
-            tool_call = (
-                llm_client.convert_response_to_chat_completion(
-                    response_data=pr.message.model_dump(), input_messages=[], llm_config=item.llm_config
-                )
-                .choices[0]
-                .message.tool_calls[0]
-            )
+        # Extract agent IDs and organize items by agent ID
+        agent_ids = [item.agent_id for item in batch_items]
+        batch_item_map = {item.agent_id: item for item in batch_items}

-            name, args, cont = self._extract_tool_call_and_decide_continue(tool_call, item.step_state)
-            name_map[aid], args_map[aid], cont_map[aid] = name, args, cont
+        # Collect provider results
+        provider_results = {item.agent_id: item.batch_request_result.result for item in batch_items}
+
+        # Fetch agent states in a single call
+        agent_states = await self.agent_manager.get_agents_by_ids_async(
+            agent_ids=agent_ids, include_relationships=["tools", "memory"], actor=self.actor
+        )
+        agent_state_map = {agent.id: agent for agent in agent_states}
+
+        # Process each agent's results
+        tool_call_results = self._process_agent_results(
+            agent_ids=agent_ids, batch_item_map=batch_item_map, provider_results=provider_results, llm_batch_id=llm_batch_id
+        )

         return _ResumeContext(
             batch_items=batch_items,
             agent_ids=agent_ids,
             agent_state_map=agent_state_map,
             provider_results=provider_results,
-            tool_call_name_map=name_map,
-            tool_call_args_map=args_map,
-            should_continue_map=cont_map,
-            request_status_updates=request_status_updates,
+            tool_call_name_map=tool_call_results.name_map,
+            tool_call_args_map=tool_call_results.args_map,
+            should_continue_map=tool_call_results.cont_map,
+            request_status_updates=tool_call_results.status_updates,
+        )
+
+    def _process_agent_results(self, agent_ids, batch_item_map, provider_results, llm_batch_id):
+        """
+        Process the results for each agent, extracting tool calls and determining continuation status.
+
+        Returns:
+            A namedtuple containing name_map, args_map, cont_map, and status_updates
+        """
+        from collections import namedtuple
+
+        ToolCallResults = namedtuple("ToolCallResults", ["name_map", "args_map", "cont_map", "status_updates"])
+
+        name_map, args_map, cont_map = {}, {}, {}
+        request_status_updates = []
+
+        for aid in agent_ids:
+            item = batch_item_map[aid]
+            result = provider_results[aid]
+
+            # Determine job status based on result type
+            status = self._determine_job_status(result)
+            request_status_updates.append(RequestStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, request_status=status))
+
+            # Process tool calls
+            name, args, cont = self._extract_tool_call_from_result(item, result)
+            name_map[aid], args_map[aid], cont_map[aid] = name, args, cont
+
+        return ToolCallResults(name_map, args_map, cont_map, request_status_updates)
+
+    def _determine_job_status(self, result):
+        """Determine job status based on result type"""
+        if isinstance(result, BetaMessageBatchSucceededResult):
+            return JobStatus.completed
+        elif isinstance(result, BetaMessageBatchErroredResult):
+            return JobStatus.failed
+        elif isinstance(result, BetaMessageBatchCanceledResult):
+            return JobStatus.cancelled
+        else:
+            return JobStatus.expired
+
+    def _extract_tool_call_from_result(self, item, result):
+        """Extract tool call information from a result"""
+        llm_client = LLMClient.create(
+            provider_type=item.llm_config.model_endpoint_type,
+            put_inner_thoughts_first=True,
+            actor=self.actor,
+        )
+
+        # If result isn't a successful type, we can't extract a tool call
+        if not isinstance(result, BetaMessageBatchSucceededResult):
+            return None, None, False
+
+        tool_call = (
+            llm_client.convert_response_to_chat_completion(
+                response_data=result.message.model_dump(), input_messages=[], llm_config=item.llm_config
+            )
+            .choices[0]
+            .message.tool_calls[0]
         )

+        return self._extract_tool_call_and_decide_continue(tool_call, item.step_state)
+
     def _update_request_statuses(self, updates: List[RequestStatusUpdateInfo]) -> None:
         if updates:
             self.batch_manager.bulk_update_llm_batch_items_request_status_by_agent(updates=updates)
@@ -556,16 +613,6 @@ class LettaAgentBatch(BaseAgent):
         in_context_messages = await self._rebuild_memory_async(current_in_context_messages + new_in_context_messages, agent_state)
         return in_context_messages

-    # TODO: Make this a bullk function
-    def _rebuild_memory(
-        self,
-        in_context_messages: List[Message],
-        agent_state: AgentState,
-        num_messages: int | None = None,
-        num_archival_memories: int | None = None,
-    ) -> List[Message]:
-        return super()._rebuild_memory(in_context_messages, agent_state)
-
     # Not used in batch.
     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         raise NotImplementedError
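
Note: the refactor above replaces a nested conditional expression with the small `_determine_job_status` helper. For readers skimming the hunk, the core mapping can be run in isolation; this is a minimal standalone sketch, where the stub classes stand in for the real `anthropic.types.beta` batch result models and JobStatus is simplified to a plain enum:

    from enum import Enum

    class JobStatus(str, Enum):  # simplified stand-in for letta's JobStatus
        completed = "completed"
        failed = "failed"
        cancelled = "cancelled"
        expired = "expired"

    # Stand-ins for the anthropic.types.beta batch result models named in the diff
    class BetaMessageBatchSucceededResult: ...
    class BetaMessageBatchErroredResult: ...
    class BetaMessageBatchCanceledResult: ...
    class BetaMessageBatchExpiredResult: ...

    def determine_job_status(result) -> JobStatus:
        # Same branch order as LettaAgentBatch._determine_job_status in the hunk above
        if isinstance(result, BetaMessageBatchSucceededResult):
            return JobStatus.completed
        elif isinstance(result, BetaMessageBatchErroredResult):
            return JobStatus.failed
        elif isinstance(result, BetaMessageBatchCanceledResult):
            return JobStatus.cancelled
        else:
            return JobStatus.expired

    assert determine_job_status(BetaMessageBatchErroredResult()) is JobStatus.failed
    assert determine_job_status(BetaMessageBatchExpiredResult()) is JobStatus.expired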
letta/agents/voice_agent.py CHANGED
@@ -154,7 +154,7 @@ class VoiceAgent(BaseAgent):
         # TODO: Define max steps here
         for _ in range(max_steps):
             # Rebuild memory each loop
-            in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
+            in_context_messages = await self._rebuild_memory_async(in_context_messages, agent_state)
            openai_messages = convert_in_context_letta_messages_to_openai(in_context_messages, exclude_system_messages=True)
            openai_messages.extend(in_memory_message_history)

@@ -292,14 +292,14 @@ class VoiceAgent(BaseAgent):
             agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
         )

-    def _rebuild_memory(
+    async def _rebuild_memory_async(
         self,
         in_context_messages: List[Message],
         agent_state: AgentState,
         num_messages: int | None = None,
         num_archival_memories: int | None = None,
     ) -> List[Message]:
-        return super()._rebuild_memory(
+        return await super()._rebuild_memory_async(
             in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
         )

@@ -438,7 +438,7 @@ class VoiceAgent(BaseAgent):
         if start_date and end_date and start_date > end_date:
             start_date, end_date = end_date, start_date

-        archival_results = self.agent_manager.list_passages(
+        archival_results = await self.agent_manager.list_passages_async(
             actor=self.actor,
             agent_id=self.agent_id,
             query_text=archival_query,
@@ -457,7 +457,7 @@ class VoiceAgent(BaseAgent):
         keyword_results = {}
         if convo_keyword_queries:
             for keyword in convo_keyword_queries:
-                messages = self.message_manager.list_messages_for_agent(
+                messages = await self.message_manager.list_messages_for_agent_async(
                     agent_id=self.agent_id,
                     actor=self.actor,
                     query_text=keyword,
letta/client/client.py CHANGED
@@ -2773,11 +2773,8 @@ class LocalClient(AbstractClient):

     # humans / personas

-    def get_block_id(self, name: str, label: str) -> str:
-        block = self.server.block_manager.get_blocks(actor=self.user, template_name=name, label=label, is_template=True)
-        if not block:
-            return None
-        return block[0].id
+    def get_block_id(self, name: str, label: str) -> str | None:
+        return None

     def create_human(self, name: str, text: str):
         """
@@ -2812,7 +2809,7 @@ class LocalClient(AbstractClient):
         Returns:
             humans (List[Human]): List of human blocks
         """
-        return self.server.block_manager.get_blocks(actor=self.user, label="human", is_template=True)
+        return []

     def list_personas(self) -> List[Persona]:
         """
@@ -2821,7 +2818,7 @@ class LocalClient(AbstractClient):
         Returns:
             personas (List[Persona]): List of persona blocks
         """
-        return self.server.block_manager.get_blocks(actor=self.user, label="persona", is_template=True)
+        return []

     def update_human(self, human_id: str, text: str):
         """
@@ -2879,7 +2876,7 @@ class LocalClient(AbstractClient):
         assert id, f"Human ID must be provided"
         return Human(**self.server.block_manager.get_block_by_id(id, actor=self.user).model_dump())

-    def get_persona_id(self, name: str) -> str:
+    def get_persona_id(self, name: str) -> str | None:
         """
         Get the ID of a persona block template

@@ -2889,12 +2886,9 @@ class LocalClient(AbstractClient):
         Returns:
             id (str): ID of the persona block
         """
-        persona = self.server.block_manager.get_blocks(actor=self.user, template_name=name, label="persona", is_template=True)
-        if not persona:
-            return None
-        return persona[0].id
+        return None

-    def get_human_id(self, name: str) -> str:
+    def get_human_id(self, name: str) -> str | None:
         """
         Get the ID of a human block template

@@ -2904,10 +2898,7 @@ class LocalClient(AbstractClient):
         Returns:
             id (str): ID of the human block
         """
-        human = self.server.block_manager.get_blocks(actor=self.user, template_name=name, label="human", is_template=True)
-        if not human:
-            return None
-        return human[0].id
+        return None

     def delete_persona(self, id: str):
         """
@@ -3381,7 +3372,7 @@ class LocalClient(AbstractClient):
         Returns:
             blocks (List[Block]): List of blocks
         """
-        return self.server.block_manager.get_blocks(actor=self.user, label=label, is_template=templates_only)
+        return []

     def create_block(
         self, label: str, value: str, limit: Optional[int] = None, template_name: Optional[str] = None, is_template: bool = False
letta/constants.py CHANGED
@@ -19,6 +19,7 @@ MCP_TOOL_TAG_NAME_PREFIX = "mcp"  # full format, mcp:server_name
 LETTA_CORE_TOOL_MODULE_NAME = "letta.functions.function_sets.base"
 LETTA_MULTI_AGENT_TOOL_MODULE_NAME = "letta.functions.function_sets.multi_agent"
 LETTA_VOICE_TOOL_MODULE_NAME = "letta.functions.function_sets.voice"
+LETTA_BUILTIN_TOOL_MODULE_NAME = "letta.functions.function_sets.builtin"


 # String in the error message for when the context window is too large
@@ -83,9 +84,19 @@ BASE_VOICE_SLEEPTIME_TOOLS = [
 ]
 # Multi agent tools
 MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_tags", "send_message_to_agent_async"]
+
+# Built in tools
+BUILTIN_TOOLS = ["run_code", "web_search"]
+
 # Set of all built-in Letta tools
 LETTA_TOOL_SET = set(
-    BASE_TOOLS + BASE_MEMORY_TOOLS + MULTI_AGENT_TOOLS + BASE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_TOOLS + BASE_VOICE_SLEEPTIME_CHAT_TOOLS
+    BASE_TOOLS
+    + BASE_MEMORY_TOOLS
+    + MULTI_AGENT_TOOLS
+    + BASE_SLEEPTIME_TOOLS
+    + BASE_VOICE_SLEEPTIME_TOOLS
+    + BASE_VOICE_SLEEPTIME_CHAT_TOOLS
+    + BUILTIN_TOOLS
 )

 # The name of the tool used to send message to the user
@@ -179,6 +190,45 @@ LLM_MAX_TOKENS = {
     "gpt-3.5-turbo-0613": 4096,  # legacy
     "gpt-3.5-turbo-16k-0613": 16385,  # legacy
     "gpt-3.5-turbo-0301": 4096,  # legacy
+    "gemini-1.0-pro-vision-latest": 12288,
+    "gemini-pro-vision": 12288,
+    "gemini-1.5-pro-latest": 2000000,
+    "gemini-1.5-pro-001": 2000000,
+    "gemini-1.5-pro-002": 2000000,
+    "gemini-1.5-pro": 2000000,
+    "gemini-1.5-flash-latest": 1000000,
+    "gemini-1.5-flash-001": 1000000,
+    "gemini-1.5-flash-001-tuning": 16384,
+    "gemini-1.5-flash": 1000000,
+    "gemini-1.5-flash-002": 1000000,
+    "gemini-1.5-flash-8b": 1000000,
+    "gemini-1.5-flash-8b-001": 1000000,
+    "gemini-1.5-flash-8b-latest": 1000000,
+    "gemini-1.5-flash-8b-exp-0827": 1000000,
+    "gemini-1.5-flash-8b-exp-0924": 1000000,
+    "gemini-2.5-pro-exp-03-25": 1048576,
+    "gemini-2.5-pro-preview-03-25": 1048576,
+    "gemini-2.5-flash-preview-04-17": 1048576,
+    "gemini-2.5-flash-preview-05-20": 1048576,
+    "gemini-2.5-flash-preview-04-17-thinking": 1048576,
+    "gemini-2.5-pro-preview-05-06": 1048576,
+    "gemini-2.0-flash-exp": 1048576,
+    "gemini-2.0-flash": 1048576,
+    "gemini-2.0-flash-001": 1048576,
+    "gemini-2.0-flash-exp-image-generation": 1048576,
+    "gemini-2.0-flash-lite-001": 1048576,
+    "gemini-2.0-flash-lite": 1048576,
+    "gemini-2.0-flash-preview-image-generation": 32768,
+    "gemini-2.0-flash-lite-preview-02-05": 1048576,
+    "gemini-2.0-flash-lite-preview": 1048576,
+    "gemini-2.0-pro-exp": 1048576,
+    "gemini-2.0-pro-exp-02-05": 1048576,
+    "gemini-exp-1206": 1048576,
+    "gemini-2.0-flash-thinking-exp-01-21": 1048576,
+    "gemini-2.0-flash-thinking-exp": 1048576,
+    "gemini-2.0-flash-thinking-exp-1219": 1048576,
+    "gemini-2.5-flash-preview-tts": 32768,
+    "gemini-2.5-pro-preview-tts": 65536,
 }
 # The error message that Letta will receive
 # MESSAGE_SUMMARY_WARNING_STR = f"Warning: the conversation history will soon reach its maximum length and be trimmed. Make sure to save any important information from the conversation to your memory before it is removed."
@@ -230,3 +280,7 @@ RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE = 5

 MAX_FILENAME_LENGTH = 255
 RESERVED_FILENAMES = {"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "LPT1", "LPT2"}
+
+WEB_SEARCH_CLIP_CONTENT = False
+WEB_SEARCH_INCLUDE_SCORE = False
+WEB_SEARCH_SEPARATOR = "\n" + "-" * 40 + "\n"
letta/functions/function_sets/builtin.py ADDED
@@ -0,0 +1,27 @@
+from typing import Literal
+
+
+async def web_search(query: str) -> str:
+    """
+    Search the web for information.
+    Args:
+        query (str): The query to search the web for.
+    Returns:
+        str: The search results.
+    """
+
+    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
+
+
+def run_code(code: str, language: Literal["python", "js", "ts", "r", "java"]) -> str:
+    """
+    Run code in a sandbox. Supports Python, Javascript, Typescript, R, and Java.
+
+    Args:
+        code (str): The code to run.
+        language (Literal["python", "js", "ts", "r", "java"]): The language of the code.
+    Returns:
+        str: The output of the code, the stdout, the stderr, and error traces (if any).
+    """
+
+    raise NotImplementedError("This is only available on the latest agent architecture. Please contact the Letta team.")
letta/groups/sleeptime_multi_agent_v2.py CHANGED
@@ -190,7 +190,7 @@ class SleeptimeMultiAgentV2(BaseAgent):
             prior_messages = []
             if self.group.sleeptime_agent_frequency:
                 try:
-                    prior_messages = self.message_manager.list_messages_for_agent(
+                    prior_messages = await self.message_manager.list_messages_for_agent_async(
                         agent_id=foreground_agent_id,
                         actor=self.actor,
                         after=last_processed_message_id,
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime, timezone
 from enum import Enum
 from typing import AsyncGenerator, List, Union
@@ -74,6 +75,7 @@ class AnthropicStreamingInterface:
         # usage trackers
         self.input_tokens = 0
         self.output_tokens = 0
+        self.model = None

         # reasoning object trackers
         self.reasoning_messages = []
@@ -88,7 +90,13 @@ class AnthropicStreamingInterface:

     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
-        return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=self.accumulated_tool_call_args, name=self.tool_call_name))
+        # hack for tool rules
+        tool_input = json.loads(self.accumulated_tool_call_args)
+        if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
+            arguments = str(json.dumps(tool_input["function"]["arguments"], indent=2))
+        else:
+            arguments = self.accumulated_tool_call_args
+        return ToolCall(id=self.tool_call_id, function=FunctionCall(arguments=arguments, name=self.tool_call_name))

     def _check_inner_thoughts_complete(self, combined_args: str) -> bool:
         """
@@ -311,6 +319,7 @@ class AnthropicStreamingInterface:
             self.message_id = event.message.id
             self.input_tokens += event.message.usage.input_tokens
             self.output_tokens += event.message.usage.output_tokens
+            self.model = event.message.model
         elif isinstance(event, BetaRawMessageDeltaEvent):
             self.output_tokens += event.usage.output_tokens
         elif isinstance(event, BetaRawMessageStopEvent):
letta/interfaces/openai_streaming_interface.py CHANGED
@@ -40,6 +40,9 @@ class OpenAIStreamingInterface:
         self.letta_assistant_message_id = Message.generate_id()
         self.letta_tool_message_id = Message.generate_id()

+        self.message_id = None
+        self.model = None
+
         # token counters
         self.input_tokens = 0
         self.output_tokens = 0
@@ -69,10 +72,14 @@ class OpenAIStreamingInterface:
         prev_message_type = None
         message_index = 0
         async for chunk in stream:
+            if not self.model or not self.message_id:
+                self.model = chunk.model
+                self.message_id = chunk.id
+
             # track usage
             if chunk.usage:
-                self.input_tokens += len(chunk.usage.prompt_tokens)
-                self.output_tokens += len(chunk.usage.completion_tokens)
+                self.input_tokens += chunk.usage.prompt_tokens
+                self.output_tokens += chunk.usage.completion_tokens

             if chunk.choices:
                 choice = chunk.choices[0]
letta/llm_api/anthropic.py CHANGED
@@ -134,13 +134,13 @@ def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:


 def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
-    for model_dict in anthropic_get_model_list(url=url, api_key=api_key):
+    for model_dict in anthropic_get_model_list(api_key=api_key):
         if model_dict["name"] == model:
             return model_dict["context_window"]
     raise ValueError(f"Can't find model '{model}' in Anthropic model list")


-def anthropic_get_model_list(url: str, api_key: Union[str, None]) -> dict:
+def anthropic_get_model_list(api_key: Optional[str]) -> dict:
     """https://docs.anthropic.com/claude/docs/models-overview"""

     # NOTE: currently there is no GET /models, so we need to hardcode
@@ -159,6 +159,25 @@ def anthropic_get_model_list(api_key: Optional[str]) -> dict:
     return models_json["data"]


+async def anthropic_get_model_list_async(api_key: Optional[str]) -> dict:
+    """https://docs.anthropic.com/claude/docs/models-overview"""
+
+    # NOTE: currently there is no GET /models, so we need to hardcode
+    # return MODEL_LIST
+
+    if api_key:
+        anthropic_client = anthropic.AsyncAnthropic(api_key=api_key)
+    elif model_settings.anthropic_api_key:
+        anthropic_client = anthropic.AsyncAnthropic()
+    else:
+        raise ValueError("No API key provided")
+
+    models = await anthropic_client.models.list()
+    models_json = models.model_dump()
+    assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
+    return models_json["data"]
+
+
 def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
letta/llm_api/anthropic_client.py CHANGED
@@ -35,6 +35,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.services.provider_manager import ProviderManager
+from letta.settings import model_settings
 from letta.tracing import trace_method

 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -120,8 +121,16 @@ class AnthropicClient(LLMClientBase):
             override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)

         if async_client:
-            return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
-        return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
+            return (
+                anthropic.AsyncAnthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
+                if override_key
+                else anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries)
+            )
+        return (
+            anthropic.Anthropic(api_key=override_key, max_retries=model_settings.anthropic_max_retries)
+            if override_key
+            else anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries)
+        )

     @trace_method
     def build_request_data(
@@ -239,6 +248,24 @@ class AnthropicClient(LLMClientBase):

         return data

+    async def count_tokens(self, messages: List[dict] = None, model: str = None, tools: List[Tool] = None) -> int:
+        client = anthropic.AsyncAnthropic()
+        if messages and len(messages) == 0:
+            messages = None
+        if tools and len(tools) > 0:
+            anthropic_tools = convert_tools_to_anthropic_format(tools)
+        else:
+            anthropic_tools = None
+        result = await client.beta.messages.count_tokens(
+            model=model or "claude-3-7-sonnet-20250219",
+            messages=messages or [{"role": "user", "content": "hi"}],
+            tools=anthropic_tools or [],
+        )
+        token_count = result.input_tokens
+        if messages is None:
+            token_count -= 8
+        return token_count
+
     def handle_llm_error(self, e: Exception) -> Exception:
         if isinstance(e, anthropic.APIConnectionError):
             logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
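
Note: the new `count_tokens` method wraps Anthropic's beta token-counting endpoint, which the diff calls via `client.beta.messages.count_tokens`. A hedged usage sketch of the underlying call, assuming ANTHROPIC_API_KEY is set in the environment; the model name is the default taken from the diff:

    import asyncio

    import anthropic

    async def main() -> None:
        client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
        result = await client.beta.messages.count_tokens(
            model="claude-3-7-sonnet-20250219",
            messages=[{"role": "user", "content": "hi"}],
            tools=[],
        )
        # result.input_tokens includes the placeholder message itself; the diff
        # subtracts 8 when no real messages were passed, presumably to strip
        # that constant scaffolding overhead.
        print(result.input_tokens)

    asyncio.run(main())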
@@ -369,11 +396,11 @@ class AnthropicClient(LLMClientBase):
                 content = strip_xml_tags(string=content_part.text, tag="thinking")
             if content_part.type == "tool_use":
                 # hack for tool rules
-                input = json.loads(json.dumps(content_part.input))
-                if "id" in input and input["id"].startswith("toolu_") and "function" in input:
-                    arguments = str(input["function"]["arguments"])
+                tool_input = json.loads(json.dumps(content_part.input))
+                if "id" in tool_input and tool_input["id"].startswith("toolu_") and "function" in tool_input:
+                    arguments = str(tool_input["function"]["arguments"])
                 else:
-                    arguments = json.dumps(content_part.input, indent=2)
+                    arguments = json.dumps(tool_input, indent=2)
                 tool_calls = [
                     ToolCall(
                         id=content_part.id,