letta-nightly 0.6.50.dev20250411104155__py3-none-any.whl → 0.6.52.dev20250412051016__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +23 -32
- letta/agents/base_agent.py +17 -6
- letta/agents/ephemeral_agent.py +5 -6
- letta/agents/ephemeral_memory_agent.py +8 -10
- letta/agents/helpers.py +6 -6
- letta/agents/letta_agent.py +9 -10
- letta/agents/letta_agent_batch.py +164 -0
- letta/agents/voice_agent.py +8 -8
- letta/functions/function_sets/base.py +1 -1
- letta/helpers/converters.py +5 -2
- letta/helpers/tool_rule_solver.py +12 -2
- letta/jobs/scheduler.py +13 -11
- letta/llm_api/anthropic.py +0 -1
- letta/llm_api/anthropic_client.py +61 -23
- letta/llm_api/cohere.py +1 -1
- letta/llm_api/google_ai_client.py +48 -13
- letta/llm_api/google_vertex_client.py +19 -1
- letta/llm_api/llm_client_base.py +13 -5
- letta/llm_api/openai.py +4 -3
- letta/llm_api/openai_client.py +18 -10
- letta/orm/organization.py +4 -2
- letta/orm/sqlalchemy_base.py +3 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +30 -1
- letta/schemas/identity.py +10 -0
- letta/schemas/letta_request.py +4 -0
- letta/schemas/letta_response.py +9 -1
- letta/schemas/llm_config.py +10 -0
- letta/schemas/message.py +21 -12
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/tool_rule.py +14 -1
- letta/server/rest_api/interface.py +5 -4
- letta/server/rest_api/routers/v1/agents.py +20 -13
- letta/server/rest_api/routers/v1/groups.py +1 -1
- letta/server/rest_api/routers/v1/identities.py +23 -2
- letta/server/rest_api/utils.py +20 -22
- letta/server/server.py +34 -21
- letta/services/agent_manager.py +13 -9
- letta/services/block_manager.py +2 -4
- letta/services/identity_manager.py +21 -5
- letta/services/llm_batch_manager.py +21 -1
- letta/services/summarizer/summarizer.py +11 -4
- letta/services/tool_manager.py +1 -1
- letta/settings.py +1 -0
- letta/utils.py +2 -2
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/METADATA +3 -3
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/RECORD +51 -50
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.50.dev20250411104155.dist-info → letta_nightly-0.6.52.dev20250412051016.dist-info}/entry_points.txt +0 -0
letta/helpers/converters.py
CHANGED
@@ -28,6 +28,7 @@ from letta.schemas.tool_rule import (
     ContinueToolRule,
     InitToolRule,
     MaxCountPerStepToolRule,
+    ParentToolRule,
     TerminalToolRule,
     ToolRule,
 )
@@ -89,7 +90,7 @@ def serialize_tool_rules(tool_rules: Optional[List[ToolRule]]) -> List[Dict[str,
     return data


-def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[
+def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[ToolRule]:
     """Convert a list of dictionaries back into ToolRule objects."""
     if not data:
         return []
@@ -99,7 +100,7 @@ def deserialize_tool_rules(data: Optional[List[Dict]]) -> List[Union[ChildToolRu


 def deserialize_tool_rule(
     data: Dict,
-) ->
+) -> ToolRule:
     """Deserialize a dictionary to the appropriate ToolRule subclass based on 'type'."""
     rule_type = ToolRuleType(data.get("type"))

@@ -118,6 +119,8 @@ def deserialize_tool_rule(
         return ContinueToolRule(**data)
     elif rule_type == ToolRuleType.max_count_per_step:
         return MaxCountPerStepToolRule(**data)
+    elif rule_type == ToolRuleType.parent_last_tool:
+        return ParentToolRule(**data)
     raise ValueError(f"Unknown ToolRule type: {rule_type}")


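
For context, a minimal round-trip sketch of the new rule type through these helpers. It assumes ParentToolRule carries tool_name and children fields like the other child-based rules (its schema lives in letta/schemas/tool_rule.py and is not shown in this diff):

from letta.helpers.converters import deserialize_tool_rules, serialize_tool_rules
from letta.schemas.tool_rule import ParentToolRule

# Hypothetical rule: "execute_task" only becomes available after "plan_task" is called.
rules = [ParentToolRule(tool_name="plan_task", children=["execute_task"])]
payload = serialize_tool_rules(rules)       # list of dicts, each tagged with a "type" field
restored = deserialize_tool_rules(payload)  # dispatches on ToolRuleType.parent_last_tool
assert isinstance(restored[0], ParentToolRule)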
letta/helpers/tool_rule_solver.py
CHANGED
@@ -10,6 +10,7 @@ from letta.schemas.tool_rule import (
     ContinueToolRule,
     InitToolRule,
     MaxCountPerStepToolRule,
+    ParentToolRule,
     TerminalToolRule,
 )

@@ -33,6 +34,9 @@ class ToolRulesSolver(BaseModel):
     child_based_tool_rules: List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]] = Field(
         default_factory=list, description="Standard tool rules for controlling execution sequence and allowed transitions."
     )
+    parent_tool_rules: List[ParentToolRule] = Field(
+        default_factory=list, description="Filter tool rules to be used to filter out tools from the available set."
+    )
     terminal_tool_rules: List[TerminalToolRule] = Field(
         default_factory=list, description="Terminal tool rules that end the agent loop if called."
     )
@@ -44,6 +48,7 @@ class ToolRulesSolver(BaseModel):
         init_tool_rules: Optional[List[InitToolRule]] = None,
         continue_tool_rules: Optional[List[ContinueToolRule]] = None,
         child_based_tool_rules: Optional[List[Union[ChildToolRule, ConditionalToolRule, MaxCountPerStepToolRule]]] = None,
+        parent_tool_rules: Optional[List[ParentToolRule]] = None,
         terminal_tool_rules: Optional[List[TerminalToolRule]] = None,
         tool_call_history: Optional[List[str]] = None,
         **kwargs,
@@ -52,6 +57,7 @@ class ToolRulesSolver(BaseModel):
             init_tool_rules=init_tool_rules or [],
             continue_tool_rules=continue_tool_rules or [],
             child_based_tool_rules=child_based_tool_rules or [],
+            parent_tool_rules=parent_tool_rules or [],
             terminal_tool_rules=terminal_tool_rules or [],
             tool_call_history=tool_call_history or [],
             **kwargs,
@@ -78,6 +84,9 @@ class ToolRulesSolver(BaseModel):
             elif rule.type == ToolRuleType.max_count_per_step:
                 assert isinstance(rule, MaxCountPerStepToolRule)
                 self.child_based_tool_rules.append(rule)
+            elif rule.type == ToolRuleType.parent_last_tool:
+                assert isinstance(rule, ParentToolRule)
+                self.parent_tool_rules.append(rule)

     def register_tool_call(self, tool_name: str):
         """Update the internal state to track tool call history."""
@@ -102,13 +111,14 @@ class ToolRulesSolver(BaseModel):
                 # If there are init tool rules, only return those defined in the init tool rules
                 return [rule.tool_name for rule in self.init_tool_rules]
             else:
-                # Otherwise, return all
+                # Otherwise, return all tools besides those constrained by parent tool rules
+                available_tools = available_tools - set.union(set(), *(set(rule.children) for rule in self.parent_tool_rules))
                 return list(available_tools)
         else:
             # Collect valid tools from all child-based rules
             valid_tool_sets = [
                 rule.get_valid_tools(self.tool_call_history, available_tools, last_function_response)
-                for rule in self.child_based_tool_rules
+                for rule in self.child_based_tool_rules + self.parent_tool_rules
             ]

             # Compute intersection of all valid tool sets
letta/jobs/scheduler.py
CHANGED
@@ -12,17 +12,19 @@ scheduler = AsyncIOScheduler()

 def start_cron_jobs(server: SyncServer):
     """Initialize cron jobs"""
-
-
-
-
-
-
-
-
-
-
+    if settings.enable_batch_job_polling:
+        scheduler.add_job(
+            poll_running_llm_batches,
+            args=[server],
+            trigger=IntervalTrigger(seconds=settings.poll_running_llm_batches_interval_seconds),
+            next_run_time=datetime.datetime.now(datetime.timezone.utc),
+            id="poll_llm_batches",
+            name="Poll LLM API batch jobs and update status",
+            replace_existing=True,
+        )
+        scheduler.start()


 def shutdown_cron_scheduler():
-
+    if settings.enable_batch_job_polling:
+        scheduler.shutdown()
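
For reference, a standalone sketch of the polling pattern this enables, using apscheduler directly; the flag and interval here stand in for the new settings.enable_batch_job_polling and settings.poll_running_llm_batches_interval_seconds values, and the coroutine is a placeholder for poll_running_llm_batches(server):

import asyncio
import datetime

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger

ENABLE_BATCH_JOB_POLLING = True  # stand-in for settings.enable_batch_job_polling
POLL_INTERVAL_SECONDS = 300      # stand-in for settings.poll_running_llm_batches_interval_seconds


async def poll_running_llm_batches_stub():
    # Placeholder for the real poll coroutine that checks batch job status.
    print("polling LLM batch jobs...")


async def main():
    scheduler = AsyncIOScheduler()
    if ENABLE_BATCH_JOB_POLLING:
        scheduler.add_job(
            poll_running_llm_batches_stub,
            trigger=IntervalTrigger(seconds=POLL_INTERVAL_SECONDS),
            next_run_time=datetime.datetime.now(datetime.timezone.utc),  # fire once immediately
            id="poll_llm_batches",
            replace_existing=True,
        )
        scheduler.start()
    await asyncio.sleep(1)  # let the first run fire, then shut down
    if ENABLE_BATCH_JOB_POLLING:
        scheduler.shutdown()


asyncio.run(main())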
letta/llm_api/anthropic_client.py
CHANGED
@@ -27,6 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
@@ -59,25 +60,55 @@ class AnthropicClient(LLMClientBase):
         return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])

     @trace_method
-    async def
+    async def send_llm_batch_request_async(
+        self,
+        agent_messages_mapping: Dict[str, List[PydanticMessage]],
+        agent_tools_mapping: Dict[str, List[dict]],
+        agent_llm_config_mapping: Dict[str, LLMConfig],
+    ) -> BetaMessageBatch:
         """
-
+        Sends a batch request to the Anthropic API using the provided agent messages and tools mappings.

         Args:
-
+            agent_messages_mapping: A dict mapping agent_id to their list of PydanticMessages.
+            agent_tools_mapping: A dict mapping agent_id to their list of tool dicts.
+            agent_llm_config_mapping: A dict mapping agent_id to their LLM config

         Returns:
-
+            BetaMessageBatch: The batch response from the Anthropic API.
+
+        Raises:
+            ValueError: If the sets of agent_ids in the two mappings do not match.
+            Exception: Transformed errors from the underlying API call.
         """
-
+        # Validate that both mappings use the same set of agent_ids.
+        if set(agent_messages_mapping.keys()) != set(agent_tools_mapping.keys()):
+            raise ValueError("Agent mappings for messages and tools must use the same agent_ids.")
+
+        try:
+            requests = {
+                agent_id: self.build_request_data(
+                    messages=agent_messages_mapping[agent_id],
+                    llm_config=agent_llm_config_mapping[agent_id],
+                    tools=agent_tools_mapping[agent_id],
+                )
+                for agent_id in agent_messages_mapping
+            }

-
-
-
+            client = self._get_anthropic_client(async_client=True)
+
+            anthropic_requests = [
+                Request(custom_id=agent_id, params=MessageCreateParamsNonStreaming(**params)) for agent_id, params in requests.items()
+            ]
+
+            batch_response = await client.beta.messages.batches.create(requests=anthropic_requests)

-
+            return batch_response

-
+        except Exception as e:
+            # Enhance logging here if additional context is needed
+            logger.error("Error during send_llm_batch_request_async.", exc_info=True)
+            raise self.handle_llm_error(e)

     @trace_method
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
@@ -90,6 +121,7 @@ class AnthropicClient(LLMClientBase):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: Optional[List[dict]] = None,
         force_tool_call: Optional[str] = None,
     ) -> dict:
@@ -99,20 +131,20 @@ class AnthropicClient(LLMClientBase):
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

-        if not
+        if not llm_config.max_tokens:
             raise ValueError("Max tokens must be set for anthropic")

         data = {
-            "model":
-            "max_tokens":
-            "temperature":
+            "model": llm_config.model,
+            "max_tokens": llm_config.max_tokens,
+            "temperature": llm_config.temperature,
         }

         # Extended Thinking
-        if
+        if llm_config.enable_reasoner:
             data["thinking"] = {
                 "type": "enabled",
-                "budget_tokens":
+                "budget_tokens": llm_config.max_reasoning_tokens,
             }
             # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
             data["temperature"] = 1.0
@@ -132,13 +164,13 @@ class AnthropicClient(LLMClientBase):
             tools_for_request = [Tool(function=f) for f in tools if f["name"] == force_tool_call]

             # need to have this setting to be able to put inner thoughts in kwargs
-            if not
+            if not llm_config.put_inner_thoughts_in_kwargs:
                 logger.warning(
                     f"Force setting put_inner_thoughts_in_kwargs to True for Claude because there is a forced tool call: {force_tool_call}"
                 )
-
+                llm_config.put_inner_thoughts_in_kwargs = True
         else:
-            if
+            if llm_config.put_inner_thoughts_in_kwargs:
                 # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
@@ -151,7 +183,7 @@ class AnthropicClient(LLMClientBase):

         # Add inner thoughts kwarg
         # TODO: Can probably make this more efficient
-        if tools_for_request and len(tools_for_request) > 0 and
+        if tools_for_request and len(tools_for_request) > 0 and llm_config.put_inner_thoughts_in_kwargs:
             tools_with_inner_thoughts = add_inner_thoughts_to_functions(
                 functions=[t.function.model_dump() for t in tools_for_request],
                 inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -173,7 +205,7 @@ class AnthropicClient(LLMClientBase):
         data["messages"] = [
             m.to_anthropic_dict(
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-                put_inner_thoughts_in_kwargs=bool(
+                put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
             )
             for m in messages[1:]
         ]
@@ -189,7 +221,7 @@ class AnthropicClient(LLMClientBase):
         # https://docs.anthropic.com/en/api/messages#body-messages
         # NOTE: cannot prefill with tools for opus:
         # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-        if prefix_fill and not
+        if prefix_fill and not llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
             data["messages"].append(
                 # Start the thinking process for the assistant
                 {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
@@ -323,13 +355,19 @@ class AnthropicClient(LLMClientBase):
                 if content_part.type == "text":
                     content = strip_xml_tags(string=content_part.text, tag="thinking")
                 if content_part.type == "tool_use":
+                    # hack for tool rules
+                    input = json.loads(json.dumps(content_part.input))
+                    if "id" in input and input["id"].startswith("toolu_") and "function" in input:
+                        arguments = str(input["function"]["arguments"])
+                    else:
+                        arguments = json.dumps(content_part.input, indent=2)
                     tool_calls = [
                         ToolCall(
                             id=content_part.id,
                             type="function",
                             function=FunctionCall(
                                 name=content_part.name,
-                                arguments=
+                                arguments=arguments,
                             ),
                         )
                     ]
letta/llm_api/cohere.py
CHANGED
@@ -315,7 +315,7 @@ def cohere_chat_completions_request(
     data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

     # Convert messages to Cohere format
-    msg_objs = [Message.dict_to_message(
+    msg_objs = [Message.dict_to_message(agent_id=uuid.uuid4(), openai_message_dict=m) for m in data["messages"]]

     # System message 0 should instead be a "preamble"
     # See: https://docs.cohere.com/reference/chat
letta/llm_api/google_ai_client.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 import uuid
 from typing import List, Optional, Tuple

@@ -11,12 +12,16 @@ from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.log import get_logger
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings
 from letta.utils import get_tool_call_id

+logger = get_logger(__name__)
+

 class GoogleAIClient(LLMClientBase):

@@ -24,6 +29,8 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
+        # print("[google_ai request]", json.dumps(request_data, indent=2))
+
         url, headers = get_gemini_endpoint_and_headers(
             base_url=str(self.llm_config.model_endpoint),
             model=self.llm_config.model,
@@ -36,6 +43,7 @@ class GoogleAIClient(LLMClientBase):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
     ) -> dict:
@@ -44,9 +52,10 @@ class GoogleAIClient(LLMClientBase):
         """
         if tools:
             tools = [{"type": "function", "function": f} for f in tools]
-
-
-
+            tool_objs = [Tool(**t) for t in tools]
+            tool_names = [t.function.name for t in tool_objs]
+            # Convert to the exact payload style Google expects
+            tools = self.convert_tools_to_google_ai_format(tool_objs)
         contents = self.add_dummy_model_messages(
             [m.to_google_ai_dict() for m in messages],
         )
@@ -55,8 +64,8 @@ class GoogleAIClient(LLMClientBase):
             "contents": contents,
             "tools": tools,
             "generation_config": {
-                "temperature":
-                "max_output_tokens":
+                "temperature": llm_config.temperature,
+                "max_output_tokens": llm_config.max_tokens,
             },
         }

@@ -65,6 +74,8 @@ class GoogleAIClient(LLMClientBase):
                 function_calling_config=FunctionCallingConfig(
                     # ANY mode forces the model to predict only function calls
                     mode=FunctionCallingConfigMode.ANY,
+                    # Provide the list of tools (though empty should also work, it seems not to)
+                    allowed_function_names=tool_names,
                 )
             )
             request_data["tool_config"] = tool_config.model_dump()
@@ -99,6 +110,8 @@ class GoogleAIClient(LLMClientBase):
             }
         }
         """
+        # print("[google_ai response]", json.dumps(response_data, indent=2))
+
         try:
             choices = []
             index = 0
@@ -109,6 +122,17 @@ class GoogleAIClient(LLMClientBase):
                 assert role == "model", f"Unknown role in response: {role}"

                 parts = content["parts"]
+
+                # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
+                # so let's disable it for now
+
+                # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
+                # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
+                # To patch this, if we have multiple parts we can take the last one
+                if len(parts) > 1:
+                    logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+                    parts = [parts[-1]]
+
                 # TODO support parts / multimodal
                 # TODO support parallel tool calling natively
                 # TODO Alternative here is to throw away everything else except for the first part
@@ -199,10 +223,22 @@ class GoogleAIClient(LLMClientBase):
             # "totalTokenCount": 36
             # }
             if "usageMetadata" in response_data:
+                usage_data = response_data["usageMetadata"]
+                if "promptTokenCount" not in usage_data:
+                    raise ValueError(f"promptTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+                if "totalTokenCount" not in usage_data:
+                    raise ValueError(f"totalTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+                if "candidatesTokenCount" not in usage_data:
+                    raise ValueError(f"candidatesTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
+
+                prompt_tokens = usage_data["promptTokenCount"]
+                completion_tokens = usage_data["candidatesTokenCount"]
+                total_tokens = usage_data["totalTokenCount"]
+
                 usage = UsageStatistics(
-                    prompt_tokens=
-                    completion_tokens=
-                    total_tokens=
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
                 )
             else:
                 # Count it ourselves
@@ -282,17 +318,16 @@ class GoogleAIClient(LLMClientBase):
             for t in tools
         ]

-        #
+        # Add inner thoughts if needed
         for func in function_list:
-
-
-            param_fields["type"] = param_fields["type"].upper()
+            # Note: Google AI API used to have weird casing requirements, but not any more
+
             # Add inner thoughts
             if self.llm_config.put_inner_thoughts_in_kwargs:
                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION

                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
-                    "type": "
+                    "type": "string",
                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
                 }
                 func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
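
The resulting tool_config fragment looks roughly like the following (a sketch of the serialized payload with hypothetical tool names; with ANY mode the model is forced to call a function, and the diff notes that an empty allow-list does not behave as expected):

tool_names = ["send_message", "archival_memory_search"]  # hypothetical tools

request_fragment = {
    "tool_config": {
        "function_calling_config": {
            "mode": "ANY",                        # force a function call
            "allowed_function_names": tool_names,
        }
    }
}
print(request_fragment)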
letta/llm_api/google_vertex_client.py
CHANGED
@@ -9,6 +9,7 @@ from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings
@@ -37,20 +38,24 @@ class GoogleVertexClient(GoogleAIClient):
     def build_request_data(
         self,
         messages: List[PydanticMessage],
+        llm_config: LLMConfig,
         tools: List[dict],
         force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, tools, force_tool_call)
+        request_data = super().build_request_data(messages, self.llm_config, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")

+        tool_names = [t["name"] for t in tools]
         tool_config = ToolConfig(
             function_calling_config=FunctionCallingConfig(
                 # ANY mode forces the model to predict only function calls
                 mode=FunctionCallingConfigMode.ANY,
+                # Provide the list of tools (though empty should also work, it seems not to)
+                allowed_function_names=tool_names,
             )
         )
         request_data["config"]["tool_config"] = tool_config.model_dump()
@@ -86,6 +91,8 @@ class GoogleVertexClient(GoogleAIClient):
             }
         }
         """
+        # print(response_data)
+
         response = GenerateContentResponse(**response_data)
         try:
             choices = []
@@ -97,6 +104,17 @@ class GoogleVertexClient(GoogleAIClient):
                 assert role == "model", f"Unknown role in response: {role}"

                 parts = content.parts
+
+                # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
+                # so let's disable it for now
+
+                # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
+                # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
+                # To patch this, if we have multiple parts we can take the last one
+                if len(parts) > 1:
+                    logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
+                    parts = [parts[-1]]
+
                 # TODO support parts / multimodal
                 # TODO support parallel tool calling natively
                 # TODO Alternative here is to throw away everything else except for the first part
|
letta/llm_api/llm_client_base.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from abc import abstractmethod
|
2
|
-
from typing import List, Optional, Union
|
2
|
+
from typing import Dict, List, Optional, Union
|
3
3
|
|
4
|
+
from anthropic.types.beta.messages import BetaMessageBatch
|
4
5
|
from openai import AsyncStream, Stream
|
5
6
|
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
6
7
|
|
@@ -21,7 +22,6 @@ class LLMClientBase:
|
|
21
22
|
self,
|
22
23
|
llm_config: LLMConfig,
|
23
24
|
put_inner_thoughts_first: Optional[bool] = True,
|
24
|
-
use_structured_output: Optional[bool] = True,
|
25
25
|
use_tool_naming: bool = True,
|
26
26
|
):
|
27
27
|
self.llm_config = llm_config
|
@@ -40,7 +40,7 @@ class LLMClientBase:
|
|
40
40
|
If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
|
41
41
|
Otherwise returns a ChatCompletionResponse.
|
42
42
|
"""
|
43
|
-
request_data = self.build_request_data(messages, tools, force_tool_call)
|
43
|
+
request_data = self.build_request_data(messages, self.llm_config, tools, force_tool_call)
|
44
44
|
|
45
45
|
try:
|
46
46
|
log_event(name="llm_request_sent", attributes=request_data)
|
@@ -66,8 +66,7 @@ class LLMClientBase:
|
|
66
66
|
If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
|
67
67
|
Otherwise returns a ChatCompletionResponse.
|
68
68
|
"""
|
69
|
-
request_data = self.build_request_data(messages, tools, force_tool_call)
|
70
|
-
response_data = {}
|
69
|
+
request_data = self.build_request_data(messages, self.llm_config, tools, force_tool_call)
|
71
70
|
|
72
71
|
try:
|
73
72
|
log_event(name="llm_request_sent", attributes=request_data)
|
@@ -81,10 +80,19 @@ class LLMClientBase:
|
|
81
80
|
|
82
81
|
return self.convert_response_to_chat_completion(response_data, messages)
|
83
82
|
|
83
|
+
async def send_llm_batch_request_async(
|
84
|
+
self,
|
85
|
+
agent_messages_mapping: Dict[str, List[Message]],
|
86
|
+
agent_tools_mapping: Dict[str, List[dict]],
|
87
|
+
agent_llm_config_mapping: Dict[str, LLMConfig],
|
88
|
+
) -> Union[BetaMessageBatch]:
|
89
|
+
raise NotImplementedError
|
90
|
+
|
84
91
|
@abstractmethod
|
85
92
|
def build_request_data(
|
86
93
|
self,
|
87
94
|
messages: List[Message],
|
95
|
+
llm_config: LLMConfig,
|
88
96
|
tools: List[dict],
|
89
97
|
force_tool_call: Optional[str] = None,
|
90
98
|
) -> dict:
|
letta/llm_api/openai.py
CHANGED
@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
             tool_choice=tool_choice,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     else:
         data = ChatCompletionRequest(
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
             function_call=function_call,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
                 tool.function = FunctionSchema(**structured_output_version)
             except ValueError as e:
                 warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
-
     return data


@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
     # except ValueError as e:
     #     warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

+    if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+        data.pop("parallel_tool_calls", None)
     return data