letta-nightly 0.6.24.dev20250212104045__py3-none-any.whl → 0.6.25.dev20250213104102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- letta/__init__.py +1 -1
- letta/agent.py +19 -3
- letta/client/client.py +5 -0
- letta/embeddings.py +21 -0
- letta/functions/helpers.py +28 -1
- letta/llm_api/google_vertex.py +328 -0
- letta/llm_api/llm_api_tools.py +26 -0
- letta/orm/agent.py +7 -1
- letta/schemas/agent.py +14 -1
- letta/schemas/embedding_config.py +1 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/message.py +0 -11
- letta/schemas/providers.py +42 -3
- letta/server/rest_api/routers/v1/tools.py +15 -2
- letta/server/server.py +10 -4
- letta/services/agent_manager.py +5 -0
- letta/services/message_manager.py +89 -64
- letta/settings.py +8 -0
- letta/utils.py +17 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/METADATA +3 -2
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/RECORD +24 -23
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -61,6 +61,7 @@ from letta.utils import (
     get_utc_time,
     json_dumps,
     json_loads,
+    log_telemetry,
     parse_json,
     printd,
     validate_function_response,
@@ -306,7 +307,7 @@ class Agent(BaseAgent):
         last_function_failed: bool = False,
     ) -> ChatCompletionResponse:
         """Get response from LLM API with robust retry mechanism."""
-
+        log_telemetry(self.logger, "_get_ai_reply start")
         allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(last_function_response=self.last_function_response)
         agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]

@@ -337,6 +338,7 @@ class Agent(BaseAgent):

         for attempt in range(1, empty_response_retry_limit + 1):
             try:
+                log_telemetry(self.logger, "_get_ai_reply create start")
                 response = create(
                     llm_config=self.agent_state.llm_config,
                     messages=message_sequence,
@@ -349,6 +351,7 @@ class Agent(BaseAgent):
                     stream=stream,
                     stream_interface=self.interface,
                 )
+                log_telemetry(self.logger, "_get_ai_reply create finish")

                 # These bottom two are retryable
                 if len(response.choices) == 0 or response.choices[0] is None:
@@ -360,12 +363,13 @@ class Agent(BaseAgent):
                         raise RuntimeError("Finish reason was length (maximum context length)")
                     else:
                         raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
-
+                log_telemetry(self.logger, "_handle_ai_response finish")
                 return response

             except ValueError as ve:
                 if attempt >= empty_response_retry_limit:
                     warnings.warn(f"Retry limit reached. Final error: {ve}")
+                    log_telemetry(self.logger, "_handle_ai_response finish ValueError")
                     raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
                 else:
                     delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
@@ -374,8 +378,10 @@ class Agent(BaseAgent):

             except Exception as e:
                 # For non-retryable errors, exit immediately
+                log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
                 raise e

+        log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
         raise Exception("Retries exhausted and no valid response received.")

     def _handle_ai_response(
@@ -388,7 +394,7 @@ class Agent(BaseAgent):
         response_message_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, bool]:
         """Handles parsing and function execution"""
-
+        log_telemetry(self.logger, "_handle_ai_response start")
         # Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
         if response_message_id is not None:
             assert response_message_id.startswith("message-"), response_message_id
@@ -506,7 +512,13 @@ class Agent(BaseAgent):
             self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1])
             try:
                 # handle tool execution (sandbox) and state updates
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
+                )
                 function_response, sandbox_run_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
+                )

                 if sandbox_run_result and sandbox_run_result.status == "error":
                     messages = self._handle_function_error_response(
@@ -597,6 +609,7 @@ class Agent(BaseAgent):
         elif self.tool_rules_solver.is_terminal_tool(function_name):
             heartbeat_request = False

+        log_telemetry(self.logger, "_handle_ai_response finish")
        return messages, heartbeat_request, function_failed

     def step(
@@ -684,6 +697,9 @@ class Agent(BaseAgent):
             else:
                 break

+        if self.agent_state.message_buffer_autoclear:
+            self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
+
         return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)

     def inner_step(
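The telemetry calls added throughout agent.py use a new log_telemetry helper exported from letta/utils.py (its definition is part of the +17 lines in that file and is not shown in this section). Judging only from the call sites, it takes a logger, an event name, and arbitrary keyword context. A minimal sketch of a compatible helper, offered purely as an assumption about its shape:

import logging
from datetime import datetime, timezone

def log_telemetry(logger: logging.Logger, event: str, **kwargs) -> None:
    """Hypothetical sketch: emit a timestamped telemetry event with keyword context."""
    context = " ".join(f"{key}={value}" for key, value in kwargs.items())
    logger.info(f"[telemetry] {datetime.now(timezone.utc).isoformat()} {event} {context}".rstrip())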
letta/client/client.py
CHANGED
@@ -73,6 +73,7 @@ class AbstractClient(object):
         metadata: Optional[Dict] = {"human:": DEFAULT_HUMAN, "persona": DEFAULT_PERSONA},
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         raise NotImplementedError

@@ -540,6 +541,7 @@ class RESTClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -600,6 +602,7 @@ class RESTClient(AbstractClient):
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "include_base_tools": include_base_tools,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
@@ -2353,6 +2356,7 @@ class LocalClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -2404,6 +2408,7 @@ class LocalClient(AbstractClient):
             "embedding_config": embedding_config if embedding_config else self._default_embedding_config,
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
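Both client implementations now accept and forward the new message_buffer_autoclear flag when creating agents. A short usage sketch; the agent name and client setup here are illustrative assumptions, not part of the diff:

from letta import create_client

client = create_client()  # LocalClient by default; RESTClient when a base_url is given

# Agents created with message_buffer_autoclear=True have their in-context
# message buffer trimmed back to the system message after each step (see agent.py above).
agent_state = client.create_agent(
    name="stateless-helper",
    message_buffer_autoclear=True,
)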
letta/embeddings.py
CHANGED
@@ -188,6 +188,19 @@ class GoogleEmbeddings:
         return response_json["embedding"]["values"]


+class GoogleVertexEmbeddings:
+
+    def __init__(self, model: str, project_id: str, region: str):
+        from google import genai
+
+        self.client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+        self.model = model
+
+    def get_text_embedding(self, text: str):
+        response = self.client.generate_embeddings(content=text, model=self.model)
+        return response.embeddings[0].embedding
+
+
 def query_embedding(embedding_model, query_text: str):
     """Generate padded embedding for querying database"""
     query_vec = embedding_model.get_text_embedding(query_text)
@@ -267,5 +280,13 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         )
         return model

+    elif endpoint_type == "google_vertex":
+        model = GoogleVertexEmbeddings(
+            model=config.embedding_model,
+            api_key=model_settings.gemini_api_key,
+            base_url=model_settings.gemini_base_url,
+        )
+        return model
+
     else:
         raise ValueError(f"Unknown endpoint type {endpoint_type}")
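The new GoogleVertexEmbeddings class wraps the google-genai client. As defined in the hunk above, its constructor takes model, project_id, and region, while the new embedding_model() branch passes api_key and base_url instead. A hedged usage sketch against the constructor as defined, with placeholder GCP values:

from letta.embeddings import GoogleVertexEmbeddings

embedder = GoogleVertexEmbeddings(
    model="text-embedding-004",   # placeholder model name
    project_id="my-gcp-project",  # placeholder project
    region="us-central1",         # placeholder region
)
vector = embedder.get_text_embedding("Hello from Letta")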
letta/functions/helpers.py
CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import log_telemetry


 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -341,10 +342,16 @@ async def async_send_message_with_retries(
     timeout: int,
     logging_prefix: Optional[str] = None,
 ) -> str:
-
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
+    log_telemetry(sender_agent.logger, f"async_send_message_with_retries start", target_agent_id=target_agent_id)
+
     for attempt in range(1, max_retries + 1):
         try:
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream start",
+                target_agent_id=target_agent_id,
+            )
             response = await asyncio.wait_for(
                 send_message_to_agent_no_stream(
                     server=server,
@@ -354,15 +361,24 @@ async def async_send_message_with_retries(
                 ),
                 timeout=timeout,
             )
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream finish",
+                target_agent_id=target_agent_id,
+            )

             # Then parse out the assistant message
             assistant_message = parse_letta_response_for_assistant_message(target_agent_id, response)
             if assistant_message:
                 sender_agent.logger.info(f"{logging_prefix} - {assistant_message}")
+                log_telemetry(
+                    sender_agent.logger, f"async_send_message_with_retries finish with assistant message", target_agent_id=target_agent_id
+                )
                 return assistant_message
             else:
                 msg = f"(No response from agent {target_agent_id})"
                 sender_agent.logger.info(f"{logging_prefix} - {msg}")
+                log_telemetry(sender_agent.logger, f"async_send_message_with_retries finish no response", target_agent_id=target_agent_id)
                 return msg

         except asyncio.TimeoutError:
@@ -380,6 +396,12 @@ async def async_send_message_with_retries(
                 await asyncio.sleep(backoff)
             else:
                 sender_agent.logger.error(f"{logging_prefix} - Fatal error: {error_msg}")
+                log_telemetry(
+                    sender_agent.logger,
+                    f"async_send_message_with_retries finish fatal error",
+                    target_agent_id=target_agent_id,
+                    error_msg=error_msg,
+                )
                 raise Exception(error_msg)


@@ -468,6 +490,7 @@ def fire_and_forget_send_to_agent(


 async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent", message: str, tags: List[str]) -> List[str]:
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async start", message=message, tags=tags)
     server = get_letta_server()

     augmented_message = (
@@ -477,7 +500,9 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
     )

     # Retrieve up to 100 matching agents
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents start", message=message, tags=tags)
     matching_agents = server.agent_manager.list_agents(actor=sender_agent.user, tags=tags, match_all_tags=True, limit=100)
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents finish", message=message, tags=tags)

     # Create a system message
     messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -504,4 +529,6 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
             final.append(str(r))
         else:
             final.append(r)
+
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
     return final
letta/llm_api/google_vertex.py
ADDED
@@ -0,0 +1,328 @@
+import uuid
+from typing import List, Optional
+
+from letta.constants import NON_USER_MSG_PREFIX
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+
+def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
+    """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+    so there is no natural follow-up 'model' role message.
+
+    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+    with role == 'model' that is placed in-betweeen and function output
+    (role == 'tool') and user message (role == 'user').
+    """
+    dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]}
+    messages_with_padding = []
+    for i, message in enumerate(messages):
+        messages_with_padding.append(message)
+        # Check if the current message role is 'tool' and the next message role is 'user'
+        if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+            messages_with_padding.append(dummy_yield_message)
+
+    return messages_with_padding
+
+
+# TODO use pydantic model as input
+def to_google_ai(openai_message_dict: dict) -> dict:
+
+    # TODO supports "parts" as part of multimodal support
+    assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported"
+    if openai_message_dict["role"] == "user":
+        google_ai_message_dict = {
+            "role": "user",
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "assistant":
+        google_ai_message_dict = {
+            "role": "model",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "tool":
+        google_ai_message_dict = {
+            "role": "function",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    else:
+        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
+
+
+# TODO convert return type to pydantic
+def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
+    """
+    OpenAI style:
+      "tools": [{
+        "type": "function",
+        "function": {
+            "name": "find_movies",
+            "description": "find ....",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                 PARAM: {
+                   "type": PARAM_TYPE,  # eg "string"
+                   "description": PARAM_DESCRIPTION,
+                 },
+                 ...
+              },
+              "required": List[str],
+            }
+        }
+      }
+      ]
+
+    Google AI style:
+      "tools": [{
+        "functionDeclarations": [{
+          "name": "find_movies",
+          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+          "parameters": {
+            "type": "OBJECT",
+            "properties": {
+              "location": {
+                "type": "STRING",
+                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+              },
+              "description": {
+                "type": "STRING",
+                "description": "Any kind of description including category or genre, title words, attributes, etc."
+              }
+            },
+            "required": ["description"]
+          }
+        }, {
+          "name": "find_theaters",
+          ...
+    """
+    function_list = [
+        dict(
+            name=t.function.name,
+            description=t.function.description,
+            parameters=t.function.parameters,  # TODO need to unpack
+        )
+        for t in tools
+    ]
+
+    # Correct casing + add inner thoughts if needed
+    for func in function_list:
+        func["parameters"]["type"] = "OBJECT"
+        for param_name, param_fields in func["parameters"]["properties"].items():
+            param_fields["type"] = param_fields["type"].upper()
+        # Add inner thoughts
+        if inner_thoughts_in_kwargs:
+            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+            func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                "type": "STRING",
+                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+            }
+            func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+    return [{"functionDeclarations": function_list}]
+
+
+def convert_google_ai_response_to_chatcompletion(
+    response,
+    model: str,  # Required since not returned
+    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
+    pull_inner_thoughts_from_args: Optional[bool] = True,
+) -> ChatCompletionResponse:
+    """Google AI API response format is not the same as ChatCompletion, requires unpacking
+
+    Example:
+    {
+      "candidates": [
+        {
+          "content": {
+            "parts": [
+              {
+                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+              }
+            ]
+          }
+        }
+      ],
+      "usageMetadata": {
+        "promptTokenCount": 9,
+        "candidatesTokenCount": 27,
+        "totalTokenCount": 36
+      }
+    }
+    """
+    try:
+        choices = []
+        index = 0
+        for candidate in response.candidates:
+            content = candidate.content
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
+
+            parts = content.parts
+            # TODO support parts / multimodal
+            # TODO support parallel tool calling natively
+            # TODO Alternative here is to throw away everything else except for the first part
+            for response_message in parts:
+                # Convert the actual message style to OpenAI style
+                if response_message.function_call:
+                    function_call = response_message.function_call
+                    function_name = function_call.name
+                    function_args = function_call.args
+                    assert isinstance(function_args, dict), function_args
+
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if pull_inner_thoughts_from_args:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                        assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                else:
+
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
+
+                # Google AI API uses different finish reason strings than OpenAI
+                # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                finish_reason = candidate.finish_reason.value
+                if finish_reason == "STOP":
+                    openai_finish_reason = (
+                        "function_call"
+                        if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                        else "stop"
+                    )
+                elif finish_reason == "MAX_TOKENS":
+                    openai_finish_reason = "length"
+                elif finish_reason == "SAFETY":
+                    openai_finish_reason = "content_filter"
+                elif finish_reason == "RECITATION":
+                    openai_finish_reason = "content_filter"
+                else:
+                    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                choices.append(
+                    Choice(
+                        finish_reason=openai_finish_reason,
+                        index=index,
+                        message=openai_response_message,
+                    )
+                )
+                index += 1
+
+        # if len(choices) > 1:
+        #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+        # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+        # "usageMetadata": {
+        #     "promptTokenCount": 9,
+        #     "candidatesTokenCount": 27,
+        #     "totalTokenCount": 36
+        # }
+        if response.usage_metadata:
+            usage = UsageStatistics(
+                prompt_tokens=response.usage_metadata.prompt_token_count,
+                completion_tokens=response.usage_metadata.candidates_token_count,
+                total_tokens=response.usage_metadata.total_token_count,
+            )
+        else:
+            # Count it ourselves
+            assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+            prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+            completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+            total_tokens = prompt_tokens + completion_tokens
+            usage = UsageStatistics(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        response_id = str(uuid.uuid4())
+        return ChatCompletionResponse(
+            id=response_id,
+            choices=choices,
+            model=model,  # NOTE: Google API doesn't pass back model in the response
+            created=get_utc_time(),
+            usage=usage,
+        )
+    except KeyError as e:
+        raise e
+
+
+# TODO convert 'data' type to pydantic
+def google_vertex_chat_completions_request(
+    model: str,
+    project_id: str,
+    region: str,
+    contents: List[dict],
+    config: dict,
+    add_postfunc_model_messages: bool = True,
+    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
+    # so there's no clean way to put inner thoughts in the same message as a function call
+    inner_thoughts_in_kwargs: bool = True,
+) -> ChatCompletionResponse:
+    """https://ai.google.dev/docs/function_calling
+
+    From https://ai.google.dev/api/rest#service-endpoint:
+    "A service endpoint is a base URL that specifies the network address of an API service.
+    One service might have multiple service endpoints.
+    This service has the following service endpoint and all URIs below are relative to this service endpoint:
+    https://xxx.googleapis.com
+    """
+
+    from google import genai
+
+    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+    # add dummy model messages to the end of the input
+    if add_postfunc_model_messages:
+        contents = add_dummy_model_messages(contents)
+
+    # make request to client
+    response = client.models.generate_content(model=model, contents=contents, config=config)
+    print(response)
+
+    # convert back response
+    try:
+        return convert_google_ai_response_to_chatcompletion(
+            response=response,
+            model=model,
+            input_messages=contents,
+            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+        )
+    except Exception as conversion_error:
+        print(f"Error during response conversion: {conversion_error}")
+        raise conversion_error
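The module's entry point is google_vertex_chat_completions_request, which sends the request through genai.Client and converts the result back into a ChatCompletionResponse. A sketch of a direct call; the model, project, and region values are placeholders, and within Letta this path is normally reached through llm_api_tools.create() (next file):

from letta.llm_api.google_vertex import google_vertex_chat_completions_request

response = google_vertex_chat_completions_request(
    model="gemini-1.5-pro-002",   # placeholder model name
    project_id="my-gcp-project",  # placeholder project
    region="us-central1",         # placeholder region
    contents=[{"role": "user", "parts": [{"text": "What movies are playing in Mountain View?"}]}],
    config={"tools": None, "temperature": 0.7, "max_output_tokens": 1024},
)
print(response.choices[0].message.content)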
letta/llm_api/llm_api_tools.py
CHANGED
@@ -252,6 +252,32 @@ def create(
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

+    elif llm_config.model_endpoint_type == "google_vertex":
+        from letta.llm_api.google_vertex import google_vertex_chat_completions_request
+
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+        if not use_tool_naming:
+            raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
+
+        if functions is not None:
+            tools = [{"type": "function", "function": f} for f in functions]
+            tools = [Tool(**t) for t in tools]
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
+        else:
+            tools = None
+
+        config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
+
+        return google_vertex_chat_completions_request(
+            model=llm_config.model,
+            project_id=model_settings.google_cloud_project,
+            region=model_settings.google_cloud_location,
+            contents=[m.to_google_ai_dict() for m in messages],
+            config=config,
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+        )
+
     elif llm_config.model_endpoint_type == "anthropic":
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
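Requests reach this branch when the agent's LLMConfig uses the new google_vertex endpoint type; the GCP project and location come from model settings (google_cloud_project and google_cloud_location, see the settings.py change) rather than from the config itself. A sketch of such a config, with placeholder field values:

from letta.schemas.llm_config import LLMConfig

vertex_llm_config = LLMConfig(
    model="gemini-1.5-pro-002",           # placeholder model name
    model_endpoint_type="google_vertex",  # routes create() to the Vertex branch above
    model_endpoint="https://us-central1-aiplatform.googleapis.com",  # placeholder endpoint
    context_window=32000,
)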
letta/orm/agent.py
CHANGED
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List, Optional

-from sqlalchemy import JSON, Index, String
+from sqlalchemy import JSON, Boolean, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship

 from letta.orm.block import Block
@@ -62,6 +62,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # Tool rules
     tool_rules: Mapped[Optional[List[ToolRule]]] = mapped_column(ToolRulesColumn, doc="the tool rules for this agent.")

+    # Stateless
+    message_buffer_autoclear: Mapped[bool] = mapped_column(
+        Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -146,6 +151,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "project_id": self.project_id,
             "template_id": self.template_id,
             "base_template_id": self.base_template_id,
+            "message_buffer_autoclear": self.message_buffer_autoclear,
         }

         return self.__pydantic_model__(**state)