letta-nightly 0.7.1.dev20250423104245__py3-none-any.whl → 0.7.3.dev20250424054013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +2 -1
- letta/agents/letta_agent.py +2 -1
- letta/agents/letta_agent_batch.py +8 -3
- letta/agents/voice_agent.py +2 -2
- letta/client/client.py +3 -0
- letta/functions/functions.py +2 -1
- letta/functions/schema_generator.py +5 -0
- letta/helpers/composio_helpers.py +1 -1
- letta/helpers/datetime_helpers.py +9 -0
- letta/jobs/llm_batch_job_polling.py +2 -1
- letta/llm_api/anthropic.py +10 -6
- letta/llm_api/anthropic_client.py +7 -6
- letta/llm_api/cohere.py +2 -2
- letta/llm_api/google_ai_client.py +11 -45
- letta/llm_api/google_vertex_client.py +8 -7
- letta/llm_api/llm_client.py +8 -14
- letta/llm_api/llm_client_base.py +17 -16
- letta/llm_api/openai.py +11 -4
- letta/llm_api/openai_client.py +47 -14
- letta/local_llm/chat_completion_proxy.py +2 -2
- letta/memory.py +2 -1
- letta/personas/examples/sleeptime_memory_persona.txt +5 -0
- letta/schemas/enums.py +3 -0
- letta/schemas/letta_message_content.py +2 -1
- letta/schemas/llm_config.py +12 -2
- letta/schemas/message.py +17 -0
- letta/schemas/openai/chat_completion_response.py +52 -3
- letta/server/rest_api/chat_completions_interface.py +2 -2
- letta/server/rest_api/interface.py +1 -1
- letta/server/rest_api/routers/v1/messages.py +9 -1
- letta/server/server.py +1 -6
- letta/services/agent_manager.py +6 -1
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/METADATA +1 -1
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/RECORD +38 -38
- letta/personas/examples/offline_memory_persona.txt +0 -4
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/entry_points.txt +0 -0
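At a glance, the recurring change across these hunks is that LLMClient.create now takes a provider (the model endpoint type) instead of a full LLM config, and the LLMConfig is passed along with each request or response conversion instead. A minimal sketch of the new calling convention, based on the hunks below (the identifiers come from this diff; the surrounding setup is illustrative, not copied from any one file):

    from letta.llm_api.llm_client import LLMClient

    llm_client = LLMClient.create(
        provider=agent_state.llm_config.model_endpoint_type,  # previously the whole llm_config was passed here
        put_inner_thoughts_first=True,
    )
    response = llm_client.send_llm_request(
        messages=message_sequence,            # prepared message sequence
        llm_config=agent_state.llm_config,    # the config now travels with each call
        tools=allowed_functions,
        stream=False,
    )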
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -332,13 +332,14 @@ class Agent(BaseAgent):
         log_telemetry(self.logger, "_get_ai_reply create start")
         # New LLM client flow
         llm_client = LLMClient.create(
-
+            provider=self.agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=put_inner_thoughts_first,
         )

         if llm_client and not stream:
             response = llm_client.send_llm_request(
                 messages=message_sequence,
+                llm_config=self.agent_state.llm_config,
                 tools=allowed_functions,
                 stream=stream,
                 force_tool_call=force_tool_call,
letta/agents/letta_agent.py
CHANGED
@@ -66,7 +66,7 @@ class LettaAgent(BaseAgent):
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
-
+            provider=agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
         )
         for step in range(max_steps):
@@ -182,6 +182,7 @@ class LettaAgent(BaseAgent):

             response = await llm_client.send_llm_request_async(
                 messages=in_context_messages,
+                llm_config=agent_state.llm_config,
                 tools=allowed_tools,
                 force_tool_call=force_tool_call,
                 stream=stream,
letta/agents/letta_agent_batch.py
CHANGED
@@ -156,7 +156,7 @@ class LettaAgentBatch:

         log_event(name="init_llm_client")
         llm_client = LLMClient.create(
-
+            provider=agent_states[0].llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
         )
         agent_llm_config_mapping = {s.id: s.llm_config for s in agent_states}
@@ -272,9 +272,14 @@ class LettaAgentBatch:
             request_status_updates.append(RequestStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, request_status=status))

             # translate provider‑specific response → OpenAI‑style tool call (unchanged)
-            llm_client = LLMClient.create(
+            llm_client = LLMClient.create(
+                provider=item.llm_config.model_endpoint_type,
+                put_inner_thoughts_first=True,
+            )
             tool_call = (
-                llm_client.convert_response_to_chat_completion(
+                llm_client.convert_response_to_chat_completion(
+                    response_data=pr.message.model_dump(), input_messages=[], llm_config=item.llm_config
+                )
                 .choices[0]
                 .message.tool_calls[0]
             )
letta/agents/voice_agent.py
CHANGED
@@ -90,7 +90,7 @@ class VoiceAgent(BaseAgent):
         # )
         self.message_buffer_limit = message_buffer_limit
         # self.message_buffer_min = message_buffer_min
-        self.
+        self.sleeptime_memory_agent = EphemeralMemoryAgent(
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
         )

@@ -372,7 +372,7 @@ class VoiceAgent(BaseAgent):
             return f"Failed to call tool. Error: {e}", False

     async def _recall_memory(self, query, agent_state: AgentState) -> None:
-        results = await self.
+        results = await self.sleeptime_memory_agent.step([MessageCreate(role="user", content=[TextContent(text=query)])])
         target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
         self.block_manager.update_block(
             block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
letta/client/client.py
CHANGED
@@ -85,6 +85,7 @@ class AbstractClient(object):
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
         message_buffer_autoclear: bool = False,
+        response_format: Optional[ResponseFormatUnion] = None,
     ) -> AgentState:
         raise NotImplementedError

@@ -2352,6 +2353,7 @@ class LocalClient(AbstractClient):
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
         message_buffer_autoclear: bool = False,
+        response_format: Optional[ResponseFormatUnion] = None,
     ) -> AgentState:
         """Create an agent

@@ -2405,6 +2407,7 @@ class LocalClient(AbstractClient):
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "message_buffer_autoclear": message_buffer_autoclear,
+            "response_format": response_format,
         }

         # Only add name if it's not None
letta/functions/functions.py
CHANGED
@@ -2,7 +2,7 @@ import importlib
 import inspect
 from textwrap import dedent  # remove indentation
 from types import ModuleType
-from typing import Dict, List, Optional
+from typing import Dict, List, Literal, Optional

 from letta.errors import LettaToolCreateError
 from letta.functions.schema_generator import generate_schema
@@ -20,6 +20,7 @@ def derive_openai_json_schema(source_code: str, name: Optional[str] = None) -> d
         "Optional": Optional,
         "List": List,
         "Dict": Dict,
+        "Literal": Literal,
         # To support Pydantic models
         # "BaseModel": BaseModel,
         # "Field": Field,
letta/functions/schema_generator.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Type, Union, get_args, get_origin
 from composio.client.collections import ActionParametersModel
 from docstring_parser import parse
 from pydantic import BaseModel
+from typing_extensions import Literal

 from letta.functions.mcp_client.types import MCPTool

@@ -70,6 +71,10 @@ def type_to_json_schema_type(py_type) -> dict:
             "items": type_to_json_schema_type(args[0]),
         }

+    # Handle literals
+    if get_origin(py_type) is Literal:
+        return {"type": "string", "enum": get_args(py_type)}
+
     # Handle object types
     if py_type == dict or origin in (dict, Dict):
         args = get_args(py_type)
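For illustration, a minimal sketch of what the new Literal branch produces; only the branch added above is reproduced, and the helper name and Priority alias are illustrative:

    from typing import Literal, get_args, get_origin

    Priority = Literal["low", "medium", "high"]

    def literal_to_json_schema(py_type) -> dict:
        # Mirrors the added branch: Literal values become a string enum.
        if get_origin(py_type) is Literal:
            return {"type": "string", "enum": get_args(py_type)}
        raise NotImplementedError("non-Literal types elided in this sketch")

    print(literal_to_json_schema(Priority))
    # {'type': 'string', 'enum': ('low', 'medium', 'high')}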
letta/helpers/composio_helpers.py
CHANGED
@@ -10,7 +10,7 @@ def get_composio_api_key(actor: User, logger: Optional[Logger] = None) -> Option
     api_keys = SandboxConfigManager().list_sandbox_env_vars_by_key(key="COMPOSIO_API_KEY", actor=actor)
     if not api_keys:
         if logger:
-            logger.
+            logger.debug(f"No API keys found for Composio. Defaulting to the environment variable...")
         if tool_settings.composio_api_key:
             return tool_settings.composio_api_key
         else:
letta/helpers/datetime_helpers.py
CHANGED
@@ -66,6 +66,15 @@ def get_utc_time() -> datetime:
     return datetime.now(timezone.utc)


+def get_utc_time_int() -> int:
+    return int(get_utc_time().timestamp())
+
+
+def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
+    """Convert Unix timestamp in seconds to UTC datetime object"""
+    return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
+
+
 def format_datetime(dt):
     return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")

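A short usage sketch for the two helpers added above; the function definitions are copied from the hunk, while the variable names are illustrative:

    from datetime import datetime, timezone

    def get_utc_time() -> datetime:
        return datetime.now(timezone.utc)

    def get_utc_time_int() -> int:
        return int(get_utc_time().timestamp())

    def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
        """Convert Unix timestamp in seconds to UTC datetime object"""
        return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)

    created = get_utc_time_int()            # integer Unix timestamp, e.g. for the `created=` fields below
    as_dt = timestamp_to_datetime(created)  # back to a timezone-aware UTC datetime
    assert as_dt.tzinfo == timezone.utc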
letta/jobs/llm_batch_job_polling.py
CHANGED
@@ -73,7 +73,8 @@ async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: st
     """
     updates = []
     try:
-
+        results = await server.anthropic_async_client.beta.messages.batches.results(batch_resp_id)
+        async for item_result in results:
             # Here, custom_id should be the agent_id
             item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
             updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
letta/llm_api/anthropic.py
CHANGED
@@ -20,7 +20,7 @@ from anthropic.types.beta import (
 )

 from letta.errors import BedrockError, BedrockPermissionError
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
 from letta.llm_api.aws_bedrock import get_bedrock_client
 from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response.id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=response.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
                 'logprobs': None
             }
         ],
-        'created':
+        'created': 1713216662,
         'model': 'gpt-4o-mini-2024-07-18',
         'system_fingerprint': 'fp_bd83329f63',
         'object': 'chat.completion.chunk'
@@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
     return ChatCompletionChunkResponse(
         id=message_id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         output_tokens=completion_chunk_tokens,
     )
@@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream(
     chat_completion_response = ChatCompletionResponse(
         id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
         choices=[],
-        created=dummy_message.created_at,
+        created=int(dummy_message.created_at.timestamp()),
         model=chat_completion_request.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream(
                 message_type = stream_interface.process_chunk(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
-                    message_date=
+                    message_date=(
+                        timestamp_to_datetime(chat_completion_response.created)
+                        if create_message_datetime
+                        else timestamp_to_datetime(chat_completion_chunk.created)
+                    ),
                     # if extended_thinking is on, then reasoning_content will be flowing as chunks
                     # TODO handle emitting redacted reasoning content (e.g. as concat?)
                     expect_reasoning_content=extended_thinking,
letta/llm_api/anthropic_client.py
CHANGED
@@ -22,7 +22,7 @@ from letta.errors import (
     LLMServerError,
     LLMUnprocessableEntityError,
 )
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -43,18 +43,18 @@ logger = get_logger(__name__)

 class AnthropicClient(LLMClientBase):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(async_client=False)
         response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

-    async def request_async(self, request_data: dict) -> dict:
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(async_client=True)
         response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

     @trace_method
-    async def stream_async(self, request_data: dict) -> AsyncStream[BetaRawMessageStreamEvent]:
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
         client = self._get_anthropic_client(async_client=True)
         request_data["stream"] = True
         return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
@@ -310,6 +310,7 @@ class AnthropicClient(LLMClientBase):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Example response from Claude 3:
@@ -403,7 +404,7 @@ class AnthropicClient(LLMClientBase):
         chat_completion_response = ChatCompletionResponse(
             id=response.id,
             choices=[choice],
-            created=
+            created=get_utc_time_int(),
             model=response.model,
             usage=UsageStatistics(
                 prompt_tokens=prompt_tokens,
@@ -411,7 +412,7 @@ class AnthropicClient(LLMClientBase):
                 total_tokens=prompt_tokens + completion_tokens,
             ),
         )
-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
                 response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
             )
letta/llm_api/cohere.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional, Union

 import requests

-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.local_llm.utils import count_tokens
 from letta.schemas.message import Message
@@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response_json["response_id"],
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
letta/llm_api/google_ai_client.py
CHANGED
@@ -6,7 +6,7 @@ import requests
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

 from letta.constants import NON_USER_MSG_PREFIX
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -25,15 +25,15 @@ logger = get_logger(__name__)

 class GoogleAIClient(LLMClientBase):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         """
         Performs underlying request to llm and returns raw response.
         """
         # print("[google_ai request]", json.dumps(request_data, indent=2))

         url, headers = get_gemini_endpoint_and_headers(
-            base_url=str(
-            model=
+            base_url=str(llm_config.model_endpoint),
+            model=llm_config.model,
             api_key=str(model_settings.gemini_api_key),
             key_in_header=True,
             generate_content=True,
@@ -55,7 +55,7 @@ class GoogleAIClient(LLMClientBase):
             tool_objs = [Tool(**t) for t in tools]
             tool_names = [t.function.name for t in tool_objs]
             # Convert to the exact payload style Google expects
-            tools = self.convert_tools_to_google_ai_format(tool_objs)
+            tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
         else:
             tool_names = []

@@ -88,6 +88,7 @@ class GoogleAIClient(LLMClientBase):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Converts custom response format from llm client into an OpenAI
@@ -150,7 +151,7 @@ class GoogleAIClient(LLMClientBase):
                     assert isinstance(function_args, dict), function_args

                     # NOTE: this also involves stripping the inner monologue out of the function
-                    if
+                    if llm_config.put_inner_thoughts_in_kwargs:
                         from letta.local_llm.constants import INNER_THOUGHTS_KWARG

                         assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
@@ -259,49 +260,14 @@ class GoogleAIClient(LLMClientBase):
             return ChatCompletionResponse(
                 id=response_id,
                 choices=choices,
-                model=
-                created=
+                model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
             raise e

-    def
-        """Recursively clean schema parts to remove unsupported Google AI keywords."""
-        if not isinstance(schema_part, dict):
-            return
-
-        # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
-        # * Only a subset of the OpenAPI schema is supported.
-        # * Supported parameter types in Python are limited.
-        unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum"]
-        keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
-        for key_to_remove in keys_to_remove_at_this_level:
-            logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
-            del schema_part[key_to_remove]
-
-        if schema_part.get("type") == "string" and "format" in schema_part:
-            allowed_formats = ["enum", "date-time"]
-            if schema_part["format"] not in allowed_formats:
-                logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
-                del schema_part["format"]
-
-        # Check properties within the current level
-        if "properties" in schema_part and isinstance(schema_part["properties"], dict):
-            for prop_name, prop_schema in schema_part["properties"].items():
-                self._clean_google_ai_schema_properties(prop_schema)
-
-        # Check items within arrays
-        if "items" in schema_part and isinstance(schema_part["items"], dict):
-            self._clean_google_ai_schema_properties(schema_part["items"])
-
-        # Check within anyOf, allOf, oneOf lists
-        for key in ["anyOf", "allOf", "oneOf"]:
-            if key in schema_part and isinstance(schema_part[key], list):
-                for item_schema in schema_part[key]:
-                    self._clean_google_ai_schema_properties(item_schema)
-
-    def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
+    def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
         """
         OpenAI style:
         "tools": [{
@@ -365,7 +331,7 @@ class GoogleAIClient(LLMClientBase):
             self._clean_google_ai_schema_properties(func["parameters"])

             # Add inner thoughts
-            if
+            if llm_config.put_inner_thoughts_in_kwargs:
                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION

                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
letta/llm_api/google_vertex_client.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional
 from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig

-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
@@ -18,7 +18,7 @@ from letta.utils import get_tool_call_id

 class GoogleVertexClient(GoogleAIClient):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         """
         Performs underlying request to llm and returns raw response.
         """
@@ -29,7 +29,7 @@ class GoogleVertexClient(GoogleAIClient):
             http_options={"api_version": "v1"},
         )
         response = client.models.generate_content(
-            model=
+            model=llm_config.model,
             contents=request_data["contents"],
             config=request_data["config"],
         )
@@ -45,7 +45,7 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages,
+        request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")

@@ -75,6 +75,7 @@ class GoogleVertexClient(GoogleAIClient):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Converts custom response format from llm client into an OpenAI
@@ -136,7 +137,7 @@ class GoogleVertexClient(GoogleAIClient):
                     assert isinstance(function_args, dict), function_args

                     # NOTE: this also involves stripping the inner monologue out of the function
-                    if
+                    if llm_config.put_inner_thoughts_in_kwargs:
                         from letta.local_llm.constants import INNER_THOUGHTS_KWARG

                         assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
@@ -233,8 +234,8 @@ class GoogleVertexClient(GoogleAIClient):
             return ChatCompletionResponse(
                 id=response_id,
                 choices=choices,
-                model=
-                created=
+                model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
letta/llm_api/llm_client.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Optional

 from letta.llm_api.llm_client_base import LLMClientBase
-from letta.schemas.
+from letta.schemas.enums import ProviderType


 class LLMClient:
@@ -9,17 +9,15 @@ class LLMClient:

     @staticmethod
     def create(
-
+        provider: ProviderType,
         put_inner_thoughts_first: bool = True,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.

         Args:
-
+            provider: The model endpoint type
             put_inner_thoughts_first: Whether to put inner thoughts first in the response
-            use_structured_output: Whether to use structured output
-            use_tool_naming: Whether to use tool naming

         Returns:
             An instance of LLMClientBase subclass
@@ -27,33 +25,29 @@ class LLMClient:
         Raises:
             ValueError: If the model endpoint type is not supported
         """
-        match
-            case
+        match provider:
+            case ProviderType.google_ai:
                 from letta.llm_api.google_ai_client import GoogleAIClient

                 return GoogleAIClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient

                 return GoogleVertexClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient

                 return AnthropicClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient

                 return OpenAIClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case _: