letta-nightly 0.4.1.dev20241004104123__py3-none-any.whl → 0.4.1.dev20241005104008__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/cli/cli.py +30 -365
- letta/cli/cli_config.py +70 -27
- letta/client/client.py +103 -11
- letta/config.py +80 -80
- letta/constants.py +6 -0
- letta/credentials.py +10 -1
- letta/errors.py +63 -5
- letta/llm_api/llm_api_tools.py +110 -52
- letta/local_llm/chat_completion_proxy.py +0 -3
- letta/main.py +1 -2
- letta/metadata.py +12 -0
- letta/providers.py +232 -0
- letta/schemas/block.py +1 -1
- letta/schemas/letta_request.py +17 -0
- letta/schemas/letta_response.py +11 -0
- letta/schemas/llm_config.py +18 -2
- letta/schemas/message.py +40 -13
- letta/server/rest_api/app.py +5 -0
- letta/server/rest_api/interface.py +115 -24
- letta/server/rest_api/routers/v1/agents.py +36 -3
- letta/server/rest_api/routers/v1/llms.py +6 -2
- letta/server/server.py +60 -87
- letta/server/static_files/assets/index-3ab03d5b.css +1 -0
- letta/server/static_files/assets/{index-4d08d8a3.js → index-9a9c449b.js} +69 -69
- letta/server/static_files/index.html +2 -2
- letta/settings.py +144 -114
- letta/utils.py +6 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/RECORD +32 -32
- letta/local_llm/groq/api.py +0 -97
- letta/server/static_files/assets/index-156816da.css +0 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py
CHANGED

```diff
@@ -6,11 +6,16 @@ from typing import List, Optional
 
 from pydantic import Field, field_validator
 
-from letta.constants import
+from letta.constants import (
+    DEFAULT_MESSAGE_TOOL,
+    DEFAULT_MESSAGE_TOOL_KWARG,
+    TOOL_CALL_ID_MAX_LEN,
+)
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_base import LettaBase
 from letta.schemas.letta_message import (
+    AssistantMessage,
     FunctionCall,
     FunctionCallMessage,
     FunctionReturn,
@@ -122,7 +127,12 @@ class Message(BaseMessage):
         json_message["created_at"] = self.created_at.isoformat()
         return json_message
 
-    def to_letta_message(
+    def to_letta_message(
+        self,
+        assistant_message: bool = False,
+        assistant_message_function_name: str = DEFAULT_MESSAGE_TOOL,
+        assistant_message_function_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+    ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
 
         messages = []
@@ -140,16 +150,33 @@ class Message(BaseMessage):
         if self.tool_calls is not None:
             # This is type FunctionCall
             for tool_call in self.tool_calls:
+                # If we're supporting using assistant message,
+                # then we want to treat certain function calls as a special case
+                if assistant_message and tool_call.function.name == assistant_message_function_name:
+                    # We need to unpack the actual message contents from the function call
+                    try:
+                        func_args = json.loads(tool_call.function.arguments)
+                        message_string = func_args[DEFAULT_MESSAGE_TOOL_KWARG]
+                    except KeyError:
+                        raise ValueError(f"Function call {tool_call.function.name} missing {DEFAULT_MESSAGE_TOOL_KWARG} argument")
+                    messages.append(
+                        AssistantMessage(
+                            id=self.id,
+                            date=self.created_at,
+                            assistant_message=message_string,
+                        )
+                    )
+                else:
+                    messages.append(
+                        FunctionCallMessage(
+                            id=self.id,
+                            date=self.created_at,
+                            function_call=FunctionCall(
+                                name=tool_call.function.name,
+                                arguments=tool_call.function.arguments,
+                            ),
+                        )
                     )
-                )
         elif self.role == MessageRole.tool:
             # This is type FunctionReturn
             # Try to interpret the function return, recall that this is how we packaged:
@@ -560,8 +587,8 @@ class Message(BaseMessage):
         if self.tool_calls is not None:
             # NOTE: implied support for multiple calls
             for tool_call in self.tool_calls:
-                function_name = tool_call.function
-                function_args = tool_call.function
+                function_name = tool_call.function.name
+                function_args = tool_call.function.arguments
                 try:
                     # NOTE: Google AI wants actual JSON objects, not strings
                     function_args = json.loads(function_args)
```
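The main change here is that `Message.to_letta_message()` can now surface a designated tool call as an `AssistantMessage` instead of a generic `FunctionCallMessage`. The sketch below illustrates that conversion rule in isolation; the dataclass and dict shapes are illustrative stand-ins for the letta schemas, and the defaults assume `DEFAULT_MESSAGE_TOOL = "send_message"` and `DEFAULT_MESSAGE_TOOL_KWARG = "message"`.

```python
# Minimal sketch of the new conversion rule; FakeFunction and the returned dicts
# are stand-ins for the letta schemas, not the real classes.
import json
from dataclasses import dataclass


@dataclass
class FakeFunction:
    name: str
    arguments: str  # JSON-encoded string, as in OpenAI-style tool calls


def convert_tool_call(func: FakeFunction,
                      assistant_message: bool = False,
                      tool_name: str = "send_message",   # assumed DEFAULT_MESSAGE_TOOL
                      tool_kwarg: str = "message") -> dict:  # assumed DEFAULT_MESSAGE_TOOL_KWARG
    """Mirror the branch in to_letta_message(): designated tool -> assistant message,
    everything else -> function-call passthrough."""
    if assistant_message and func.name == tool_name:
        args = json.loads(func.arguments)
        if tool_kwarg not in args:
            raise ValueError(f"Function call {func.name} missing {tool_kwarg} argument")
        return {"type": "assistant_message", "assistant_message": args[tool_kwarg]}
    return {"type": "function_call", "name": func.name, "arguments": func.arguments}


call = FakeFunction(name="send_message", arguments=json.dumps({"message": "Hello!"}))
print(convert_tool_call(call, assistant_message=True))   # surfaced as assistant text
print(convert_tool_call(call, assistant_message=False))  # raw function-call passthrough
```

With `assistant_message=False` (the default), behavior is unchanged and every tool call is passed through as a function call.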
letta/server/rest_api/app.py
CHANGED

```diff
@@ -1,5 +1,6 @@
 import json
 import logging
+import sys
 from pathlib import Path
 from typing import Optional
 
@@ -71,6 +72,10 @@ def create_application() -> "FastAPI":
         summary="Create LLM agents with long-term memory and custom tools 📚🦙",
         version="1.0.0",  # TODO wire this up to the version in the package
     )
+
+    if "--ade" in sys.argv:
+        settings.cors_origins.append("https://app.letta.com")
+
     app.add_middleware(
         CORSMiddleware,
         allow_origins=settings.cors_origins,
```
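The `--ade` flag whitelists the hosted Agent Development Environment origin before the CORS middleware is installed. A minimal standalone sketch of the same pattern; the base origin list below is a placeholder, whereas letta reads it from `settings.cors_origins`.

```python
# Sketch: opt-in CORS origin controlled by a CLI flag, as in create_application().
import sys

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

cors_origins = ["http://localhost:3000"]  # assumed default, for illustration only
if "--ade" in sys.argv:
    # Only allow the hosted ADE to call this server when explicitly requested.
    cors_origins.append("https://app.letta.com")

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
```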
letta/server/rest_api/interface.py
CHANGED

```diff
@@ -1,10 +1,12 @@
 import asyncio
 import json
 import queue
+import warnings
 from collections import deque
 from datetime import datetime
 from typing import AsyncGenerator, Literal, Optional, Union
 
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.interface import AgentInterface
 from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import (
@@ -249,7 +251,7 @@ class QueuingInterface(AgentInterface):
 class FunctionArgumentsStreamHandler:
     """State machine that can process a stream of"""
 
-    def __init__(self, json_key=
+    def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
         self.json_key = json_key
         self.reset()
 
@@ -311,7 +313,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     should maintain multiple generators and index them with the request ID
     """
 
-    def __init__(
+    def __init__(
+        self,
+        multi_step=True,
+        use_assistant_message=False,
+        assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
+        assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
+    ):
         # If streaming mode, ignores base interface calls like .assistant_message, etc
         self.streaming_mode = False
         # NOTE: flag for supporting legacy 'stream' flag where send_message is treated specially
@@ -321,7 +329,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.streaming_chat_completion_mode_function_name = None  # NOTE: sadly need to track state during stream
         # If chat completion mode, we need a special stream reader to
         # turn function argument to send_message into a normal text stream
-        self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler()
+        self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_function_kwarg)
 
         self._chunks = deque()
         self._event = asyncio.Event()  # Use an event to notify when chunks are available
@@ -333,6 +341,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.multi_step_indicator = MessageStreamStatus.done_step
         self.multi_step_gen_indicator = MessageStreamStatus.done_generation
 
+        # Support for AssistantMessage
+        self.use_assistant_message = use_assistant_message
+        self.assistant_message_function_name = assistant_message_function_name
+        self.assistant_message_function_kwarg = assistant_message_function_kwarg
+
         # extra prints
         self.debug = False
         self.timeout = 30
@@ -441,7 +454,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
     def _process_chunk_to_letta_style(
         self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime
-    ) -> Optional[Union[InternalMonologue, FunctionCallMessage]]:
+    ) -> Optional[Union[InternalMonologue, FunctionCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
 
@@ -461,23 +474,83 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=message_date,
                 internal_monologue=message_delta.content,
             )
+
+        # tool calls
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
             tool_call = message_delta.tool_calls[0]
 
-            if tool_call.
+            # special case for trapping `send_message`
+            if self.use_assistant_message and tool_call.function:
+
+                # If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode
+
+                # Track the function name while streaming
+                # If we were previously on a 'send_message', we need to 'toggle' into 'content' mode
                 if tool_call.function.name:
+                    if self.streaming_chat_completion_mode_function_name is None:
+                        self.streaming_chat_completion_mode_function_name = tool_call.function.name
+                    else:
+                        self.streaming_chat_completion_mode_function_name += tool_call.function.name
+
+                    # If we get a "hit" on the special keyword we're looking for, we want to skip to the next chunk
+                    # TODO I don't think this handles the function name in multi-pieces problem. Instead, we should probably reset the streaming_chat_completion_mode_function_name when we make this hit?
+                    # if self.streaming_chat_completion_mode_function_name == self.assistant_message_function_name:
+                    if tool_call.function.name == self.assistant_message_function_name:
+                        self.streaming_chat_completion_json_reader.reset()
+                        # early exit to turn into content mode
+                        return None
+
+                # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
+                if (
+                    tool_call.function.arguments
+                    and self.streaming_chat_completion_mode_function_name == self.assistant_message_function_name
+                ):
+                    # Strip out any extras tokens
+                    cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
+                    # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
+                    if cleaned_func_args is None:
+                        return None
+                    else:
+                        processed_chunk = AssistantMessage(
+                            id=message_id,
+                            date=message_date,
+                            assistant_message=cleaned_func_args,
+                        )
+
+                # otherwise we just do a regular passthrough of a FunctionCallDelta via a FunctionCallMessage
+                else:
+                    tool_call_delta = {}
+                    if tool_call.id:
+                        tool_call_delta["id"] = tool_call.id
+                    if tool_call.function:
+                        if tool_call.function.arguments:
+                            tool_call_delta["arguments"] = tool_call.function.arguments
+                        if tool_call.function.name:
+                            tool_call_delta["name"] = tool_call.function.name
+
+                    processed_chunk = FunctionCallMessage(
+                        id=message_id,
+                        date=message_date,
+                        function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
+                    )
+
+            else:
+
+                tool_call_delta = {}
+                if tool_call.id:
+                    tool_call_delta["id"] = tool_call.id
+                if tool_call.function:
+                    if tool_call.function.arguments:
+                        tool_call_delta["arguments"] = tool_call.function.arguments
+                    if tool_call.function.name:
+                        tool_call_delta["name"] = tool_call.function.name
+
+                processed_chunk = FunctionCallMessage(
+                    id=message_id,
+                    date=message_date,
+                    function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
                )
 
-            processed_chunk = FunctionCallMessage(
-                id=message_id,
-                date=message_date,
-                function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
-            )
         elif choice.finish_reason is not None:
             # skip if there's a finish
             return None
@@ -663,14 +736,32 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
         else:
 
-            function_call
+            try:
+                func_args = json.loads(function_call.function.arguments)
+            except:
+                warnings.warn(f"Failed to parse function arguments: {function_call.function.arguments}")
+                func_args = {}
+
+            if (
+                self.use_assistant_message
+                and function_call.function.name == self.assistant_message_function_name
+                and self.assistant_message_function_kwarg in func_args
+            ):
+                processed_chunk = AssistantMessage(
+                    id=msg_obj.id,
+                    date=msg_obj.created_at,
+                    assistant_message=func_args[self.assistant_message_function_kwarg],
+                )
+            else:
+                processed_chunk = FunctionCallMessage(
+                    id=msg_obj.id,
+                    date=msg_obj.created_at,
+                    function_call=FunctionCall(
+                        name=function_call.function.name,
+                        arguments=function_call.function.arguments,
+                    ),
+                )
+
             # processed_chunk = {
             #     "function_call": {
             #         "name": function_call.function.name,
```
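In streaming mode the interface now routes tool-call deltas two ways: when `use_assistant_message` is set and the tracked function name matches the designated message tool, argument chunks are fed through the JSON stream reader and emitted as `AssistantMessage` text; everything else passes through as a `FunctionCallMessage` delta. Below is a heavily simplified sketch of that per-chunk decision; it omits the name-accumulation and reader-reset handling shown in the diff, and `extract_message_text` is a stand-in for `FunctionArgumentsStreamHandler.process_json_chunk`.

```python
# Simplified sketch of the routing added in _process_chunk_to_letta_style (assumptions noted above).
from typing import Callable, Optional


def route_tool_call_delta(
    name_delta: Optional[str],
    args_delta: Optional[str],
    tracked_function_name: Optional[str],            # name accumulated from earlier chunks
    use_assistant_message: bool,
    assistant_tool_name: str = "send_message",       # assumed default (DEFAULT_MESSAGE_TOOL)
    extract_message_text: Callable[[str], Optional[str]] = lambda s: s or None,  # stub for the JSON reader
) -> Optional[dict]:
    """Decide what one streamed tool-call delta becomes in the client-facing stream."""
    if use_assistant_message:
        if name_delta == assistant_tool_name:
            # The chunk naming the designated tool emits nothing; it just switches modes.
            return None
        if args_delta and tracked_function_name == assistant_tool_name:
            # Inside the designated call: surface the extracted text as assistant-message content.
            text = extract_message_text(args_delta)
            return {"assistant_message": text} if text else None
    # Default passthrough: stream the raw function-call delta.
    return {"function_call": {"name": name_delta, "arguments": args_delta}}
```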
letta/server/rest_api/routers/v1/agents.py
CHANGED

```diff
@@ -6,6 +6,7 @@ from fastapi import APIRouter, Body, Depends, HTTPException, Query, status
 from fastapi.responses import JSONResponse, StreamingResponse
 from starlette.responses import StreamingResponse
 
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
 from letta.schemas.enums import MessageRole, MessageStreamStatus
 from letta.schemas.letta_message import (
@@ -254,6 +255,19 @@ def get_agent_messages(
     before: Optional[str] = Query(None, description="Message before which to retrieve the returned messages."),
     limit: int = Query(10, description="Maximum number of messages to retrieve."),
     msg_object: bool = Query(False, description="If true, returns Message objects. If false, return LettaMessage objects."),
+    # Flags to support the use of AssistantMessage message types
+    use_assistant_message: bool = Query(
+        False,
+        description="[Only applicable if msg_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
+    ),
+    assistant_message_function_name: str = Query(
+        DEFAULT_MESSAGE_TOOL,
+        description="[Only applicable if use_assistant_message is True] The name of the designated message tool.",
+    ),
+    assistant_message_function_kwarg: str = Query(
+        DEFAULT_MESSAGE_TOOL_KWARG,
+        description="[Only applicable if use_assistant_message is True] The name of the message argument in the designated message tool.",
+    ),
 ):
     """
     Retrieve message history for an agent.
@@ -267,6 +281,9 @@ def get_agent_messages(
         limit=limit,
         reverse=True,
         return_message_object=msg_object,
+        use_assistant_message=use_assistant_message,
+        assistant_message_function_name=assistant_message_function_name,
+        assistant_message_function_kwarg=assistant_message_function_kwarg,
     )
 
 
@@ -310,6 +327,10 @@ async def send_message(
         stream_steps=request.stream_steps,
         stream_tokens=request.stream_tokens,
         return_message_object=request.return_message_object,
+        # Support for AssistantMessage
+        use_assistant_message=request.use_assistant_message,
+        assistant_message_function_name=request.assistant_message_function_name,
+        assistant_message_function_kwarg=request.assistant_message_function_kwarg,
     )
 
 
@@ -322,12 +343,17 @@ async def send_message_to_agent(
     message: str,
     stream_steps: bool,
     stream_tokens: bool,
+    # related to whether or not we return `LettaMessage`s or `Message`s
     return_message_object: bool,  # Should be True for Python Client, False for REST API
-    chat_completion_mode:
+    chat_completion_mode: bool = False,
     timestamp: Optional[datetime] = None,
-    #
+    # Support for AssistantMessage
+    use_assistant_message: bool = False,
+    assistant_message_function_name: str = DEFAULT_MESSAGE_TOOL,
+    assistant_message_function_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
 ) -> Union[StreamingResponse, LettaResponse]:
     """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
+
     # TODO: @charles is this the correct way to handle?
     include_final_message = True
 
@@ -356,7 +382,8 @@ async def send_message_to_agent(
 
     # Disable token streaming if not OpenAI
     # TODO: cleanup this logic
+    llm_config = letta_agent.agent_state.llm_config
+    if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
         print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
         stream_tokens = False
 
@@ -368,6 +395,11 @@ async def send_message_to_agent(
         # streaming_interface.allow_assistant_message = stream
         # streaming_interface.function_call_legacy_mode = stream
 
+        # Allow AssistantMessage is desired by client
+        streaming_interface.use_assistant_message = use_assistant_message
+        streaming_interface.assistant_message_function_name = assistant_message_function_name
+        streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
+
         # Offload the synchronous message_func to a separate thread
         streaming_interface.stream_start()
         task = asyncio.create_task(
@@ -408,6 +440,7 @@ async def send_message_to_agent(
             message_ids = [m.id for m in filtered_stream]
             message_ids = deduplicate(message_ids)
             message_objs = [server.get_agent_message(agent_id=agent_id, message_id=m_id) for m_id in message_ids]
+            message_objs = [m for m in message_objs if m is not None]
            return LettaResponse(messages=message_objs, usage=usage)
         else:
             return LettaResponse(messages=filtered_stream, usage=usage)
```
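On the REST side these flags become query parameters on the message-history endpoint (and corresponding fields on the send-message request body, per the `letta_request.py` changes listed above). A usage sketch with `requests`; the base URL, port, and the `/v1/agents/{agent_id}/messages` path are assumptions based on where this router lives, and the agent ID is a placeholder.

```python
# Sketch: fetching history with the new AssistantMessage flags.
# Endpoint path, port, and any auth headers are assumptions for illustration.
import requests

BASE_URL = "http://localhost:8283"  # assumed local letta server
AGENT_ID = "agent-00000000-0000-0000-0000-000000000000"  # placeholder

resp = requests.get(
    f"{BASE_URL}/v1/agents/{AGENT_ID}/messages",
    params={
        "limit": 10,
        "msg_object": False,
        # new flags introduced in this release
        "use_assistant_message": True,
        "assistant_message_function_name": "send_message",
        "assistant_message_function_kwarg": "message",
    },
)
resp.raise_for_status()
for msg in resp.json():
    print(msg)
```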
letta/server/rest_api/routers/v1/llms.py
CHANGED

```diff
@@ -17,7 +17,9 @@ def list_llm_backends(
     server: "SyncServer" = Depends(get_letta_server),
 ):
 
+    models = server.list_llm_models()
+    print(models)
+    return models
 
 
 @router.get("/embedding", response_model=List[EmbeddingConfig], operation_id="list_embedding_models")
@@ -25,4 +27,6 @@ def list_embedding_backends(
     server: "SyncServer" = Depends(get_letta_server),
 ):
 
+    models = server.list_embedding_models()
+    print(models)
+    return models
```
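These endpoints now return whatever the server's enabled providers report rather than a single configured default; the aggregation itself lives in `SyncServer.list_llm_models()` / `list_embedding_models()` in the server.py hunks below. A minimal sketch of that provider-aggregation pattern, using stand-in classes rather than `letta.providers`.

```python
# Sketch of provider aggregation; the Provider protocol and OpenAILikeProvider
# are illustrative stand-ins for letta.providers, not the real implementations.
import os
from typing import List, Protocol


class Provider(Protocol):
    def list_llm_models(self) -> List[str]: ...
    def list_embedding_models(self) -> List[str]: ...


class OpenAILikeProvider:
    def __init__(self, api_key: str):
        self.api_key = api_key

    def list_llm_models(self) -> List[str]:
        # a real provider would query its backend's /models endpoint here
        return ["gpt-4", "gpt-4o-mini"]

    def list_embedding_models(self) -> List[str]:
        return ["text-embedding-ada-002"]


# Providers are only registered when their key or base URL is configured,
# mirroring the model_settings checks in SyncServer.__init__.
enabled_providers: List[Provider] = []
if os.getenv("OPENAI_API_KEY"):
    enabled_providers.append(OpenAILikeProvider(api_key=os.environ["OPENAI_API_KEY"]))


def list_llm_models() -> List[str]:
    models: List[str] = []
    for provider in enabled_providers:
        models.extend(provider.list_llm_models())
    return models


print(list_llm_models())
```

Because each provider is registered only when its credentials or endpoint are present in the environment, the output of these endpoints directly reflects how the server is configured.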
letta/server/server.py
CHANGED

```diff
@@ -15,7 +15,6 @@ import letta.server.utils as server_utils
 import letta.system as system
 from letta.agent import Agent, save_agent
 from letta.agent_store.storage import StorageConnector, TableType
-from letta.cli.cli_config import get_model_options
 from letta.config import LettaConfig
 from letta.credentials import LettaCredentials
 from letta.data_sources.connectors import DataConnector, load_data
@@ -44,6 +43,13 @@ from letta.log import get_logger
 from letta.memory import get_memory_functions
 from letta.metadata import MetadataStore
 from letta.prompts import gpt_system
+from letta.providers import (
+    AnthropicProvider,
+    GoogleAIProvider,
+    OllamaProvider,
+    OpenAIProvider,
+    VLLMProvider,
+)
 from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
 from letta.schemas.api_key import APIKey, APIKeyCreate
 from letta.schemas.block import (
@@ -158,7 +164,7 @@ from letta.metadata import (
     ToolModel,
     UserModel,
 )
-from letta.settings import settings
+from letta.settings import model_settings, settings
 
 config = LettaConfig.load()
 
@@ -234,51 +240,9 @@ class SyncServer(Server):
 
         # The default interface that will get assigned to agents ON LOAD
         self.default_interface_factory = default_interface_factory
-        # self.default_interface = default_interface
-        # self.default_interface = default_interface_cls()
-
-        # Initialize the connection to the DB
-        # try:
-        #     self.config = LettaConfig.load()
-        #     assert self.config.default_llm_config is not None, "default_llm_config must be set in the config"
-        #     assert self.config.default_embedding_config is not None, "default_embedding_config must be set in the config"
-        # except Exception as e:
-        #     # TODO: very hacky - need to improve model config for docker container
-        #     if os.getenv("OPENAI_API_KEY") is None:
-        #         logger.error("No OPENAI_API_KEY environment variable set and no ~/.letta/config")
-        #         raise e
-
-        # from letta.cli.cli import QuickstartChoice, quickstart
 
-        # quickstart(backend=QuickstartChoice.openai, debug=False, terminal=False, latest=False)
-        # self.config = LettaConfig.load()
-        # self.config.save()
-
-        # TODO figure out how to handle credentials for the server
         self.credentials = LettaCredentials.load()
 
-        # Generate default LLM/Embedding configs for the server
-        # TODO: we may also want to do the same thing with default persona/human/etc.
-        self.server_llm_config = settings.llm_config
-        self.server_embedding_config = settings.embedding_config
-        # self.server_llm_config = LLMConfig(
-        #     model=self.config.default_llm_config.model,
-        #     model_endpoint_type=self.config.default_llm_config.model_endpoint_type,
-        #     model_endpoint=self.config.default_llm_config.model_endpoint,
-        #     model_wrapper=self.config.default_llm_config.model_wrapper,
-        #     context_window=self.config.default_llm_config.context_window,
-        # )
-        # self.server_embedding_config = EmbeddingConfig(
-        #     embedding_endpoint_type=self.config.default_embedding_config.embedding_endpoint_type,
-        #     embedding_endpoint=self.config.default_embedding_config.embedding_endpoint,
-        #     embedding_dim=self.config.default_embedding_config.embedding_dim,
-        #     embedding_model=self.config.default_embedding_config.embedding_model,
-        #     embedding_chunk_size=self.config.default_embedding_config.embedding_chunk_size,
-        # )
-        assert self.server_embedding_config.embedding_model is not None, vars(self.server_embedding_config)
-
-        # Override config values with settings
-
         # Initialize the metadata store
         config = LettaConfig.load()
         if settings.letta_pg_uri_no_default:
@@ -286,8 +250,6 @@ class SyncServer(Server):
             config.recall_storage_uri = settings.letta_pg_uri_no_default
             config.archival_storage_type = "postgres"
             config.archival_storage_uri = settings.letta_pg_uri_no_default
-        config.default_llm_config = self.server_llm_config
-        config.default_embedding_config = self.server_embedding_config
         config.save()
         self.config = config
         self.ms = MetadataStore(self.config)
@@ -296,6 +258,19 @@ class SyncServer(Server):
         # add global default tools (for admin)
         self.add_default_tools(module_name="base")
 
+        # collect providers
+        self._enabled_providers = []
+        if model_settings.openai_api_key:
+            self._enabled_providers.append(OpenAIProvider(api_key=model_settings.openai_api_key))
+        if model_settings.anthropic_api_key:
+            self._enabled_providers.append(AnthropicProvider(api_key=model_settings.anthropic_api_key))
+        if model_settings.ollama_base_url:
+            self._enabled_providers.append(OllamaProvider(base_url=model_settings.ollama_base_url))
+        if model_settings.vllm_base_url:
+            self._enabled_providers.append(VLLMProvider(base_url=model_settings.vllm_base_url))
+        if model_settings.gemini_api_key:
+            self._enabled_providers.append(GoogleAIProvider(api_key=model_settings.gemini_api_key))
+
     def save_agents(self):
         """Saves all the agents that are in the in-memory object store"""
         for agent_d in self.active_agents:
@@ -456,7 +431,7 @@ class SyncServer(Server):
         logger.debug("Calling step_yield()")
         letta_agent.interface.step_yield()
 
-        return LettaUsageStatistics(**total_usage.
+        return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)
 
     def _command(self, user_id: str, agent_id: str, command: str) -> LettaUsageStatistics:
         """Process a CLI command"""
@@ -766,8 +741,8 @@ class SyncServer(Server):
 
         try:
             # model configuration
-            llm_config = request.llm_config
-            embedding_config = request.embedding_config
+            llm_config = request.llm_config
+            embedding_config = request.embedding_config
 
             # get tools + make sure they exist
             tool_objs = []
@@ -1262,6 +1237,9 @@ class SyncServer(Server):
         order: Optional[str] = "asc",
         reverse: Optional[bool] = False,
         return_message_object: bool = True,
+        use_assistant_message: bool = False,
+        assistant_message_function_name: str = constants.DEFAULT_MESSAGE_TOOL,
+        assistant_message_function_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
     ) -> Union[List[Message], List[LettaMessage]]:
         if self.ms.get_user(user_id=user_id) is None:
             raise ValueError(f"User user_id={user_id} does not exist")
@@ -1281,9 +1259,25 @@ class SyncServer(Server):
         if not return_message_object:
             # If we're GETing messages in reverse, we need to reverse the inner list (generated by to_letta_message)
             if reverse:
-                records = [
+                records = [
+                    msg
+                    for m in records
+                    for msg in m.to_letta_message(
+                        assistant_message=use_assistant_message,
+                        assistant_message_function_name=assistant_message_function_name,
+                        assistant_message_function_kwarg=assistant_message_function_kwarg,
+                    )[::-1]
+                ]
             else:
-                records = [
+                records = [
+                    msg
+                    for m in records
+                    for msg in m.to_letta_message(
+                        assistant_message=use_assistant_message,
+                        assistant_message_function_name=assistant_message_function_name,
+                        assistant_message_function_kwarg=assistant_message_function_kwarg,
+                    )
+                ]
 
         return records
 
@@ -1320,39 +1314,15 @@ class SyncServer(Server):
         base_config = vars(self.config)
         clean_base_config = clean_keys(base_config)
 
-        clean_base_config_default_llm_config_dict = vars(clean_base_config["default_llm_config"])
-        clean_base_config_default_embedding_config_dict = vars(clean_base_config["default_embedding_config"])
-
-        clean_base_config["default_llm_config"] = clean_base_config_default_llm_config_dict
-        clean_base_config["default_embedding_config"] = clean_base_config_default_embedding_config_dict
         response = {"config": clean_base_config}
 
         if include_defaults:
             default_config = vars(LettaConfig())
             clean_default_config = clean_keys(default_config)
-            clean_default_config["default_llm_config"] = clean_base_config_default_llm_config_dict
-            clean_default_config["default_embedding_config"] = clean_base_config_default_embedding_config_dict
             response["defaults"] = clean_default_config
 
         return response
 
-    def get_available_models(self) -> List[LLMConfig]:
-        """Poll the LLM endpoint for a list of available models"""
-
-        credentials = LettaCredentials().load()
-
-        try:
-            model_options = get_model_options(
-                credentials=credentials,
-                model_endpoint_type=self.config.default_llm_config.model_endpoint_type,
-                model_endpoint=self.config.default_llm_config.model_endpoint,
-            )
-            return model_options
-
-        except Exception as e:
-            logger.exception(f"Failed to get list of available models from LLM endpoint:\n{str(e)}")
-            raise
-
     def update_agent_core_memory(self, user_id: str, agent_id: str, new_memory_contents: dict) -> Memory:
         """Update the agents core memory block, return the new state"""
         if self.ms.get_user(user_id=user_id) is None:
@@ -1472,7 +1442,7 @@ class SyncServer(Server):
         source = Source(
             name=request.name,
             user_id=user_id,
-            embedding_config=self.
+            embedding_config=self.list_embedding_models()[0],  # TODO: require providing this
         )
         self.ms.create_source(source)
         assert self.ms.get_source(source_name=request.name, user_id=user_id) is not None, f"Failed to create source {request.name}"
@@ -1970,20 +1940,23 @@ class SyncServer(Server):
 
         return self.get_default_user()
 
-    def
+    def list_llm_models(self) -> List[LLMConfig]:
         """List available models"""
 
-        # model_endpoint=settings.llm_endpoint_type
-        # )
-        return [settings.llm_config]
+        llm_models = []
+        for provider in self._enabled_providers:
+            llm_models.extend(provider.list_llm_models())
+        return llm_models
 
     def list_embedding_models(self) -> List[EmbeddingConfig]:
         """List available embedding models"""
+        embedding_models = []
+        for provider in self._enabled_providers:
+            embedding_models.extend(provider.list_embedding_models())
+        return embedding_models
+
+    def add_llm_model(self, request: LLMConfig) -> LLMConfig:
+        """Add a new LLM model"""
 
+    def add_embedding_model(self, request: EmbeddingConfig) -> EmbeddingConfig:
+        """Add a new embedding model"""
```