letta-nightly 0.4.1.dev20241011104054__tar.gz → 0.4.1.dev20241012104008__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

This version of letta-nightly was flagged as potentially problematic in its registry.

Files changed (190)
  1. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/PKG-INFO +2 -1
  2. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent.py +1 -1
  3. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/db.py +8 -2
  4. letta_nightly-0.4.1.dev20241012104008/letta/base.py +3 -0
  5. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/cli/cli.py +2 -18
  6. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/azure_openai.py +1 -1
  7. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/helpers.py +1 -0
  8. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/llm_api_tools.py +17 -13
  9. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/openai.py +21 -17
  10. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/vllm/api.py +1 -1
  11. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/metadata.py +3 -4
  12. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/providers.py +72 -9
  13. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/embedding_config.py +7 -0
  14. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/llm_config.py +7 -0
  15. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/server.py +24 -36
  16. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/settings.py +12 -3
  17. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/pyproject.toml +2 -1
  18. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/LICENSE +0 -0
  19. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/README.md +0 -0
  20. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/__init__.py +0 -0
  21. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/__main__.py +0 -0
  22. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/chroma.py +0 -0
  23. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/lancedb.py +0 -0
  24. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/milvus.py +0 -0
  25. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/qdrant.py +0 -0
  26. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/agent_store/storage.py +0 -0
  27. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/benchmark/benchmark.py +0 -0
  28. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/benchmark/constants.py +0 -0
  29. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/cli/cli_config.py +0 -0
  30. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/cli/cli_load.py +0 -0
  31. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/client/__init__.py +0 -0
  32. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/client/admin.py +0 -0
  33. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/client/client.py +0 -0
  34. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/client/streaming.py +0 -0
  35. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/client/utils.py +0 -0
  36. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/config.py +0 -0
  37. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/constants.py +0 -0
  38. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/credentials.py +0 -0
  39. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/data_sources/connectors.py +0 -0
  40. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/embeddings.py +0 -0
  41. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/errors.py +0 -0
  42. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/__init__.py +0 -0
  43. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/function_sets/base.py +0 -0
  44. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/function_sets/extras.py +0 -0
  45. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/functions.py +0 -0
  46. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/helpers.py +0 -0
  47. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/functions/schema_generator.py +0 -0
  48. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/humans/__init__.py +0 -0
  49. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/humans/examples/basic.txt +0 -0
  50. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/humans/examples/cs_phd.txt +0 -0
  51. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/interface.py +0 -0
  52. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/__init__.py +0 -0
  53. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/anthropic.py +0 -0
  54. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/azure_openai_constants.py +0 -0
  55. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/cohere.py +0 -0
  56. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/llm_api/google_ai.py +0 -0
  57. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/README.md +0 -0
  58. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/__init__.py +0 -0
  59. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/chat_completion_proxy.py +0 -0
  60. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/constants.py +0 -0
  61. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/function_parser.py +0 -0
  62. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/grammars/__init__.py +0 -0
  63. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/grammars/gbnf_grammar_generator.py +0 -0
  64. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/grammars/json.gbnf +0 -0
  65. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +0 -0
  66. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/json_parser.py +0 -0
  67. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/koboldcpp/api.py +0 -0
  68. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/koboldcpp/settings.py +0 -0
  69. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llamacpp/api.py +0 -0
  70. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llamacpp/settings.py +0 -0
  71. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  72. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/airoboros.py +0 -0
  73. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/chatml.py +0 -0
  74. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +0 -0
  75. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/dolphin.py +0 -0
  76. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/llama3.py +0 -0
  77. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +0 -0
  78. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +0 -0
  79. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/llm_chat_completion_wrappers/zephyr.py +0 -0
  80. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/lmstudio/api.py +0 -0
  81. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/lmstudio/settings.py +0 -0
  82. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/ollama/api.py +0 -0
  83. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/ollama/settings.py +0 -0
  84. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/settings/__init__.py +0 -0
  85. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/settings/deterministic_mirostat.py +0 -0
  86. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/settings/settings.py +0 -0
  87. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/settings/simple.py +0 -0
  88. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/utils.py +0 -0
  89. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/webui/api.py +0 -0
  90. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/webui/legacy_api.py +0 -0
  91. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/webui/legacy_settings.py +0 -0
  92. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/local_llm/webui/settings.py +0 -0
  93. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/log.py +0 -0
  94. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/main.py +0 -0
  95. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/memory.py +0 -0
  96. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/openai_backcompat/__init__.py +0 -0
  97. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/openai_backcompat/openai_object.py +0 -0
  98. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/persistence_manager.py +0 -0
  99. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/__init__.py +0 -0
  100. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/anna_pa.txt +0 -0
  101. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/google_search_persona.txt +0 -0
  102. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/memgpt_doc.txt +0 -0
  103. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/memgpt_starter.txt +0 -0
  104. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/sam.txt +0 -0
  105. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/sam_pov.txt +0 -0
  106. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/sam_simple_pov_gpt35.txt +0 -0
  107. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/personas/examples/sqldb/test.db +0 -0
  108. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/__init__.py +0 -0
  109. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/gpt_summarize.py +0 -0
  110. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/gpt_system.py +0 -0
  111. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_base.txt +0 -0
  112. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_chat.txt +0 -0
  113. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_chat_compressed.txt +0 -0
  114. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_chat_fstring.txt +0 -0
  115. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_doc.txt +0 -0
  116. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_gpt35_extralong.txt +0 -0
  117. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_intuitive_knowledge.txt +0 -0
  118. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/prompts/system/memgpt_modified_chat.txt +0 -0
  119. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/pytest.ini +0 -0
  120. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/agent.py +0 -0
  121. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/api_key.py +0 -0
  122. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/block.py +0 -0
  123. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/document.py +0 -0
  124. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/enums.py +0 -0
  125. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/health.py +0 -0
  126. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/job.py +0 -0
  127. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/letta_base.py +0 -0
  128. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/letta_message.py +0 -0
  129. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/letta_request.py +0 -0
  130. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/letta_response.py +0 -0
  131. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/memory.py +0 -0
  132. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/message.py +0 -0
  133. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/openai/chat_completion_request.py +0 -0
  134. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/openai/chat_completion_response.py +0 -0
  135. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/openai/chat_completions.py +0 -0
  136. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/openai/embedding_response.py +0 -0
  137. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/openai/openai.py +0 -0
  138. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/organization.py +0 -0
  139. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/passage.py +0 -0
  140. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/source.py +0 -0
  141. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/tool.py +0 -0
  142. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/usage.py +0 -0
  143. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/schemas/user.py +0 -0
  144. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/__init__.py +0 -0
  145. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/constants.py +0 -0
  146. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/__init__.py +0 -0
  147. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/admin/__init__.py +0 -0
  148. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/admin/agents.py +0 -0
  149. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/admin/tools.py +0 -0
  150. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/admin/users.py +0 -0
  151. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/app.py +0 -0
  152. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/auth/__init__.py +0 -0
  153. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/auth/index.py +0 -0
  154. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/auth_token.py +0 -0
  155. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/interface.py +0 -0
  156. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/__init__.py +0 -0
  157. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/__init__.py +0 -0
  158. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  159. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/assistants/assistants.py +0 -0
  160. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/assistants/schemas.py +0 -0
  161. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/assistants/threads.py +0 -0
  162. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  163. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -0
  164. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/__init__.py +0 -0
  165. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/agents.py +0 -0
  166. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/blocks.py +0 -0
  167. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/health.py +0 -0
  168. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/jobs.py +0 -0
  169. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/llms.py +0 -0
  170. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/organizations.py +0 -0
  171. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/sources.py +0 -0
  172. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/tools.py +0 -0
  173. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/routers/v1/users.py +0 -0
  174. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/static_files.py +0 -0
  175. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/rest_api/utils.py +0 -0
  176. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/startup.sh +0 -0
  177. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/static_files/assets/index-3ab03d5b.css +0 -0
  178. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/static_files/assets/index-9a9c449b.js +0 -0
  179. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/static_files/favicon.ico +0 -0
  180. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/static_files/index.html +0 -0
  181. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/static_files/memgpt_logo_transparent.png +0 -0
  182. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/utils.py +0 -0
  183. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/ws_api/__init__.py +0 -0
  184. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/ws_api/example_client.py +0 -0
  185. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/ws_api/interface.py +0 -0
  186. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/ws_api/protocol.py +0 -0
  187. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/server/ws_api/server.py +0 -0
  188. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/streaming_interface.py +0 -0
  189. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/system.py +0 -0
  190. {letta_nightly-0.4.1.dev20241011104054 → letta_nightly-0.4.1.dev20241012104008}/letta/utils.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: letta-nightly
- Version: 0.4.1.dev20241011104054
+ Version: 0.4.1.dev20241012104008
  Summary: Create LLM agents with long-term memory and custom tools
  License: Apache License
  Author: Letta Team

@@ -20,6 +20,7 @@ Provides-Extra: postgres
  Provides-Extra: qdrant
  Provides-Extra: server
  Provides-Extra: tests
+ Requires-Dist: alembic (>=1.13.3,<2.0.0)
  Requires-Dist: autoflake (>=2.3.0,<3.0.0) ; extra == "dev"
  Requires-Dist: black[jupyter] (>=24.2.0,<25.0.0) ; extra == "dev"
  Requires-Dist: chromadb (>=0.4.24,<0.5.0)
letta/agent.py

@@ -481,7 +481,7 @@ class Agent(BaseAgent):
      first_message=first_message,
      # streaming
      stream=stream,
-     stream_inferface=self.interface,
+     stream_interface=self.interface,
      # putting inner thoughts in func args or not
      inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
  )
letta/agent_store/db.py

@@ -18,13 +18,14 @@ from sqlalchemy import (
      select,
      text,
  )
- from sqlalchemy.orm import declarative_base, mapped_column
+ from sqlalchemy.orm import mapped_column
  from sqlalchemy.orm.session import close_all_sessions
  from sqlalchemy.sql import func
  from sqlalchemy_json import MutableJson
  from tqdm import tqdm

  from letta.agent_store.storage import StorageConnector, TableType
+ from letta.base import Base
  from letta.config import LettaConfig
  from letta.constants import MAX_EMBEDDING_DIM
  from letta.metadata import EmbeddingConfigColumn, ToolCallColumn

@@ -35,7 +36,6 @@ from letta.schemas.openai.chat_completions import ToolCall
  from letta.schemas.passage import Passage
  from letta.settings import settings

- Base = declarative_base()
  config = LettaConfig()


@@ -560,3 +560,9 @@ class SQLLiteStorageConnector(SQLStorageConnector):

          # Commit the changes to the database
          session.commit()
+
+
+ def attach_base():
+     # This should be invoked in server.py to make sure Base gets initialized properly
+     # DO NOT REMOVE
+     print("Initializing database...")
letta/base.py (new file)

@@ -0,0 +1,3 @@
+ from sqlalchemy.ext.declarative import declarative_base
+
+ Base = declarative_base()
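
For context on this refactor: previously agent_store/db.py, metadata.py, and server.py each called declarative_base() and therefore produced three unrelated metadata registries; moving Base into letta/base.py means every model registers against one shared registry, so a single create_all() can create all tables. A minimal sketch of the pattern, with a hypothetical ExampleModel standing in for the real letta models:

    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()  # the letta/base.py equivalent: one shared Base

    class ExampleModel(Base):  # hypothetical model for illustration
        __tablename__ = "example"
        id = Column(Integer, primary_key=True)
        name = Column(String)

    # importing the model modules is enough to populate Base.metadata,
    # so one create_all() call creates every registered table
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(bind=engine)

This also explains the attach_base() helper added to agent_store/db.py above: calling it from server.py forces that module to be imported, so its models are registered on Base before create_all() runs.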
letta/cli/cli.py

@@ -14,9 +14,7 @@ from letta.constants import CLI_WARNING_PREFIX, LETTA_DIR
  from letta.local_llm.constants import ASSISTANT_MESSAGE_CLI_SYMBOL
  from letta.log import get_logger
  from letta.metadata import MetadataStore
- from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import OptionState
- from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import ChatMemory, Memory
  from letta.server.server import logger as server_logger

@@ -235,12 +233,7 @@ def run(
      # choose from list of llm_configs
      llm_configs = client.list_llm_configs()
      llm_options = [llm_config.model for llm_config in llm_configs]
-
-     # TODO move into LLMConfig as a class method?
-     def prettify_llm_config(llm_config: LLMConfig) -> str:
-         return f"{llm_config.model}" + f" ({llm_config.model_endpoint})" if llm_config.model_endpoint else ""
-
-     llm_choices = [questionary.Choice(title=prettify_llm_config(llm_config), value=llm_config) for llm_config in llm_configs]
+     llm_choices = [questionary.Choice(title=llm_config.pretty_print(), value=llm_config) for llm_config in llm_configs]

      # select model
      if len(llm_options) == 0:

@@ -255,17 +248,8 @@ def run(
      embedding_configs = client.list_embedding_configs()
      embedding_options = [embedding_config.embedding_model for embedding_config in embedding_configs]

-     # TODO move into EmbeddingConfig as a class method?
-     def prettify_embed_config(embedding_config: EmbeddingConfig) -> str:
-         return (
-             f"{embedding_config.embedding_model}" + f" ({embedding_config.embedding_endpoint})"
-             if embedding_config.embedding_endpoint
-             else ""
-         )
-
      embedding_choices = [
-         questionary.Choice(title=prettify_embed_config(embedding_config), value=embedding_config)
-         for embedding_config in embedding_configs
+         questionary.Choice(title=embedding_config.pretty_print(), value=embedding_config) for embedding_config in embedding_configs
      ]

      # select model
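
The CLI picker above relies on questionary.Choice pairing a display title with the underlying config object. A standalone sketch of that pattern, with placeholder strings instead of real LLMConfig objects:

    import questionary

    # each Choice shows `title` in the menu and returns `value` on selection
    choices = [
        questionary.Choice(title="gpt-4 [type=openai] [ip=https://api.openai.com/v1]", value="gpt-4"),
        questionary.Choice(title="llama3 [type=ollama] [ip=http://localhost:11434]", value="llama3"),
    ]
    selected = questionary.select("Select LLM model:", choices=choices).ask()
    print(f"user picked: {selected}")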
letta/llm_api/azure_openai.py

@@ -79,7 +79,7 @@ def azure_openai_chat_completions_request(
      data.pop("tools")
      data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

-     url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.api_version)
+     url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.azure_api_version)
      response_json = make_post_request(url, headers, data)
      # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
      if "content" not in response_json["choices"][0].get("message"):
letta/llm_api/helpers.py

@@ -153,6 +153,7 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -
              return new_choice
          else:
              warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+             return choice

      except json.JSONDecodeError as e:
          warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
letta/llm_api/llm_api_tools.py

@@ -70,6 +70,10 @@ def retry_with_exponential_backoff(
              return func(*args, **kwargs)

          except requests.exceptions.HTTPError as http_err:
+
+             if not hasattr(http_err, "response") or not http_err.response:
+                 raise
+
              # Retry on specified errors
              if http_err.response.status_code in error_codes:
                  # Increment retries
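
The guard re-raises immediately when the HTTPError carries no usable response object, so the retry logic below it can dereference http_err.response.status_code safely. A self-contained sketch of the overall retry-with-exponential-backoff shape (delays, error codes, and the function name are illustrative, not letta's actual defaults):

    import random
    import time

    import requests

    def retry_with_backoff(func, error_codes=(429,), initial_delay=1.0, backoff=2.0, max_retries=5):
        """Retry func() on the given HTTP status codes, waiting exponentially longer each time."""
        delay = initial_delay
        for attempt in range(max_retries + 1):
            try:
                return func()
            except requests.exceptions.HTTPError as http_err:
                # no response attached -> retrying cannot help, so re-raise (the guard added above)
                if not hasattr(http_err, "response") or http_err.response is None:
                    raise
                if http_err.response.status_code not in error_codes or attempt == max_retries:
                    raise
                time.sleep(delay * (1 + random.random()))  # exponential backoff with jitter
                delay *= backoff

One subtlety: the sketch checks `response is None` explicitly, whereas the truthiness of a requests.Response mirrors response.ok, so `not http_err.response` is also true for every 4xx/5xx response.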
@@ -115,7 +119,7 @@ def create(
      use_tool_naming: bool = True,
      # streaming?
      stream: bool = False,
-     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
+     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
      # TODO move to llm_config?
      # if unspecified (None), default to something we've tested
      inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,

@@ -149,19 +153,19 @@ def create(
      if stream:  # Client requested token streaming
          data.stream = True
-         assert isinstance(stream_inferface, AgentChunkStreamingInterface) or isinstance(
-             stream_inferface, AgentRefreshStreamingInterface
-         ), type(stream_inferface)
+         assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+             stream_interface, AgentRefreshStreamingInterface
+         ), type(stream_interface)
          response = openai_chat_completions_process_stream(
              url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
              api_key=model_settings.openai_api_key,
              chat_completion_request=data,
-             stream_inferface=stream_inferface,
+             stream_interface=stream_interface,
          )
      else:  # Client did not request token streaming (expect a blocking backend response)
          data.stream = False
-         if isinstance(stream_inferface, AgentChunkStreamingInterface):
-             stream_inferface.stream_start()
+         if isinstance(stream_interface, AgentChunkStreamingInterface):
+             stream_interface.stream_start()
          try:
              response = openai_chat_completions_request(
                  url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions

@@ -169,8 +173,8 @@ def create(
                  chat_completion_request=data,
              )
          finally:
-             if isinstance(stream_inferface, AgentChunkStreamingInterface):
-                 stream_inferface.stream_end()
+             if isinstance(stream_interface, AgentChunkStreamingInterface):
+                 stream_interface.stream_end()

      if inner_thoughts_in_kwargs:
          response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

@@ -317,8 +321,8 @@ def create(
      # They mention that none of the messages can have names, but it seems to not error out (for now)

      data.stream = False
-     if isinstance(stream_inferface, AgentChunkStreamingInterface):
-         stream_inferface.stream_start()
+     if isinstance(stream_interface, AgentChunkStreamingInterface):
+         stream_interface.stream_start()
      try:
          # groq uses the openai chat completions API, so this component should be reusable
          assert model_settings.groq_api_key is not None, "Groq key is missing"

@@ -328,8 +332,8 @@ def create(
              chat_completion_request=data,
          )
      finally:
-         if isinstance(stream_inferface, AgentChunkStreamingInterface):
-             stream_inferface.stream_end()
+         if isinstance(stream_interface, AgentChunkStreamingInterface):
+             stream_interface.stream_end()

      if inner_thoughts_in_kwargs:
          response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
letta/llm_api/openai.py

@@ -61,6 +61,7 @@ def openai_get_model_list(
      headers["Authorization"] = f"Bearer {api_key}"

      printd(f"Sending request to {url}")
+     response = None
      try:
          # TODO add query param "tool" to be true
          response = requests.get(url, headers=headers, params=extra_params)

@@ -71,7 +72,8 @@
      except requests.exceptions.HTTPError as http_err:
          # Handle HTTP errors (e.g., response 4XX, 5XX)
          try:
-             response = response.json()
+             if response:
+                 response = response.json()
          except:
              pass
          printd(f"Got HTTPError, exception={http_err}, response={response}")

@@ -79,7 +81,8 @@
      except requests.exceptions.RequestException as req_err:
          # Handle other requests-related errors (e.g., connection error)
          try:
-             response = response.json()
+             if response:
+                 response = response.json()
          except:
              pass
          printd(f"Got RequestException, exception={req_err}, response={response}")

@@ -87,7 +90,8 @@
      except Exception as e:
          # Handle other potential errors
          try:
-             response = response.json()
+             if response:
+                 response = response.json()
          except:
              pass
          printd(f"Got unknown Exception, exception={e}, response={response}")
@@ -157,7 +161,7 @@ def openai_chat_completions_process_stream(
      url: str,
      api_key: str,
      chat_completion_request: ChatCompletionRequest,
-     stream_inferface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
+     stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
      create_message_id: bool = True,
      create_message_datetime: bool = True,
  ) -> ChatCompletionResponse:

@@ -167,7 +171,7 @@
      on the chunks received from the OpenAI-compatible server POST SSE response.
      """
      assert chat_completion_request.stream == True
-     assert stream_inferface is not None, "Required"
+     assert stream_interface is not None, "Required"

      # Count the prompt tokens
      # TODO move to post-request?

@@ -220,8 +224,8 @@
          ),
      )

-     if stream_inferface:
-         stream_inferface.stream_start()
+     if stream_interface:
+         stream_interface.stream_start()

      n_chunks = 0  # approx == n_tokens
      try:

@@ -230,17 +234,17 @@
      ):
          assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)

-         if stream_inferface:
-             if isinstance(stream_inferface, AgentChunkStreamingInterface):
-                 stream_inferface.process_chunk(
+         if stream_interface:
+             if isinstance(stream_interface, AgentChunkStreamingInterface):
+                 stream_interface.process_chunk(
                      chat_completion_chunk,
                      message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                      message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
                  )
-             elif isinstance(stream_inferface, AgentRefreshStreamingInterface):
-                 stream_inferface.process_refresh(chat_completion_response)
+             elif isinstance(stream_interface, AgentRefreshStreamingInterface):
+                 stream_interface.process_refresh(chat_completion_response)
              else:
-                 raise TypeError(stream_inferface)
+                 raise TypeError(stream_interface)

          if chunk_idx == 0:
              # initialize the choice objects which we will increment with the deltas

@@ -314,13 +318,13 @@
          n_chunks += 1

      except Exception as e:
-         if stream_inferface:
-             stream_inferface.stream_end()
+         if stream_interface:
+             stream_interface.stream_end()
          print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
          raise e
      finally:
-         if stream_inferface:
-             stream_inferface.stream_end()
+         if stream_interface:
+             stream_interface.stream_end()

      # make sure we didn't leave temp stuff in
      assert all([c.finish_reason != TEMP_STREAM_FINISH_REASON for c in chat_completion_response.choices])
letta/local_llm/vllm/api.py

@@ -3,7 +3,7 @@ from urllib.parse import urljoin
  from letta.local_llm.settings.settings import get_completions_settings
  from letta.local_llm.utils import count_tokens, post_json_auth_request

- WEBUI_API_SUFFIX = "/v1/completions"
+ WEBUI_API_SUFFIX = "/completions"


  def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user, grammar=None):
letta/metadata.py

@@ -14,11 +14,10 @@ from sqlalchemy import (
      String,
      TypeDecorator,
      desc,
-     func,
  )
- from sqlalchemy.orm import declarative_base
  from sqlalchemy.sql import func

+ from letta.base import Base
  from letta.config import LettaConfig
  from letta.schemas.agent import AgentState
  from letta.schemas.api_key import APIKey

@@ -28,6 +27,8 @@ from letta.schemas.enums import JobStatus
  from letta.schemas.job import Job
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
+
+ # from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall
  from letta.schemas.openai.chat_completions import ToolCall, ToolCallFunction
  from letta.schemas.organization import Organization
  from letta.schemas.source import Source

@@ -36,8 +37,6 @@ from letta.schemas.user import User
  from letta.settings import settings
  from letta.utils import enforce_types, get_utc_time, printd

- Base = declarative_base()
-

  class LLMConfigColumn(TypeDecorator):
      """Custom type for storing LLMConfig as JSON"""
letta/providers.py

@@ -14,14 +14,18 @@ from letta.schemas.llm_config import LLMConfig

  class Provider(BaseModel):

-     def list_llm_models(self):
+     def list_llm_models(self) -> List[LLMConfig]:
          return []

-     def list_embedding_models(self):
+     def list_embedding_models(self) -> List[EmbeddingConfig]:
          return []

-     def get_model_context_window(self, model_name: str):
-         pass
+     def get_model_context_window(self, model_name: str) -> Optional[int]:
+         raise NotImplementedError
+
+     def provider_tag(self) -> str:
+         """String representation of the provider for display purposes"""
+         raise NotImplementedError


  class LettaProvider(Provider):

@@ -162,7 +166,7 @@ class OllamaProvider(OpenAIProvider):
          )
          return configs

-     def get_model_context_window(self, model_name: str):
+     def get_model_context_window(self, model_name: str) -> Optional[int]:

          import requests

@@ -310,7 +314,7 @@ class GoogleAIProvider(Provider):
          )
          return configs

-     def get_model_context_window(self, model_name: str):
+     def get_model_context_window(self, model_name: str) -> Optional[int]:
          from letta.llm_api.google_ai import google_ai_get_model_context_window

          return google_ai_get_model_context_window(self.base_url, self.api_key, model_name)

@@ -371,16 +375,75 @@ class AzureProvider(Provider):
          )
          return configs

-     def get_model_context_window(self, model_name: str):
+     def get_model_context_window(self, model_name: str) -> Optional[int]:
          """
          This is hardcoded for now, since there is no API endpoints to retrieve metadata for a model.
          """
          return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, 4096)


- class VLLMProvider(OpenAIProvider):
+ class VLLMChatCompletionsProvider(Provider):
+     """vLLM provider that treats vLLM as an OpenAI /chat/completions proxy"""
+
      # NOTE: vLLM only serves one model at a time (so could configure that through env variables)
-     pass
+     name: str = "vllm"
+     base_url: str = Field(..., description="Base URL for the vLLM API.")
+
+     def list_llm_models(self) -> List[LLMConfig]:
+         # not supported with vLLM
+         from letta.llm_api.openai import openai_get_model_list
+
+         assert self.base_url, "base_url is required for vLLM provider"
+         response = openai_get_model_list(self.base_url, api_key=None)
+
+         configs = []
+         print(response)
+         for model in response["data"]:
+             configs.append(
+                 LLMConfig(
+                     model=model["id"],
+                     model_endpoint_type="openai",
+                     model_endpoint=self.base_url,
+                     context_window=model["max_model_len"],
+                 )
+             )
+         return configs
+
+     def list_embedding_models(self) -> List[EmbeddingConfig]:
+         # not supported with vLLM
+         return []
+
+
+ class VLLMCompletionsProvider(Provider):
+     """This uses /completions API as the backend, not /chat/completions, so we need to specify a model wrapper"""
+
+     # NOTE: vLLM only serves one model at a time (so could configure that through env variables)
+     name: str = "vllm"
+     base_url: str = Field(..., description="Base URL for the vLLM API.")
+     default_prompt_formatter: str = Field(..., description="Default prompt formatter (aka model wrapper)to use on vLLM /completions API.")
+
+     def list_llm_models(self) -> List[LLMConfig]:
+         # not supported with vLLM
+         from letta.llm_api.openai import openai_get_model_list
+
+         response = openai_get_model_list(self.base_url, api_key=None)
+
+         configs = []
+         for model in response["data"]:
+             configs.append(
+                 LLMConfig(
+                     model=model["id"],
+                     model_endpoint_type="vllm",
+                     model_endpoint=self.base_url,
+                     model_wrapper=self.default_prompt_formatter,
+                     context_window=model["max_model_len"],
+                 )
+             )
+         return configs
+
+     def list_embedding_models(self) -> List[EmbeddingConfig]:
+         # not supported with vLLM
+         return []


  class CohereProvider(OpenAIProvider):
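
For context, a sketch of how the two new vLLM provider classes would be used. The port, the `vllm serve` invocation, and the wrapper name are assumptions; a running vLLM server is required for list_llm_models() to return anything:

    # assumes a local OpenAI-compatible vLLM server, e.g.:
    #   vllm serve meta-llama/Meta-Llama-3-8B-Instruct   (listens on port 8000)
    from letta.providers import VLLMChatCompletionsProvider, VLLMCompletionsProvider

    # /chat/completions route: vLLM acts as an OpenAI-compatible proxy
    chat_provider = VLLMChatCompletionsProvider(base_url="http://localhost:8000/v1")

    # /completions route: letta formats the prompt itself, so a wrapper is required
    completions_provider = VLLMCompletionsProvider(
        base_url="http://localhost:8000/v1",
        default_prompt_formatter="chatml",  # assumption: any supported wrapper name
    )

    for cfg in chat_provider.list_llm_models():
        print(cfg.pretty_print())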
letta/schemas/embedding_config.py

@@ -52,3 +52,10 @@ class EmbeddingConfig(BaseModel):
              )
          else:
              raise ValueError(f"Model {model_name} not supported.")
+
+     def pretty_print(self) -> str:
+         return (
+             f"{self.embedding_model}"
+             + (f" [type={self.embedding_endpoint_type}]" if self.embedding_endpoint_type else "")
+             + (f" [ip={self.embedding_endpoint}]" if self.embedding_endpoint else "")
+         )
letta/schemas/llm_config.py

@@ -68,3 +68,10 @@ class LLMConfig(BaseModel):
              )
          else:
              raise ValueError(f"Model {model_name} not supported.")
+
+     def pretty_print(self) -> str:
+         return (
+             f"{self.model}"
+             + (f" [type={self.model_endpoint_type}]" if self.model_endpoint_type else "")
+             + (f" [ip={self.model_endpoint}]" if self.model_endpoint else "")
+         )
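
A quick illustration of the strings pretty_print produces, using made-up values (field names are taken from the diff above; LLMConfig may require further fields not shown here):

    from letta.schemas.llm_config import LLMConfig

    cfg = LLMConfig(
        model="gpt-4",
        model_endpoint_type="openai",
        model_endpoint="https://api.openai.com/v1",
        context_window=8192,
    )
    print(cfg.pretty_print())
    # gpt-4 [type=openai] [ip=https://api.openai.com/v1]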
letta/server/server.py

@@ -14,8 +14,8 @@ import letta.constants as constants
  import letta.server.utils as server_utils
  import letta.system as system
  from letta.agent import Agent, save_agent
+ from letta.agent_store.db import attach_base
  from letta.agent_store.storage import StorageConnector, TableType
- from letta.config import LettaConfig
  from letta.credentials import LettaCredentials
  from letta.data_sources.connectors import DataConnector, load_data

@@ -41,7 +41,7 @@ from letta.interface import AgentInterface  # abstract
  from letta.interface import CLIInterface  # for printing to terminal
  from letta.log import get_logger
  from letta.memory import get_memory_functions
- from letta.metadata import MetadataStore
+ from letta.metadata import Base, MetadataStore
  from letta.prompts import gpt_system
  from letta.providers import (
      AnthropicProvider,

@@ -51,7 +51,8 @@ from letta.providers import (
      OllamaProvider,
      OpenAIProvider,
      Provider,
-     VLLMProvider,
+     VLLMChatCompletionsProvider,
+     VLLMCompletionsProvider,
  )
  from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgentState
  from letta.schemas.api_key import APIKey, APIKeyCreate

@@ -150,23 +151,11 @@ class Server(object):


  from sqlalchemy import create_engine
- from sqlalchemy.orm import declarative_base, sessionmaker
+ from sqlalchemy.orm import sessionmaker

- from letta.agent_store.db import MessageModel, PassageModel
  from letta.config import LettaConfig

  # NOTE: hack to see if single session management works
- from letta.metadata import (
-     AgentModel,
-     AgentSourceMappingModel,
-     APIKeyModel,
-     BlockModel,
-     JobModel,
-     OrganizationModel,
-     SourceModel,
-     ToolModel,
-     UserModel,
- )
  from letta.settings import model_settings, settings

  config = LettaConfig.load()

@@ -183,24 +172,12 @@ else:
      # TODO: don't rely on config storage
      engine = create_engine("sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db"))

- Base = declarative_base()
+
  SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
- Base.metadata.create_all(
-     engine,
-     tables=[
-         UserModel.__table__,
-         AgentModel.__table__,
-         SourceModel.__table__,
-         AgentSourceMappingModel.__table__,
-         APIKeyModel.__table__,
-         BlockModel.__table__,
-         ToolModel.__table__,
-         JobModel.__table__,
-         PassageModel.__table__,
-         MessageModel.__table__,
-         OrganizationModel.__table__,
-     ],
- )
+
+ attach_base()
+
+ Base.metadata.create_all(bind=engine)


  # Dependency

@@ -268,12 +245,11 @@ class SyncServer(Server):
          if model_settings.anthropic_api_key:
              self._enabled_providers.append(AnthropicProvider(api_key=model_settings.anthropic_api_key))
          if model_settings.ollama_base_url:
-             self._enabled_providers.append(OllamaProvider(base_url=model_settings.ollama_base_url))
-         if model_settings.vllm_base_url:
-             self._enabled_providers.append(VLLMProvider(base_url=model_settings.vllm_base_url))
+             self._enabled_providers.append(OllamaProvider(base_url=model_settings.ollama_base_url, api_key=None))
          if model_settings.gemini_api_key:
              self._enabled_providers.append(GoogleAIProvider(api_key=model_settings.gemini_api_key))
          if model_settings.azure_api_key and model_settings.azure_base_url:
+             assert model_settings.azure_api_version, "AZURE_API_VERSION is required"
              self._enabled_providers.append(
                  AzureProvider(
                      api_key=model_settings.azure_api_key,

@@ -281,6 +257,18 @@ class SyncServer(Server):
                      api_version=model_settings.azure_api_version,
                  )
              )
+         if model_settings.vllm_api_base:
+             # vLLM exposes both a /chat/completions and a /completions endpoint
+             self._enabled_providers.append(
+                 VLLMCompletionsProvider(
+                     base_url=model_settings.vllm_api_base,
+                     default_prompt_formatter=model_settings.default_prompt_formatter,
+                 )
+             )
+             # NOTE: to use the /chat/completions endpoint, you need to specify extra flags on vLLM startup
+             # see: https://docs.vllm.ai/en/latest/getting_started/examples/openai_chat_completion_client_with_tools.html
+             # e.g. "... --enable-auto-tool-choice --tool-call-parser hermes"
+             self._enabled_providers.append(VLLMChatCompletionsProvider(base_url=model_settings.vllm_api_base))

      def save_agents(self):
          """Saves all the agents that are in the in-memory object store"""
letta/settings.py

@@ -4,14 +4,20 @@ from typing import Optional
  from pydantic import Field
  from pydantic_settings import BaseSettings, SettingsConfigDict

+ from letta.local_llm.constants import DEFAULT_WRAPPER_NAME
+

  class ModelSettings(BaseSettings):

      # env_prefix='my_prefix_'

+     # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper
+     # the "model wrapper" is responsible for prompt formatting and function calling parsing
+     default_prompt_formatter: str = DEFAULT_WRAPPER_NAME
+
      # openai
      openai_api_key: Optional[str] = None
-     openai_api_base: Optional[str] = "https://api.openai.com/v1"
+     openai_api_base: str = "https://api.openai.com/v1"

      # groq
      groq_api_key: Optional[str] = None

@@ -25,13 +31,16 @@ class ModelSettings(BaseSettings):
      # azure
      azure_api_key: Optional[str] = None
      azure_base_url: Optional[str] = None
-     azure_api_version: Optional[str] = None
+     # We provide a default here, since usually people will want to be on the latest API version.
+     azure_api_version: Optional[str] = (
+         "2024-09-01-preview"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
+     )

      # google ai
      gemini_api_key: Optional[str] = None

      # vLLM
-     vllm_base_url: Optional[str] = None
+     vllm_api_base: Optional[str] = None

      # openllm
      openllm_auth_type: Optional[str] = None
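
Since ModelSettings is a pydantic BaseSettings subclass, each field is populated from a matching (case-insensitive) environment variable. A sketch of how the renamed vLLM field would be configured, assuming a vLLM server on localhost:8000:

    import os
    from letta.settings import ModelSettings

    os.environ["VLLM_API_BASE"] = "http://localhost:8000/v1"  # assumed local vLLM server

    model_settings = ModelSettings()
    print(model_settings.vllm_api_base)             # http://localhost:8000/v1
    print(model_settings.default_prompt_formatter)  # DEFAULT_WRAPPER_NAME unless overridden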