letta-nightly 0.4.1.dev20241004012408__py3-none-any.whl → 0.4.1.dev20241005104008__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release.

This version of letta-nightly might be problematic.

Files changed (34)
  1. letta/cli/cli.py +30 -365
  2. letta/cli/cli_config.py +70 -27
  3. letta/client/client.py +103 -11
  4. letta/config.py +80 -80
  5. letta/constants.py +6 -0
  6. letta/credentials.py +10 -1
  7. letta/errors.py +63 -5
  8. letta/llm_api/llm_api_tools.py +110 -52
  9. letta/local_llm/chat_completion_proxy.py +0 -3
  10. letta/main.py +1 -2
  11. letta/metadata.py +12 -0
  12. letta/providers.py +232 -0
  13. letta/schemas/block.py +1 -1
  14. letta/schemas/letta_request.py +17 -0
  15. letta/schemas/letta_response.py +11 -0
  16. letta/schemas/llm_config.py +18 -2
  17. letta/schemas/message.py +40 -13
  18. letta/server/rest_api/app.py +5 -0
  19. letta/server/rest_api/interface.py +115 -24
  20. letta/server/rest_api/routers/v1/agents.py +36 -3
  21. letta/server/rest_api/routers/v1/llms.py +6 -2
  22. letta/server/server.py +60 -87
  23. letta/server/static_files/assets/index-3ab03d5b.css +1 -0
  24. letta/server/static_files/assets/{index-4d08d8a3.js → index-9a9c449b.js} +69 -69
  25. letta/server/static_files/index.html +2 -2
  26. letta/settings.py +144 -114
  27. letta/utils.py +6 -1
  28. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/METADATA +1 -1
  29. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/RECORD +32 -32
  30. letta/local_llm/groq/api.py +0 -97
  31. letta/server/static_files/assets/index-156816da.css +0 -1
  32. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/LICENSE +0 -0
  33. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/WHEEL +0 -0
  34. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py CHANGED
@@ -9,7 +9,6 @@ from typing import List, Optional, Union
 import requests
 
 from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.credentials import LettaCredentials
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import (
     MODEL_TO_AZURE_ENGINE,
@@ -29,6 +28,7 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
+from letta.providers import GoogleAIProvider
 from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -37,14 +37,14 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
 from letta.utils import json_dumps
 
-LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local"]
+LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
 
 # TODO update to use better types
@@ -83,7 +83,7 @@ def add_inner_thoughts_to_functions(
     return new_functions
 
 
-def unpack_inner_thoughts_from_kwargs(
+def unpack_all_inner_thoughts_from_kwargs(
     response: ChatCompletionResponse,
     inner_thoughts_key: str,
 ) -> ChatCompletionResponse:
@@ -93,36 +93,7 @@ def unpack_inner_thoughts_from_kwargs(
 
     new_choices = []
     for choice in response.choices:
-        msg = choice.message
-        if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) >= 1:
-            if len(msg.tool_calls) > 1:
-                warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(msg.tool_calls)}) is not supported")
-            # TODO support multiple tool calls
-            tool_call = msg.tool_calls[0]
-
-            try:
-                # Sadly we need to parse the JSON since args are in string format
-                func_args = dict(json.loads(tool_call.function.arguments))
-                if inner_thoughts_key in func_args:
-                    # extract the inner thoughts
-                    inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                    # replace the kwargs
-                    new_choice = choice.model_copy(deep=True)
-                    new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                    # also replace the message content
-                    if new_choice.message.content is not None:
-                        warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                    new_choice.message.content = inner_thoughts
-
-                    # save copy
-                    new_choices.append(new_choice)
-                else:
-                    warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-            except json.JSONDecodeError as e:
-                warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-                raise e
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
 
     # return an updated copy
     new_response = response.model_copy(deep=True)
@@ -130,6 +101,38 @@ def unpack_inner_thoughts_from_kwargs(
     return new_response
 
 
+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
 def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
     """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
     from letta.utils import printd
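
The refactor above extracts the per-choice logic into its own helper so that a single choice can be unpacked outside of a full response. For reference, a self-contained sketch of that flow (plain dicts instead of the pydantic `Choice` model; the payload below is hypothetical, not from the package):

```python
import json
import warnings

# Hypothetical choice payload, shaped like the OpenAI chat completion
# choices the helper above operates on.
choice = {
    "message": {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "function": {
                    "name": "send_message",
                    "arguments": json.dumps({"inner_thoughts": "User said hi.", "message": "Hello!"}),
                }
            }
        ],
    }
}


def unpack(choice: dict, inner_thoughts_key: str = "inner_thoughts") -> dict:
    # Same flow as the new unpack_inner_thoughts_from_kwargs, minus the
    # pydantic models: parse the stringified arguments, pop the
    # inner-thoughts kwarg, and promote it to the message content.
    tool_call = choice["message"]["tool_calls"][0]
    func_args = json.loads(tool_call["function"]["arguments"])
    if inner_thoughts_key not in func_args:
        warnings.warn(f"Did not find inner thoughts in tool call: {tool_call}")
        return choice
    choice["message"]["content"] = func_args.pop(inner_thoughts_key)
    tool_call["function"]["arguments"] = json.dumps(func_args)
    return choice


print(unpack(choice)["message"]["content"])  # -> User said hi.
```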
@@ -246,15 +249,17 @@ def create(
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
     inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+    model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
 
-    printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")
+    if not model_settings:
+        from letta.settings import model_settings
 
-    # TODO eventually refactor so that credentials are passed through
+        model_settings = model_settings
 
-    credentials = LettaCredentials.load()
+    printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")
 
     if function_call and not functions:
         printd("unsetting function_call because functions is None")
@@ -286,7 +291,7 @@ def create(
         ]
 
         # TODO do the same for Azure?
-        if credentials.openai_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
+        if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
         if use_tool_naming:
@@ -323,7 +328,7 @@ def create(
             ), type(stream_inferface)
             response = openai_chat_completions_process_stream(
                 url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                api_key=credentials.openai_key,
+                api_key=model_settings.openai_api_key,
                 chat_completion_request=data,
                 stream_inferface=stream_inferface,
             )
@@ -332,10 +337,9 @@ def create(
             if isinstance(stream_inferface, AgentChunkStreamingInterface):
                 stream_inferface.stream_start()
             try:
-
                 response = openai_chat_completions_request(
                     url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                    api_key=credentials.openai_key,
+                    api_key=model_settings.openai_api_key,
                     chat_completion_request=data,
                 )
             finally:
@@ -343,7 +347,7 @@ def create(
                     stream_inferface.stream_end()
 
         if inner_thoughts_in_kwargs:
-            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
         return response
 
@@ -353,7 +357,7 @@ def create(
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
         azure_deployment = (
-            credentials.azure_deployment if credentials.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
+            model_settings.azure_deployment if model_settings.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
        )
         if use_tool_naming:
             data = dict(
@@ -374,10 +378,10 @@ def create(
                 user=str(user_id),
             )
         return azure_openai_chat_completions_request(
-            resource_name=credentials.azure_endpoint,
+            resource_name=model_settings.azure_endpoint,
             deployment_id=azure_deployment,
-            api_version=credentials.azure_version,
-            api_key=credentials.azure_key,
+            api_version=model_settings.azure_version,
+            api_key=model_settings.azure_key,
             data=data,
         )
 
@@ -400,9 +404,9 @@ def create(
 
         return google_ai_chat_completions_request(
             inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg,
-            service_endpoint=credentials.google_ai_service_endpoint,
+            service_endpoint=GoogleAIProvider(model_settings.gemini_api_key).service_endpoint,
             model=llm_config.model,
-            api_key=credentials.google_ai_key,
+            api_key=model_settings.gemini_api_key,
             # see structure of payload here: https://ai.google.dev/docs/function_calling
             data=dict(
                 contents=[m.to_google_ai_dict() for m in messages],
@@ -424,7 +428,7 @@ def create(
 
         return anthropic_chat_completions_request(
             url=llm_config.model_endpoint,
-            api_key=credentials.anthropic_key,
+            api_key=model_settings.anthropic_api_key,
             data=ChatCompletionRequest(
                 model=llm_config.model,
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
@@ -455,7 +459,7 @@ def create(
             chat_completion_request=ChatCompletionRequest(
                 model="command-r-plus",  # TODO
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                tools=tools,
                 tool_choice=function_call,
                 # user=str(user_id),
                 # NOTE: max_tokens is required for Anthropic API
@@ -463,6 +467,60 @@ def create(
             ),
         )
 
+    elif llm_config.model_endpoint_type == "groq":
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for Groq.")
+
+        if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
+            # only is a problem if we are *not* using an openai proxy
+            raise ValueError(f"Groq key is missing from letta config file")
+
+        # force to true for groq, since they don't support 'content' is non-null
+        inner_thoughts_in_kwargs = True
+        if inner_thoughts_in_kwargs:
+            functions = add_inner_thoughts_to_functions(
+                functions=functions,
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+
+        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
+        data = ChatCompletionRequest(
+            model=llm_config.model,
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs) for m in messages],
+            tools=tools,
+            tool_choice=function_call,
+            user=str(user_id),
+        )
+
+        # https://console.groq.com/docs/openai
+        # "The following fields are currently not supported and will result in a 400 error (yikes) if they are supplied:"
+        assert data.top_logprobs is None
+        assert data.logit_bias is None
+        assert data.logprobs == False
+        assert data.n == 1
+        # They mention that none of the messages can have names, but it seems to not error out (for now)
+
+        data.stream = False
+        if isinstance(stream_inferface, AgentChunkStreamingInterface):
+            stream_inferface.stream_start()
+        try:
+            # groq uses the openai chat completions API, so this component should be reusable
+            assert model_settings.groq_api_key is not None, "Groq key is missing"
+            response = openai_chat_completions_request(
+                url=llm_config.model_endpoint,
+                api_key=model_settings.groq_api_key,
+                chat_completion_request=data,
+            )
+        finally:
+            if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                stream_inferface.stream_end()
+
+        if inner_thoughts_in_kwargs:
+            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     # local model
     else:
         if stream:
@@ -481,6 +539,6 @@ def create(
             # hint
             first_message=first_message,
             # auth-related
-            auth_type=credentials.openllm_auth_type,
-            auth_key=credentials.openllm_key,
+            auth_type=model_settings.openllm_auth_type,
+            auth_key=model_settings.openllm_api_key,
         )
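
The new `groq` branch works because Groq exposes an OpenAI-compatible chat completions endpoint, which is why `openai_chat_completions_request` is reused verbatim. A minimal sketch of a direct call against that endpoint (assumes `GROQ_API_KEY` is set; the model id is an example, not taken from the diff):

```python
import os

import requests

resp = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"},
    json={
        "model": "llama3-70b-8192",  # example Groq model id (assumption)
        "messages": [{"role": "user", "content": "Say hi."}],
        "stream": False,  # the branch above forces stream=False; streaming raises NotImplementedError
        "n": 1,  # Groq returns a 400 for n != 1, hence the asserts above
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```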
letta/local_llm/chat_completion_proxy.py CHANGED
@@ -12,7 +12,6 @@ from letta.local_llm.grammars.gbnf_grammar_generator import (
     create_dynamic_model_from_function,
     generate_gbnf_grammar_and_documentation,
 )
-from letta.local_llm.groq.api import get_groq_completion
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
@@ -170,8 +169,6 @@ def get_chat_completion(
         result, usage = get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     elif endpoint_type == "vllm":
         result, usage = get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user)
-    elif endpoint_type == "groq":
-        result, usage = get_groq_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     else:
         raise LocalLLMError(
             f"Invalid endpoint type {endpoint_type}, please set variable depending on your backend (webui, lmstudio, llamacpp, koboldcpp)"
letta/main.py CHANGED
@@ -14,7 +14,7 @@ import letta.system as system
 # import benchmark
 from letta import create_client
 from letta.benchmark.benchmark import bench
-from letta.cli.cli import delete_agent, open_folder, quickstart, run, server, version
+from letta.cli.cli import delete_agent, open_folder, run, server, version
 from letta.cli.cli_config import add, add_tool, configure, delete, list, list_tools
 from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
@@ -38,7 +38,6 @@ app.command(name="list-tools")(list_tools)
 app.command(name="delete")(delete)
 app.command(name="server")(server)
 app.command(name="folder")(open_folder)
-app.command(name="quickstart")(quickstart)
 # load data commands
 app.add_typer(load_app, name="load")
 # benchmark command
letta/metadata.py CHANGED
@@ -151,6 +151,18 @@ class OrganizationModel(Base):
         return Organization(id=self.id, name=self.name, created_at=self.created_at)
 
 
+# TODO: eventually store providers?
+# class Provider(Base):
+#     __tablename__ = "providers"
+#     __table_args__ = {"extend_existing": True}
+#
+#     id = Column(String, primary_key=True)
+#     name = Column(String, nullable=False)
+#     created_at = Column(DateTime(timezone=True))
+#     api_key = Column(String, nullable=False)
+#     base_url = Column(String, nullable=False)
+
+
 class APIKeyModel(Base):
     """Data model for authentication tokens. One-to-many relationship with UserModel (1 User - N tokens)."""
 
letta/providers.py ADDED
@@ -0,0 +1,232 @@
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from letta.constants import LLM_MAX_TOKENS
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+
+
+class Provider(BaseModel):
+    base_url: str
+
+    def list_llm_models(self):
+        return []
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        pass
+
+
+class OpenAIProvider(Provider):
+    name: str = "openai"
+    api_key: str = Field(..., description="API key for the OpenAI API.")
+    base_url: str = "https://api.openai.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        model_options = [obj["id"] for obj in response["data"]]
+
+        configs = []
+        for model_name in model_options:
+            context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                continue
+            configs.append(
+                LLMConfig(model=model_name, model_endpoint_type="openai", model_endpoint=self.base_url, context_window=context_window_size)
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+
+        # TODO: actually automatically list models
+        return [
+            EmbeddingConfig(
+                embedding_model="text-embedding-ada-002",
+                embedding_endpoint_type="openai",
+                embedding_endpoint="https://api.openai.com/v1",
+                embedding_dim=1536,
+                embedding_chunk_size=300,
+            )
+        ]
+
+    def get_model_context_window_size(self, model_name: str):
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            return None
+
+
+class AnthropicProvider(Provider):
+    name: str = "anthropic"
+    api_key: str = Field(..., description="API key for the Anthropic API.")
+    base_url: str = "https://api.anthropic.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.anthropic import anthropic_get_model_list
+
+        models = anthropic_get_model_list(self.base_url, api_key=self.api_key)
+
+        configs = []
+        for model in models:
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="anthropic",
+                    model_endpoint=self.base_url,
+                    context_window=model["context_window"],
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+
+class OllamaProvider(OpenAIProvider):
+    name: str = "ollama"
+    base_url: str = Field(..., description="Base URL for the Ollama API.")
+    api_key: Optional[str] = Field(None, description="API key for the Ollama API (default: `None`).")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        # https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        import requests
+
+        response = requests.get(f"{self.base_url}/api/tags")
+        if response.status_code != 200:
+            raise Exception(f"Failed to list Ollama models: {response.text}")
+        response_json = response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            context_window = self.get_model_context_window(model["name"])
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="ollama",
+                    model_endpoint=self.base_url,
+                    context_window=context_window,
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str):
+
+        import requests
+
+        response = requests.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True})
+        response_json = response.json()
+
+        # thank you vLLM: https://github.com/vllm-project/vllm/blob/main/vllm/config.py#L1675
+        possible_keys = [
+            # OPT
+            "max_position_embeddings",
+            # GPT-2
+            "n_positions",
+            # MPT
+            "max_seq_len",
+            # ChatGLM2
+            "seq_length",
+            # Command-R
+            "model_max_length",
+            # Others
+            "max_sequence_length",
+            "max_seq_length",
+            "seq_len",
+        ]
+
+        # max_position_embeddings
+        # parse model cards: nous, dolphon, llama
+        for key, value in response_json["model_info"].items():
+            if "context_window" in key:
+                return value
+        return None
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # TODO: filter embedding models
+        return []
+
+
+class GroqProvider(OpenAIProvider):
+    name: str = "groq"
+    base_url: str = "https://api.groq.com/openai/v1"
+    api_key: str = Field(..., description="API key for the Groq API.")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        configs = []
+        for model in response["data"]:
+            if not "context_window" in model:
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model["id"], model_endpoint_type="openai", model_endpoint=self.base_url, context_window=model["context_window"]
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+    def get_model_context_window_size(self, model_name: str):
+        raise NotImplementedError
+
+
+class GoogleAIProvider(Provider):
+    # gemini
+    api_key: str = Field(..., description="API key for the Google AI API.")
+    service_endpoint: str = "generativelanguage"
+    base_url: str = "https://generativelanguage.googleapis.com"
+
+    def list_llm_models(self):
+        from letta.llm_api.google_ai import google_ai_get_model_list
+
+        # TODO: use base_url instead
+        model_options = google_ai_get_model_list(service_endpoint=self.service_endpoint, api_key=self.api_key)
+        model_options = [str(m["name"]) for m in model_options]
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+        # TODO remove manual filtering for gemini-pro
+        model_options = [mo for mo in model_options if str(mo).startswith("gemini") and "-pro" in str(mo)]
+        # TODO: add context windows
+        # model_options = ["gemini-pro"]
+
+        configs = []
+        for model in model_options:
+            configs.append(
+                LLMConfig(
+                    model=model,
+                    model_endpoint_type="google_ai",
+                    model_endpoint=self.base_url,
+                    context_window=self.get_model_context_window(model),
+                )
+            )
+        return configs
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        from letta.llm_api.google_ai import google_ai_get_model_context_window
+
+        # TODO: use base_url instead
+        return google_ai_get_model_context_window(self.service_endpoint, self.api_key, model_name)
+
+
+class AzureProvider(Provider):
+    pass
+
+
+class VLLMProvider(OpenAIProvider):
+    # NOTE: vLLM only serves one model at a time (so could configure that through env variables)
+    pass
+
+
+class CohereProvider(OpenAIProvider):
+    pass
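
A minimal sketch of how these new provider classes could be used to discover models (the classes and their fields come from the file above; the API keys and the local Ollama server are assumptions):

```python
import os

from letta.providers import OllamaProvider, OpenAIProvider

# Pydantic models take their declared fields as keyword arguments.
openai_provider = OpenAIProvider(api_key=os.environ["OPENAI_API_KEY"])
for config in openai_provider.list_llm_models():
    print(config.model, config.context_window)

# Assumes an Ollama server running on the default port.
ollama_provider = OllamaProvider(base_url="http://localhost:11434", api_key=None)
print([config.model for config in ollama_provider.list_llm_models()])
```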
letta/schemas/block.py CHANGED
@@ -53,7 +53,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
         super().__setattr__(name, value)
         if name == "value":
             # run validation
-            self.__class__.validate(self.dict(exclude_unset=True))
+            self.__class__.model_validate(self.model_dump(exclude_unset=True))
 
 
 class Block(BaseBlock):
letta/schemas/letta_request.py CHANGED
@@ -2,6 +2,7 @@ from typing import List
 
 from pydantic import BaseModel, Field
 
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.message import MessageCreate
 
 
@@ -21,3 +22,19 @@ class LettaRequest(BaseModel):
         default=False,
         description="Set True to return the raw Message object. Set False to return the Message in the format of the Letta API.",
     )
+
+    # Flags to support the use of AssistantMessage message types
+
+    use_assistant_message: bool = Field(
+        default=False,
+        description="[Only applicable if return_message_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
+    )
+
+    assistant_message_function_name: str = Field(
+        default=DEFAULT_MESSAGE_TOOL,
+        description="[Only applicable if use_assistant_message is True] The name of the designated message tool.",
+    )
+    assistant_message_function_kwarg: str = Field(
+        default=DEFAULT_MESSAGE_TOOL_KWARG,
+        description="[Only applicable if use_assistant_message is True] The name of the message argument in the designated message tool.",
+    )
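
A sketch of a request that opts into the new AssistantMessage behavior (the three flag fields are from the diff; the `messages` field and the `MessageCreate` shape are assumptions):

```python
from letta.schemas.letta_request import LettaRequest
from letta.schemas.message import MessageCreate

request = LettaRequest(
    messages=[MessageCreate(role="user", text="hello")],  # shape assumed
    return_message_object=False,
    # Emit AssistantMessage objects whenever the agent calls the designated
    # message tool; the two names below just restate the defaults.
    use_assistant_message=True,
    assistant_message_function_name="send_message",
    assistant_message_function_kwarg="message",
)
```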
letta/schemas/letta_response.py CHANGED
@@ -6,6 +6,7 @@ from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage
 from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
+from letta.utils import json_dumps
 
 # TODO: consider moving into own file
 
@@ -23,6 +24,16 @@ class LettaResponse(BaseModel):
     messages: Union[List[Message], List[LettaMessage]] = Field(..., description="The messages returned by the agent.")
     usage: LettaUsageStatistics = Field(..., description="The usage statistics of the agent.")
 
+    def __str__(self):
+        return json_dumps(
+            {
+                "messages": [message.model_dump() for message in self.messages],
+                # Assume `Message` and `LettaMessage` have a `dict()` method
+                "usage": self.usage.model_dump(),  # Assume `LettaUsageStatistics` has a `dict()` method
+            },
+            indent=4,
+        )
+
 
 # The streaming response is either [DONE], [DONE_STEP], [DONE], an error, or a LettaMessage
 LettaStreamingResponse = Union[LettaMessage, MessageStreamStatus]
letta/schemas/llm_config.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 from pydantic import BaseModel, ConfigDict, Field
 
@@ -17,7 +17,23 @@ class LLMConfig(BaseModel):
 
     # TODO: 🤮 don't default to a vendor! bug city!
     model: str = Field(..., description="LLM model name. ")
-    model_endpoint_type: str = Field(..., description="The endpoint type for the model.")
+    model_endpoint_type: Literal[
+        "openai",
+        "anthropic",
+        "cohere",
+        "google_ai",
+        "azure",
+        "groq",
+        "ollama",
+        "webui",
+        "webui-legacy",
+        "lmstudio",
+        "lmstudio-legacy",
+        "llamacpp",
+        "koboldcpp",
+        "vllm",
+        "hugging-face",
+    ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: str = Field(..., description="The endpoint for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
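
With `model_endpoint_type` narrowed to a `Literal`, pydantic now rejects unknown providers at construction time. A quick sketch:

```python
from pydantic import ValidationError

from letta.schemas.llm_config import LLMConfig

# Accepted: "groq" is now one of the allowed endpoint types.
config = LLMConfig(
    model="llama3-70b-8192",
    model_endpoint_type="groq",
    model_endpoint="https://api.groq.com/openai/v1",
    context_window=8192,
)

# Rejected: arbitrary strings no longer validate.
try:
    LLMConfig(
        model="gpt-4",
        model_endpoint_type="not-a-provider",
        model_endpoint="https://api.openai.com/v1",
        context_window=8192,
    )
except ValidationError as err:
    print(f"rejected with {err.error_count()} validation error(s)")
```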