letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (189)
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/local_llm/settings/settings.py ADDED
@@ -0,0 +1,72 @@
+ import json
+ import os
+
+ from letta.constants import LETTA_DIR
+ from letta.local_llm.settings.deterministic_mirostat import (
+     settings as det_miro_settings,
+ )
+ from letta.local_llm.settings.simple import settings as simple_settings
+
+ DEFAULT = "simple"
+ SETTINGS_FOLDER_NAME = "settings"
+ COMPLETION_SETTINGS_FILE_NAME = "completions_api_settings.json"
+
+
+ def get_completions_settings(defaults="simple") -> dict:
+     """Pull from the home directory settings if they exist, otherwise default"""
+     from letta.utils import printd
+
+     # Load up some default base settings
+     printd(f"Loading default settings from '{defaults}'")
+     if defaults == "simple":
+         # simple = basic stop strings
+         settings = simple_settings
+     elif defaults == "deterministic_mirostat":
+         settings = det_miro_settings
+     elif defaults is None:
+         settings = dict()
+     else:
+         raise ValueError(defaults)
+
+     # Check if settings_dir folder exists (if not, create it)
+     settings_dir = os.path.join(LETTA_DIR, SETTINGS_FOLDER_NAME)
+     if not os.path.exists(settings_dir):
+         printd(f"Settings folder '{settings_dir}' doesn't exist, creating it...")
+         try:
+             os.makedirs(settings_dir)
+         except Exception as e:
+             print(f"Error: failed to create settings folder '{settings_dir}'.\n{e}")
+             return settings
+
+     # Then, check if settings_dir/completions_api_settings.json file exists
+     settings_file = os.path.join(settings_dir, COMPLETION_SETTINGS_FILE_NAME)
+
+     if os.path.isfile(settings_file):
+         # Load into a dict called "settings"
+         printd(f"Found completion settings file '{settings_file}', loading it...")
+         try:
+             with open(settings_file, "r", encoding="utf-8") as file:
+                 user_settings = json.load(file)
+                 if len(user_settings) > 0:
+                     printd(f"Updating base settings with the following user settings:\n{json.dumps(user_settings, indent=2)}")
+                     settings.update(user_settings)
+                 else:
+                     printd(f"'{settings_file}' was empty, ignoring...")
+         except json.JSONDecodeError as e:
+             print(f"Error: failed to load user settings file '{settings_file}', invalid json.\n{e}")
+         except Exception as e:
+             print(f"Error: failed to load user settings file.\n{e}")
+
+     else:
+         printd(f"No completion settings file '{settings_file}', skipping...")
+         # Create the file settings_file to make it easy for the user to edit
+         try:
+             with open(settings_file, "w", encoding="utf-8") as file:
+                 # We don't want to dump existing default settings in case we modify
+                 # the default settings in the future
+                 # json.dump(settings, file, indent=4)
+                 json.dump({}, file, indent=4)
+         except Exception as e:
+             print(f"Error: failed to create empty settings file '{settings_file}'.\n{e}")
+
+     return settings
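
Usage note: the loader above merges any JSON it finds at `<LETTA_DIR>/settings/completions_api_settings.json` over the chosen defaults. A minimal sketch of writing an override and having it picked up on the next call; the `temperature` and `top_p` keys are illustrative assumptions, not settings that every local backend necessarily honors:

    import json
    import os

    from letta.constants import LETTA_DIR
    from letta.local_llm.settings.settings import get_completions_settings

    # Write a user override file that get_completions_settings() will merge over the defaults
    override_path = os.path.join(LETTA_DIR, "settings", "completions_api_settings.json")
    os.makedirs(os.path.dirname(override_path), exist_ok=True)
    with open(override_path, "w", encoding="utf-8") as f:
        json.dump({"temperature": 0.7, "top_p": 0.9}, f, indent=4)  # illustrative keys

    settings = get_completions_settings()  # defaults updated with the override keys
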
letta/local_llm/settings/simple.py ADDED
@@ -0,0 +1,28 @@
+ settings = {
+     # "stopping_strings": [
+     "stop": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # airoboros specific
+         "\n### ",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         "\n#",
+         # "\n\n\n",
+         # prevent chaining function calls / multi json objects / run-on generations
+         # NOTE: this requires the ability to patch the extra '}}' back into the prompt
+         " }\n}\n",
+     ],
+     # most lm frontends default to 0.7-0.8 these days
+     # "temperature": 0.8,
+ }
letta/local_llm/utils.py ADDED
@@ -0,0 +1,265 @@
+ import os
+ import warnings
+ from typing import List
+
+ import requests
+ import tiktoken
+
+ import letta.local_llm.llm_chat_completion_wrappers.airoboros as airoboros
+ import letta.local_llm.llm_chat_completion_wrappers.chatml as chatml
+ import letta.local_llm.llm_chat_completion_wrappers.configurable_wrapper as configurable_wrapper
+ import letta.local_llm.llm_chat_completion_wrappers.dolphin as dolphin
+ import letta.local_llm.llm_chat_completion_wrappers.llama3 as llama3
+ import letta.local_llm.llm_chat_completion_wrappers.zephyr as zephyr
+
+
+ def post_json_auth_request(uri, json_payload, auth_type, auth_key):
+     """Send a POST request with a JSON payload and optional authentication"""
+
+     # By default most local LLM inference servers do not have authorization enabled
+     if auth_type is None:
+         response = requests.post(uri, json=json_payload)
+
+     # Used by OpenAI, together.ai, Mistral AI
+     elif auth_type == "bearer_token":
+         if auth_key is None:
+             raise ValueError(f"auth_type is {auth_type}, but auth_key is null")
+         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {auth_key}"}
+         response = requests.post(uri, json=json_payload, headers=headers)
+
+     # Used by OpenAI Azure
+     elif auth_type == "api_key":
+         if auth_key is None:
+             raise ValueError(f"auth_type is {auth_type}, but auth_key is null")
+         headers = {"Content-Type": "application/json", "api-key": f"{auth_key}"}
+         response = requests.post(uri, json=json_payload, headers=headers)
+
+     else:
+         raise ValueError(f"Unsupported authentication type: {auth_type}")
+
+     return response
+
+
+ # deprecated for Box
+ class DotDict(dict):
+     """Allow dot access on properties similar to OpenAI response object"""
+
+     def __getattr__(self, attr):
+         return self.get(attr)
+
+     def __setattr__(self, key, value):
+         self[key] = value
+
+     # following methods necessary for pickling
+     def __getstate__(self):
+         return vars(self)
+
+     def __setstate__(self, state):
+         vars(self).update(state)
+
+
+ def load_grammar_file(grammar):
+     # Set grammar
+     grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammars", f"{grammar}.gbnf")
+
+     # Check if the file exists
+     if not os.path.isfile(grammar_file):
+         # If the file doesn't exist, raise a FileNotFoundError
+         raise FileNotFoundError(f"The grammar file {grammar_file} does not exist.")
+
+     with open(grammar_file, "r", encoding="utf-8") as file:
+         grammar_str = file.read()
+
+     return grammar_str
+
+
+ # TODO: support tokenizers/tokenizer apis available in local models
+ def count_tokens(s: str, model: str = "gpt-4") -> int:
+     encoding = tiktoken.encoding_for_model(model)
+     return len(encoding.encode(s))
+
+
+ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
+     """Return the number of tokens used by a list of functions.
+
+     Copied from https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/11
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+     num_tokens = 0
+     for function in functions:
+         function_tokens = len(encoding.encode(function["name"]))
+         function_tokens += len(encoding.encode(function["description"]))
+
+         if "parameters" in function:
+             parameters = function["parameters"]
+             if "properties" in parameters:
+                 for propertiesKey in parameters["properties"]:
+                     function_tokens += len(encoding.encode(propertiesKey))
+                     v = parameters["properties"][propertiesKey]
+                     for field in v:
+                         if field == "type":
+                             function_tokens += 2
+                             function_tokens += len(encoding.encode(v["type"]))
+                         elif field == "description":
+                             function_tokens += 2
+                             function_tokens += len(encoding.encode(v["description"]))
+                         elif field == "enum":
+                             function_tokens -= 3
+                             for o in v["enum"]:
+                                 function_tokens += 3
+                                 function_tokens += len(encoding.encode(o))
+                         else:
+                             print(f"Warning: not supported field {field}")
+                 function_tokens += 11
+
+         num_tokens += function_tokens
+
+     num_tokens += 12
+     return num_tokens
+
+
+ def num_tokens_from_tool_calls(tool_calls: List[dict], model: str = "gpt-4"):
+     """Based on above code (num_tokens_from_functions).
+
+     Example to encode:
+     [{
+         'id': '8b6707cf-2352-4804-93db-0423f',
+         'type': 'function',
+         'function': {
+             'name': 'send_message',
+             'arguments': '{\n "message": "More human than human is our motto."\n}'
+         }
+     }]
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         # print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+     num_tokens = 0
+     for tool_call in tool_calls:
+         function_tokens = len(encoding.encode(tool_call["id"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["type"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["function"]["name"]))
+         function_tokens += 2 + len(encoding.encode(tool_call["function"]["arguments"]))
+
+         num_tokens += function_tokens
+
+     # TODO adjust?
+     num_tokens += 12
+     return num_tokens
+
+
+ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int:
+     """Return the number of tokens used by a list of messages.
+
+     From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+
+     For counting tokens in function calling RESPONSES, see:
+     https://hmarr.com/blog/counting-openai-tokens/, https://github.com/hmarr/openai-chat-tokens
+
+     For counting tokens in function calling REQUESTS, see:
+     https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/11
+     """
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         # print("Warning: model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+     if model in {
+         "gpt-3.5-turbo-0613",
+         "gpt-3.5-turbo-16k-0613",
+         "gpt-4-0314",
+         "gpt-4-32k-0314",
+         "gpt-4-0613",
+         "gpt-4-32k-0613",
+     }:
+         tokens_per_message = 3
+         tokens_per_name = 1
+     elif model == "gpt-3.5-turbo-0301":
+         tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+         tokens_per_name = -1  # if there's a name, the role is omitted
+     elif "gpt-3.5-turbo" in model:
+         # print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
+         return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
+     elif "gpt-4" in model:
+         # print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
+         return num_tokens_from_messages(messages, model="gpt-4-0613")
+     else:
+         warnings.warn(
+             f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+         )
+         return num_tokens_from_messages(messages, model="gpt-4-0613")
+         # raise NotImplementedError(
+         #     f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
+         # )
+     num_tokens = 0
+     for message in messages:
+         num_tokens += tokens_per_message
+         for key, value in message.items():
+             try:
+
+                 if isinstance(value, list) and key == "tool_calls":
+                     num_tokens += num_tokens_from_tool_calls(tool_calls=value, model=model)
+                     # special case for tool calling (list)
+                     # num_tokens += len(encoding.encode(value["name"]))
+                     # num_tokens += len(encoding.encode(value["arguments"]))
+
+                 else:
+                     num_tokens += len(encoding.encode(value))
+
+                 if key == "name":
+                     num_tokens += tokens_per_name
+
+             except TypeError as e:
+                 print(f"tiktoken encoding failed on: {value}")
+                 raise e
+
+     num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+     return num_tokens
+
+
+ def get_available_wrappers() -> dict:
+     return {
+         "llama3": llama3.LLaMA3InnerMonologueWrapper(),
+         "llama3-grammar": llama3.LLaMA3InnerMonologueWrapper(),
+         "llama3-hints-grammar": llama3.LLaMA3InnerMonologueWrapper(assistant_prefix_hint=True),
+         "experimental-wrapper-neural-chat-grammar-noforce": configurable_wrapper.ConfigurableJSONWrapper(
+             post_prompt="### Assistant:",
+             sys_prompt_start="### System:\n",
+             sys_prompt_end="\n",
+             user_prompt_start="### User:\n",
+             user_prompt_end="\n",
+             assistant_prompt_start="### Assistant:\n",
+             assistant_prompt_end="\n",
+             tool_prompt_start="### User:\n",
+             tool_prompt_end="\n",
+             strip_prompt=True,
+         ),
+         # New chatml-based wrappers
+         "chatml": chatml.ChatMLInnerMonologueWrapper(),
+         "chatml-grammar": chatml.ChatMLInnerMonologueWrapper(),
+         "chatml-noforce": chatml.ChatMLOuterInnerMonologueWrapper(),
+         "chatml-noforce-grammar": chatml.ChatMLOuterInnerMonologueWrapper(),
+         # "chatml-noforce-sysm": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True),
+         "chatml-noforce-roles": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True, allow_function_role=True),
+         "chatml-noforce-roles-grammar": chatml.ChatMLOuterInnerMonologueWrapper(use_system_role_in_user=True, allow_function_role=True),
+         # With extra hints
+         "chatml-hints": chatml.ChatMLInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-hints-grammar": chatml.ChatMLInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-noforce-hints": chatml.ChatMLOuterInnerMonologueWrapper(assistant_prefix_hint=True),
+         "chatml-noforce-hints-grammar": chatml.ChatMLOuterInnerMonologueWrapper(assistant_prefix_hint=True),
+         # Legacy wrappers
+         "airoboros-l2-70b-2.1": airoboros.Airoboros21InnerMonologueWrapper(),
+         "airoboros-l2-70b-2.1-grammar": airoboros.Airoboros21InnerMonologueWrapper(assistant_prefix_extra=None),
+         "dolphin-2.1-mistral-7b": dolphin.Dolphin21MistralWrapper(),
+         "dolphin-2.1-mistral-7b-grammar": dolphin.Dolphin21MistralWrapper(include_opening_brace_in_prefix=False),
+         "zephyr-7B": zephyr.ZephyrMistralInnerMonologueWrapper(),
+         "zephyr-7B-grammar": zephyr.ZephyrMistralInnerMonologueWrapper(include_opening_brace_in_prefix=False),
+     }
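
Usage note: the token-counting helpers above are used to budget prompts against a model's context window, and `get_available_wrappers()` is the registry of prompt-format wrappers for local models. A minimal sketch of calling them; the message payload and the 8192-token window are illustrative assumptions:

    from letta.local_llm.utils import count_tokens, get_available_wrappers, num_tokens_from_messages

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    # Remaining budget after accounting for the chat messages (example window size)
    remaining = 8192 - num_tokens_from_messages(messages, model="gpt-4")
    single = count_tokens("Hello!", model="gpt-4")

    # Pick a prompt-format wrapper by name from the registry above
    wrapper = get_available_wrappers()["chatml"]
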
letta/local_llm/vllm/api.py ADDED
@@ -0,0 +1,63 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/v1/completions"
+
+
+ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user, grammar=None):
+     """https://github.com/vllm-project/vllm/blob/main/examples/api_client.py"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+     request["max_tokens"] = 3000  # int(context_window - prompt_tokens)
+     request["stream"] = False
+     request["user"] = user
+
+     # currently hardcoded, since we are only supporting one model with the hosted endpoint
+     request["model"] = model
+
+     # Set grammar
+     if grammar is not None:
+         raise NotImplementedError
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["choices"][0]["text"]
+             usage = result_full.get("usage", None)
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the vLLM server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
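
Usage note: a minimal sketch of how `get_vllm_completion` might be called against a locally running vLLM server exposing the OpenAI-style completions route; the endpoint URL, model name, prompt text, and context window below are placeholders, not values shipped with the package:

    from letta.local_llm.vllm.api import get_vllm_completion

    # Placeholder endpoint/model for a local vLLM server with no auth enabled
    text, usage = get_vllm_completion(
        endpoint="http://localhost:8000",
        auth_type=None,
        auth_key=None,
        model="my-local-model",
        prompt="### User:\nHello\n### Assistant:\n",
        context_window=8192,
        user="example-user",
    )
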
letta/local_llm/webui/api.py ADDED
@@ -0,0 +1,60 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/v1/completions"
+
+
+ def get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """Compatibility for the new OpenAI API: https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["prompt"] = prompt
+     request["truncation_length"] = context_window
+     request["max_tokens"] = int(context_window - prompt_tokens)
+     request["max_new_tokens"] = int(context_window - prompt_tokens)  # safety backup to "max_tokens", shouldn't matter
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar_string"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Endpoint value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["choices"][0]["text"]
+             usage = result_full.get("usage", None)
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the web UI server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # Pass usage statistics back to main thread
+     # These are used to compute memory warning messages
+     completion_tokens = usage.get("completion_tokens", None) if usage is not None else None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,  # can grab from usage dict, but it's usually wrong (set to 0)
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/webui/legacy_api.py ADDED
@@ -0,0 +1,58 @@
+ from urllib.parse import urljoin
+
+ from letta.local_llm.settings.settings import get_completions_settings
+ from letta.local_llm.utils import count_tokens, post_json_auth_request
+
+ WEBUI_API_SUFFIX = "/api/v1/generate"
+
+
+ def get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=None):
+     """See https://github.com/oobabooga/text-generation-webui for instructions on how to run the LLM web server"""
+     from letta.utils import printd
+
+     prompt_tokens = count_tokens(prompt)
+     if prompt_tokens > context_window:
+         raise Exception(f"Request exceeds maximum context length ({prompt_tokens} > {context_window} tokens)")
+
+     # Settings for the generation, includes the prompt + stop tokens, max length, etc
+     settings = get_completions_settings()
+     request = settings
+     request["stopping_strings"] = request["stop"]  # alias
+     request["max_new_tokens"] = 3072  # random hack?
+     request["prompt"] = prompt
+     request["truncation_length"] = context_window  # assuming mistral 7b
+
+     # Set grammar
+     if grammar is not None:
+         request["grammar_string"] = grammar
+
+     if not endpoint.startswith(("http://", "https://")):
+         raise ValueError(f"Provided OPENAI_API_BASE value ({endpoint}) must begin with http:// or https://")
+
+     try:
+         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
+         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
+         if response.status_code == 200:
+             result_full = response.json()
+             printd(f"JSON API response:\n{result_full}")
+             result = result_full["results"][0]["text"]
+         else:
+             raise Exception(
+                 f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}."
+                 + f" Make sure that the web UI server is running and reachable at {URI}."
+             )
+
+     except:
+         # TODO handle gracefully
+         raise
+
+     # TODO correct for legacy
+     completion_tokens = None
+     total_tokens = prompt_tokens + completion_tokens if completion_tokens is not None else None
+     usage = {
+         "prompt_tokens": prompt_tokens,
+         "completion_tokens": completion_tokens,
+         "total_tokens": total_tokens,
+     }
+
+     return result, usage
letta/local_llm/webui/legacy_settings.py ADDED
@@ -0,0 +1,23 @@
+ SIMPLE = {
+     "stopping_strings": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     "max_new_tokens": 3072,
+     # "truncation_length": 4096, # assuming llama2 models
+     # "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
+ }
letta/local_llm/webui/settings.py ADDED
@@ -0,0 +1,24 @@
+ SIMPLE = {
+     # "stopping_strings": [
+     "stop": [
+         "\nUSER:",
+         "\nASSISTANT:",
+         "\nFUNCTION RETURN:",
+         "\nUSER",
+         "\nASSISTANT",
+         "\nFUNCTION RETURN",
+         "\nFUNCTION",
+         "\nFUNC",
+         "<|im_start|>",
+         "<|im_end|>",
+         "<|im_sep|>",
+         # '\n' +
+         # '</s>',
+         # '<|',
+         # '\n#',
+         # '\n\n\n',
+     ],
+     # "max_tokens": 3072,
+     # "truncation_length": 4096, # assuming llama2 models
+     # "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
+ }
letta/log.py ADDED
@@ -0,0 +1,76 @@
+ import logging
+ from logging.config import dictConfig
+ from pathlib import Path
+ from sys import stdout
+ from typing import Optional
+
+ from letta.settings import settings
+
+ selected_log_level = logging.DEBUG if settings.debug else logging.INFO
+
+
+ def _setup_logfile() -> "Path":
+     """ensure the logger filepath is in place
+
+     Returns: the logfile Path
+     """
+     logfile = Path(settings.letta_dir / "logs" / "Letta.log")
+     logfile.parent.mkdir(parents=True, exist_ok=True)
+     logfile.touch(exist_ok=True)
+     return logfile
+
+
+ # TODO: production logging should be much less invasive
+ DEVELOPMENT_LOGGING = {
+     "version": 1,
+     "disable_existing_loggers": True,
+     "formatters": {
+         "standard": {"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"},
+         "no_datetime": {
+             "format": "%(name)s - %(levelname)s - %(message)s",
+         },
+     },
+     "handlers": {
+         "console": {
+             "level": selected_log_level,
+             "class": "logging.StreamHandler",
+             "stream": stdout,
+             "formatter": "no_datetime",
+         },
+         "file": {
+             "level": "DEBUG",
+             "class": "logging.handlers.RotatingFileHandler",
+             "filename": _setup_logfile(),
+             "maxBytes": 1024**2 * 10,
+             "backupCount": 3,
+             "formatter": "standard",
+         },
+     },
+     "loggers": {
+         "Letta": {
+             "level": logging.DEBUG if settings.debug else logging.INFO,
+             "handlers": [
+                 "console",
+                 "file",
+             ],
+             "propagate": False,
+         },
+         "uvicorn": {
+             "level": "INFO",
+             "handlers": ["console"],
+             "propagate": False,
+         },
+     },
+ }
+
+
+ def get_logger(name: Optional[str] = None) -> "logging.Logger":
+     """returns the project logger, scoped to a child name if provided
+     Args:
+         name: will define a child logger
+     """
+     dictConfig(DEVELOPMENT_LOGGING)
+     parent_logger = logging.getLogger("Letta")
+     if name:
+         return parent_logger.getChild(name)
+     return parent_logger
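
Usage note: modules in the package obtain their logger through `get_logger`, which configures the "Letta" parent logger (console plus a rotating logfile under the Letta directory). A minimal sketch; the log messages are illustrative:

    from letta.log import get_logger

    logger = get_logger(__name__)  # child of the "Letta" logger
    logger.info("server starting")  # goes to console and the rotating logfile
    logger.debug("detail message")  # always written to the logfile; shown on console only when settings.debug is set
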