letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +24 -0
- letta/__main__.py +3 -0
- letta/agent.py +1427 -0
- letta/agent_store/chroma.py +295 -0
- letta/agent_store/db.py +546 -0
- letta/agent_store/lancedb.py +177 -0
- letta/agent_store/milvus.py +198 -0
- letta/agent_store/qdrant.py +201 -0
- letta/agent_store/storage.py +188 -0
- letta/benchmark/benchmark.py +96 -0
- letta/benchmark/constants.py +14 -0
- letta/cli/cli.py +689 -0
- letta/cli/cli_config.py +1282 -0
- letta/cli/cli_load.py +166 -0
- letta/client/__init__.py +0 -0
- letta/client/admin.py +171 -0
- letta/client/client.py +2360 -0
- letta/client/streaming.py +90 -0
- letta/client/utils.py +61 -0
- letta/config.py +484 -0
- letta/configs/anthropic.json +13 -0
- letta/configs/letta_hosted.json +11 -0
- letta/configs/openai.json +12 -0
- letta/constants.py +134 -0
- letta/credentials.py +140 -0
- letta/data_sources/connectors.py +247 -0
- letta/embeddings.py +218 -0
- letta/errors.py +26 -0
- letta/functions/__init__.py +0 -0
- letta/functions/function_sets/base.py +174 -0
- letta/functions/function_sets/extras.py +132 -0
- letta/functions/functions.py +105 -0
- letta/functions/schema_generator.py +205 -0
- letta/humans/__init__.py +0 -0
- letta/humans/examples/basic.txt +1 -0
- letta/humans/examples/cs_phd.txt +9 -0
- letta/interface.py +314 -0
- letta/llm_api/__init__.py +0 -0
- letta/llm_api/anthropic.py +383 -0
- letta/llm_api/azure_openai.py +155 -0
- letta/llm_api/cohere.py +396 -0
- letta/llm_api/google_ai.py +468 -0
- letta/llm_api/llm_api_tools.py +485 -0
- letta/llm_api/openai.py +470 -0
- letta/local_llm/README.md +3 -0
- letta/local_llm/__init__.py +0 -0
- letta/local_llm/chat_completion_proxy.py +279 -0
- letta/local_llm/constants.py +31 -0
- letta/local_llm/function_parser.py +68 -0
- letta/local_llm/grammars/__init__.py +0 -0
- letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
- letta/local_llm/grammars/json.gbnf +26 -0
- letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
- letta/local_llm/groq/api.py +97 -0
- letta/local_llm/json_parser.py +202 -0
- letta/local_llm/koboldcpp/api.py +62 -0
- letta/local_llm/koboldcpp/settings.py +23 -0
- letta/local_llm/llamacpp/api.py +58 -0
- letta/local_llm/llamacpp/settings.py +22 -0
- letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
- letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
- letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
- letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
- letta/local_llm/lmstudio/api.py +100 -0
- letta/local_llm/lmstudio/settings.py +29 -0
- letta/local_llm/ollama/api.py +88 -0
- letta/local_llm/ollama/settings.py +32 -0
- letta/local_llm/settings/__init__.py +0 -0
- letta/local_llm/settings/deterministic_mirostat.py +45 -0
- letta/local_llm/settings/settings.py +72 -0
- letta/local_llm/settings/simple.py +28 -0
- letta/local_llm/utils.py +265 -0
- letta/local_llm/vllm/api.py +63 -0
- letta/local_llm/webui/api.py +60 -0
- letta/local_llm/webui/legacy_api.py +58 -0
- letta/local_llm/webui/legacy_settings.py +23 -0
- letta/local_llm/webui/settings.py +24 -0
- letta/log.py +76 -0
- letta/main.py +437 -0
- letta/memory.py +440 -0
- letta/metadata.py +884 -0
- letta/openai_backcompat/__init__.py +0 -0
- letta/openai_backcompat/openai_object.py +437 -0
- letta/persistence_manager.py +148 -0
- letta/personas/__init__.py +0 -0
- letta/personas/examples/anna_pa.txt +13 -0
- letta/personas/examples/google_search_persona.txt +15 -0
- letta/personas/examples/memgpt_doc.txt +6 -0
- letta/personas/examples/memgpt_starter.txt +4 -0
- letta/personas/examples/sam.txt +14 -0
- letta/personas/examples/sam_pov.txt +14 -0
- letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
- letta/personas/examples/sqldb/test.db +0 -0
- letta/prompts/__init__.py +0 -0
- letta/prompts/gpt_summarize.py +14 -0
- letta/prompts/gpt_system.py +26 -0
- letta/prompts/system/memgpt_base.txt +49 -0
- letta/prompts/system/memgpt_chat.txt +58 -0
- letta/prompts/system/memgpt_chat_compressed.txt +13 -0
- letta/prompts/system/memgpt_chat_fstring.txt +51 -0
- letta/prompts/system/memgpt_doc.txt +50 -0
- letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
- letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
- letta/prompts/system/memgpt_modified_chat.txt +23 -0
- letta/pytest.ini +0 -0
- letta/schemas/agent.py +117 -0
- letta/schemas/api_key.py +21 -0
- letta/schemas/block.py +135 -0
- letta/schemas/document.py +21 -0
- letta/schemas/embedding_config.py +54 -0
- letta/schemas/enums.py +35 -0
- letta/schemas/job.py +38 -0
- letta/schemas/letta_base.py +80 -0
- letta/schemas/letta_message.py +175 -0
- letta/schemas/letta_request.py +23 -0
- letta/schemas/letta_response.py +28 -0
- letta/schemas/llm_config.py +54 -0
- letta/schemas/memory.py +224 -0
- letta/schemas/message.py +727 -0
- letta/schemas/openai/chat_completion_request.py +123 -0
- letta/schemas/openai/chat_completion_response.py +136 -0
- letta/schemas/openai/chat_completions.py +123 -0
- letta/schemas/openai/embedding_response.py +11 -0
- letta/schemas/openai/openai.py +157 -0
- letta/schemas/organization.py +20 -0
- letta/schemas/passage.py +80 -0
- letta/schemas/source.py +62 -0
- letta/schemas/tool.py +143 -0
- letta/schemas/usage.py +18 -0
- letta/schemas/user.py +33 -0
- letta/server/__init__.py +0 -0
- letta/server/constants.py +6 -0
- letta/server/rest_api/__init__.py +0 -0
- letta/server/rest_api/admin/__init__.py +0 -0
- letta/server/rest_api/admin/agents.py +21 -0
- letta/server/rest_api/admin/tools.py +83 -0
- letta/server/rest_api/admin/users.py +98 -0
- letta/server/rest_api/app.py +193 -0
- letta/server/rest_api/auth/__init__.py +0 -0
- letta/server/rest_api/auth/index.py +43 -0
- letta/server/rest_api/auth_token.py +22 -0
- letta/server/rest_api/interface.py +726 -0
- letta/server/rest_api/routers/__init__.py +0 -0
- letta/server/rest_api/routers/openai/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
- letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
- letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
- letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
- letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
- letta/server/rest_api/routers/v1/__init__.py +15 -0
- letta/server/rest_api/routers/v1/agents.py +543 -0
- letta/server/rest_api/routers/v1/blocks.py +73 -0
- letta/server/rest_api/routers/v1/jobs.py +46 -0
- letta/server/rest_api/routers/v1/llms.py +28 -0
- letta/server/rest_api/routers/v1/organizations.py +61 -0
- letta/server/rest_api/routers/v1/sources.py +199 -0
- letta/server/rest_api/routers/v1/tools.py +103 -0
- letta/server/rest_api/routers/v1/users.py +109 -0
- letta/server/rest_api/static_files.py +74 -0
- letta/server/rest_api/utils.py +69 -0
- letta/server/server.py +1995 -0
- letta/server/startup.sh +8 -0
- letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
- letta/server/static_files/assets/index-156816da.css +1 -0
- letta/server/static_files/assets/index-486e3228.js +274 -0
- letta/server/static_files/favicon.ico +0 -0
- letta/server/static_files/index.html +39 -0
- letta/server/static_files/memgpt_logo_transparent.png +0 -0
- letta/server/utils.py +46 -0
- letta/server/ws_api/__init__.py +0 -0
- letta/server/ws_api/example_client.py +104 -0
- letta/server/ws_api/interface.py +108 -0
- letta/server/ws_api/protocol.py +100 -0
- letta/server/ws_api/server.py +145 -0
- letta/settings.py +165 -0
- letta/streaming_interface.py +396 -0
- letta/system.py +207 -0
- letta/utils.py +1065 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
- letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
letta/settings.py
ADDED
@@ -0,0 +1,165 @@

```python
from pathlib import Path
from typing import Optional
import os

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.llm_config import LLMConfig
from letta.utils import printd


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="letta_")

    letta_dir: Optional[Path] = Field(Path.home() / ".letta", env="LETTA_DIR")
    debug: Optional[bool] = False
    cors_origins: Optional[list] = ["http://letta.localhost", "http://localhost:8283", "http://localhost:8083"]

    # database configuration
    pg_db: Optional[str] = None
    pg_user: Optional[str] = None
    pg_password: Optional[str] = None
    pg_host: Optional[str] = None
    pg_port: Optional[int] = None
    pg_uri: Optional[str] = None  # option to specify full uri

    # llm configuration
    llm_endpoint: Optional[str] = None
    llm_endpoint_type: Optional[str] = None
    llm_model: Optional[str] = None
    llm_context_window: Optional[int] = None

    # embedding configuration
    embedding_endpoint: Optional[str] = None
    embedding_endpoint_type: Optional[str] = None
    embedding_dim: Optional[int] = None
    embedding_model: Optional[str] = None
    embedding_chunk_size: int = 300

    @property
    def llm_config(self):
        # try to get LLM config from settings
        if self.llm_endpoint and self.llm_endpoint_type and self.llm_model and self.llm_context_window:
            return LLMConfig(
                model=self.llm_model,
                model_endpoint_type=self.llm_endpoint_type,
                model_endpoint=self.llm_endpoint,
                model_wrapper=None,
                context_window=self.llm_context_window,
            )
        else:
            if not self.llm_endpoint:
                printd("No LETTA_LLM_ENDPOINT provided")
            if not self.llm_endpoint_type:
                printd("No LETTA_LLM_ENDPOINT_TYPE provided")
            if not self.llm_model:
                printd("No LETTA_LLM_MODEL provided")
            if not self.llm_context_window:
                printd("No LETTA_LLM_CONTEXT_WINDOW provided")

        # quickstart options
        if self.llm_model:
            try:
                return LLMConfig.default_config(self.llm_model)
            except ValueError:
                pass

        # try to read from config file (last resort)
        from letta.config import LettaConfig

        if LettaConfig.exists():
            config = LettaConfig.load()
            llm_config = LLMConfig(
                model=config.default_llm_config.model,
                model_endpoint_type=config.default_llm_config.model_endpoint_type,
                model_endpoint=config.default_llm_config.model_endpoint,
                model_wrapper=config.default_llm_config.model_wrapper,
                context_window=config.default_llm_config.context_window,
            )
            return llm_config

        # check OpenAI API key
        if os.getenv("OPENAI_API_KEY"):
            return LLMConfig.default_config(self.llm_model if self.llm_model else "gpt-4")

        return LLMConfig.default_config("letta")

    @property
    def embedding_config(self):
        # try to get embedding config from settings
        if self.embedding_endpoint and self.embedding_endpoint_type and self.embedding_model and self.embedding_dim:
            return EmbeddingConfig(
                embedding_model=self.embedding_model,
                embedding_endpoint_type=self.embedding_endpoint_type,
                embedding_endpoint=self.embedding_endpoint,
                embedding_dim=self.embedding_dim,
                embedding_chunk_size=self.embedding_chunk_size,
            )
        else:
            if not self.embedding_endpoint:
                printd("No LETTA_EMBEDDING_ENDPOINT provided")
            if not self.embedding_endpoint_type:
                printd("No LETTA_EMBEDDING_ENDPOINT_TYPE provided")
            if not self.embedding_model:
                printd("No LETTA_EMBEDDING_MODEL provided")
            if not self.embedding_dim:
                printd("No LETTA_EMBEDDING_DIM provided")

        # TODO: quickstart options
        # if self.embedding_model:
        #     try:
        #         return EmbeddingConfig.default_config(self.embedding_model)
        #     except ValueError:
        #         pass

        # try to read from config file (last resort)
        from letta.config import LettaConfig

        if LettaConfig.exists():
            config = LettaConfig.load()
            return EmbeddingConfig(
                embedding_model=config.default_embedding_config.embedding_model,
                embedding_endpoint_type=config.default_embedding_config.embedding_endpoint_type,
                embedding_endpoint=config.default_embedding_config.embedding_endpoint,
                embedding_dim=config.default_embedding_config.embedding_dim,
                embedding_chunk_size=config.default_embedding_config.embedding_chunk_size,
            )

        if os.getenv("OPENAI_API_KEY"):
            return EmbeddingConfig.default_config(self.embedding_model if self.embedding_model else "text-embedding-ada-002")

        return EmbeddingConfig.default_config("letta")

    @property
    def letta_pg_uri(self) -> str:
        if self.pg_uri:
            return self.pg_uri
        elif self.pg_db and self.pg_user and self.pg_password and self.pg_host and self.pg_port:
            return f"postgresql+pg8000://{self.pg_user}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}"
        else:
            return "postgresql+pg8000://letta:letta@localhost:5432/letta"

    # add this property to avoid being returned the default
    # reference: https://github.com/cpacker/Letta/issues/1362
    @property
    def letta_pg_uri_no_default(self) -> Optional[str]:
        if self.pg_uri:
            return self.pg_uri
        elif self.pg_db and self.pg_user and self.pg_password and self.pg_host and self.pg_port:
            return f"postgresql+pg8000://{self.pg_user}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}"
        else:
            return None


class TestSettings(Settings):
    model_config = SettingsConfigDict(env_prefix="letta_test_")

    letta_dir: Optional[Path] = Field(Path.home() / ".letta/test", env="LETTA_TEST_DIR")


# singleton
settings = Settings(_env_parse_none_str="None")
test_settings = TestSettings()
```
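Because `Settings` sets `env_prefix="letta_"`, pydantic-settings resolves each field from a `LETTA_`-prefixed environment variable (matched case-insensitively by default). A minimal sketch of the Postgres URI fallback, assuming pydantic-settings v2, an importable letta package, and no other `LETTA_PG_*` variables set:

```python
import os

# The full URI, if provided, wins over the individual pg_* fields.
os.environ["LETTA_PG_URI"] = "postgresql+pg8000://u:p@db.example:5432/letta"

from letta.settings import Settings

s = Settings()
assert s.pg_uri == "postgresql+pg8000://u:p@db.example:5432/letta"
print(s.letta_pg_uri)             # the explicit URI
print(s.letta_pg_uri_no_default)  # same URI here; None when nothing is configured

# With no pg_* configuration at all, letta_pg_uri falls back to the bundled default.
del os.environ["LETTA_PG_URI"]
print(Settings().letta_pg_uri)    # postgresql+pg8000://letta:letta@localhost:5432/letta
```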
letta/streaming_interface.py
ADDED
@@ -0,0 +1,396 @@

```python
import json
from abc import ABC, abstractmethod
from datetime import datetime
from typing import List, Optional

# from colorama import Fore, Style, init
from rich.console import Console
from rich.live import Live
from rich.markup import escape

from letta.interface import CLIInterface
from letta.schemas.message import Message
from letta.schemas.openai.chat_completion_response import (
    ChatCompletionChunkResponse,
    ChatCompletionResponse,
)

# init(autoreset=True)

# DEBUG = True  # puts full message outputs in the terminal
DEBUG = False  # only dumps important messages in the terminal

STRIP_UI = False


class AgentChunkStreamingInterface(ABC):
    """Interfaces handle Letta-related events (observer pattern)

    The 'msg' arg provides the scoped message, and the optional Message arg can provide additional metadata.
    """

    @abstractmethod
    def user_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta receives a user message"""
        raise NotImplementedError

    @abstractmethod
    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta generates some internal monologue"""
        raise NotImplementedError

    @abstractmethod
    def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta uses send_message"""
        raise NotImplementedError

    @abstractmethod
    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta calls a function"""
        raise NotImplementedError

    @abstractmethod
    def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime):
        """Process a streaming chunk from an OpenAI-compatible server"""
        raise NotImplementedError

    @abstractmethod
    def stream_start(self):
        """Any setup required before streaming begins"""
        raise NotImplementedError

    @abstractmethod
    def stream_end(self):
        """Any cleanup required after streaming ends"""
        raise NotImplementedError


class StreamingCLIInterface(AgentChunkStreamingInterface):
    """Version of the CLI interface that attaches to a stream generator and prints along the way.

    When a chunk is received, we write the delta to the buffer. If the buffer type has changed,
    we write out a newline + set the formatting for the new line.

    The two buffer types are:
      (1) content (inner thoughts)
      (2) tool_calls (function calling)

    NOTE: this assumes that the deltas received in the chunks are in-order, e.g.
    that once 'content' deltas stop streaming, they won't be received again. See the note
    below on an alternative version of the StreamingCLIInterface that does not have this problem:

    An alternative implementation could instead maintain the partial message state, and on each
    processed chunk (1) update the partial message state, (2) refresh/rewrite the state to the screen.
    """

    # CLIInterface is static/stateless
    nonstreaming_interface = CLIInterface()

    def __init__(self):
        """The streaming CLI interface state for determining which buffer is currently being written to"""
        self.streaming_buffer_type = None

    def _flush(self):
        pass

    def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime):
        assert len(chunk.choices) == 1, chunk

        message_delta = chunk.choices[0].delta

        # Starting a new buffer line
        if not self.streaming_buffer_type:
            assert not (
                message_delta.content is not None and message_delta.tool_calls is not None and len(message_delta.tool_calls)
            ), f"Error: got both content and tool_calls in message stream\n{message_delta}"

            if message_delta.content is not None:
                # Write out the prefix for inner thoughts
                print("Inner thoughts: ", end="", flush=True)
            elif message_delta.tool_calls is not None:
                assert len(message_delta.tool_calls) == 1, f"Error: got more than one tool call in response\n{message_delta}"
                # Write out the prefix for function calling
                print("Calling function: ", end="", flush=True)

        # Potentially switch/flush a buffer line
        else:
            pass

        # Write out the delta
        if message_delta.content is not None:
            if self.streaming_buffer_type and self.streaming_buffer_type != "content":
                print()
            self.streaming_buffer_type = "content"

            # Simple, just write out to the buffer
            print(message_delta.content, end="", flush=True)

        elif message_delta.tool_calls is not None:
            if self.streaming_buffer_type and self.streaming_buffer_type != "tool_calls":
                print()
            self.streaming_buffer_type = "tool_calls"

            assert len(message_delta.tool_calls) == 1, f"Error: got more than one tool call in response\n{message_delta}"
            function_call = message_delta.tool_calls[0].function

            # Slightly more complex - want to write parameters in a certain way (paren-style)
            # function_name(function_args)
            if function_call and function_call.name:
                # NOTE: need to account for closing the paren later
                print(f"{function_call.name}(", end="", flush=True)
            if function_call and function_call.arguments:
                print(function_call.arguments, end="", flush=True)

    def stream_start(self):
        # should be handled by stream_end(), but just in case
        self.streaming_buffer_type = None

    def stream_end(self):
        if self.streaming_buffer_type is not None:
            # TODO: should have a separate self.tool_call_open_paren flag
            if self.streaming_buffer_type == "tool_calls":
                print(")", end="", flush=True)

            print()  # newline to move the cursor
            self.streaming_buffer_type = None  # reset buffer tracker

    @staticmethod
    def important_message(msg: str):
        StreamingCLIInterface.nonstreaming_interface.important_message(msg)

    @staticmethod
    def warning_message(msg: str):
        StreamingCLIInterface.nonstreaming_interface.warning_message(msg)

    @staticmethod
    def internal_monologue(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.internal_monologue(msg, msg_obj)

    @staticmethod
    def assistant_message(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.assistant_message(msg, msg_obj)

    @staticmethod
    def memory_message(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.memory_message(msg, msg_obj)

    @staticmethod
    def system_message(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.system_message(msg, msg_obj)

    @staticmethod
    def user_message(msg: str, msg_obj: Optional[Message] = None, raw: bool = False, dump: bool = False, debug: bool = DEBUG):
        StreamingCLIInterface.nonstreaming_interface.user_message(msg, msg_obj)

    @staticmethod
    def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG):
        StreamingCLIInterface.nonstreaming_interface.function_message(msg, msg_obj)

    @staticmethod
    def print_messages(message_sequence: List[Message], dump=False):
        StreamingCLIInterface.nonstreaming_interface.print_messages(message_sequence, dump)

    @staticmethod
    def print_messages_simple(message_sequence: List[Message]):
        StreamingCLIInterface.nonstreaming_interface.print_messages_simple(message_sequence)

    @staticmethod
    def print_messages_raw(message_sequence: List[Message]):
        StreamingCLIInterface.nonstreaming_interface.print_messages_raw(message_sequence)

    @staticmethod
    def step_yield():
        pass
```
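To make the buffer-switching concrete, here is a small driver for the interface above. It is a sketch, not part of the package: the chunks are hypothetical `SimpleNamespace` stand-ins carrying only the attributes `process_chunk` actually reads (`choices[0].delta.content` and `.tool_calls`), rather than real `ChatCompletionChunkResponse` objects.

```python
from datetime import datetime, timezone
from types import SimpleNamespace

from letta.streaming_interface import StreamingCLIInterface

def fake_chunk(content=None, tool_calls=None):
    # Mimics chunk.choices[0].delta.{content, tool_calls} (stand-in shape, not the real schema)
    delta = SimpleNamespace(content=content, tool_calls=tool_calls)
    return SimpleNamespace(choices=[SimpleNamespace(delta=delta)])

def fake_tool_call(name=None, arguments=None):
    return [SimpleNamespace(function=SimpleNamespace(name=name, arguments=arguments))]

interface = StreamingCLIInterface()
now = datetime.now(timezone.utc)

interface.stream_start()
# 'content' deltas arrive first: the interface prints the "Inner thoughts: " prefix once,
# then appends each delta to the same line.
for piece in ("Deciding ", "what ", "to say."):
    interface.process_chunk(fake_chunk(content=piece), "msg-1", now)
# First 'tool_calls' delta: the buffer type flips, so a newline is printed before
# "send_message(" starts the paren-style function rendering.
interface.process_chunk(fake_chunk(tool_calls=fake_tool_call(name="send_message")), "msg-1", now)
interface.process_chunk(fake_chunk(tool_calls=fake_tool_call(arguments='{"message": "hi"}')), "msg-1", now)
interface.stream_end()  # closes the open paren and resets the buffer tracker
```

The file then continues with the refresh-based interfaces: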
```python
class AgentRefreshStreamingInterface(ABC):
    """Same as the AgentChunkStreamingInterface, but receives a refreshed copy of the
    full response on each update instead of incremental chunks.

    The 'msg' arg provides the scoped message, and the optional Message arg can provide additional metadata.
    """

    @abstractmethod
    def user_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta receives a user message"""
        raise NotImplementedError

    @abstractmethod
    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta generates some internal monologue"""
        raise NotImplementedError

    @abstractmethod
    def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta uses send_message"""
        raise NotImplementedError

    @abstractmethod
    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
        """Letta calls a function"""
        raise NotImplementedError

    @abstractmethod
    def process_refresh(self, response: ChatCompletionResponse):
        """Process a refreshed (full) response from an OpenAI-compatible server"""
        raise NotImplementedError

    @abstractmethod
    def stream_start(self):
        """Any setup required before streaming begins"""
        raise NotImplementedError

    @abstractmethod
    def stream_end(self):
        """Any cleanup required after streaming ends"""
        raise NotImplementedError

    @abstractmethod
    def toggle_streaming(self, on: bool):
        """Toggle streaming on/off (off = regular CLI interface)"""
        raise NotImplementedError


class StreamingRefreshCLIInterface(AgentRefreshStreamingInterface):
    """Version of the CLI interface that attaches to a stream generator and refreshes a render of the message at every step.

    We maintain the partial message state in the interface state, and on each
    processed chunk we:
      (1) update the partial message state,
      (2) refresh/rewrite the state to the screen.
    """

    nonstreaming_interface = CLIInterface

    def __init__(self, fancy: bool = True, separate_send_message: bool = True, disable_inner_mono_call: bool = True):
        """Initialize the streaming CLI interface state."""
        self.console = Console()

        # Using `Live` with `refresh_per_second` parameter to limit the refresh rate, avoiding excessive updates
        self.live = Live("", console=self.console, refresh_per_second=10)
        # self.live.start()  # Start the Live display context and keep it running

        # Use italics / emoji?
        self.fancy = fancy

        self.streaming = True
        self.separate_send_message = separate_send_message
        self.disable_inner_mono_call = disable_inner_mono_call

    def toggle_streaming(self, on: bool):
        self.streaming = on
        if on:
            self.separate_send_message = True
            self.disable_inner_mono_call = True
        else:
            self.separate_send_message = False
            self.disable_inner_mono_call = False

    def update_output(self, content: str):
        """Update the displayed output with new content."""
        # We use the `Live` object's update mechanism to refresh content without clearing the console
        if not self.fancy:
            content = escape(content)
        self.live.update(self.console.render_str(content), refresh=True)

    def process_refresh(self, response: ChatCompletionResponse):
        """Process the response to rewrite the current output buffer."""
        if not response.choices:
            self.update_output("💠 [italic]...[/italic]")
            return  # Early exit if there are no choices

        choice = response.choices[0]
        inner_thoughts = choice.message.content if choice.message.content else ""
        tool_calls = choice.message.tool_calls if choice.message.tool_calls else []

        if self.fancy:
            message_string = f"💠 [italic]{inner_thoughts}[/italic]" if inner_thoughts else ""
        else:
            message_string = "[inner thoughts] " + inner_thoughts if inner_thoughts else ""

        if tool_calls:
            function_call = tool_calls[0].function
            function_name = function_call.name  # Function name, can be an empty string
            function_args = function_call.arguments  # Function arguments, can be an empty string
            if message_string:
                message_string += "\n"
            # special case here for send_message
            if self.separate_send_message and function_name == "send_message":
                try:
                    message = json.loads(function_args)["message"]
                except Exception:
                    prefix = '{\n "message": "'
                    if len(function_args) < len(prefix):
                        message = "..."
                    elif function_args.startswith(prefix):
                        message = function_args[len(prefix) :]
                    else:
                        message = function_args
                message_string += f"🤖 [bold yellow]{message}[/bold yellow]"
            else:
                message_string += f"{function_name}({function_args})"

        self.update_output(message_string)

    def stream_start(self):
        if self.streaming:
            print()
            self.live.start()  # Start the Live display context and keep it running
            self.update_output("💠 [italic]...[/italic]")

    def stream_end(self):
        if self.streaming:
            if self.live.is_started:
                self.live.stop()
                print()
                self.live = Live("", console=self.console, refresh_per_second=10)

    @staticmethod
    def important_message(msg: str):
        StreamingCLIInterface.nonstreaming_interface.important_message(msg)

    @staticmethod
    def warning_message(msg: str):
        StreamingCLIInterface.nonstreaming_interface.warning_message(msg)

    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
        if self.disable_inner_mono_call:
            return
        StreamingCLIInterface.nonstreaming_interface.internal_monologue(msg, msg_obj)

    def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
        if self.separate_send_message:
            return
        StreamingCLIInterface.nonstreaming_interface.assistant_message(msg, msg_obj)

    @staticmethod
    def memory_message(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.memory_message(msg, msg_obj)

    @staticmethod
    def system_message(msg: str, msg_obj: Optional[Message] = None):
        StreamingCLIInterface.nonstreaming_interface.system_message(msg, msg_obj)

    @staticmethod
    def user_message(msg: str, msg_obj: Optional[Message] = None, raw: bool = False, dump: bool = False, debug: bool = DEBUG):
        StreamingCLIInterface.nonstreaming_interface.user_message(msg, msg_obj)

    @staticmethod
    def function_message(msg: str, msg_obj: Optional[Message] = None, debug: bool = DEBUG):
        StreamingCLIInterface.nonstreaming_interface.function_message(msg, msg_obj)

    @staticmethod
    def print_messages(message_sequence: List[Message], dump=False):
        StreamingCLIInterface.nonstreaming_interface.print_messages(message_sequence, dump)

    @staticmethod
    def print_messages_simple(message_sequence: List[Message]):
        StreamingCLIInterface.nonstreaming_interface.print_messages_simple(message_sequence)

    @staticmethod
    def print_messages_raw(message_sequence: List[Message]):
        StreamingCLIInterface.nonstreaming_interface.print_messages_raw(message_sequence)

    @staticmethod
    def step_yield():
        pass
```
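And a matching sketch for the refresh-style interface: `process_refresh` re-renders the whole message on every call, so the caller hands it a progressively more complete response rather than deltas. As before, the response objects are hypothetical stand-ins shaped only as far as `process_refresh` reads them.

```python
from types import SimpleNamespace

from letta.streaming_interface import StreamingRefreshCLIInterface

def fake_response(inner_thoughts="", name="", arguments=""):
    # Mimics response.choices[0].message.{content, tool_calls} (stand-in shape, not the real schema)
    tool_calls = [SimpleNamespace(function=SimpleNamespace(name=name, arguments=arguments))] if name else []
    message = SimpleNamespace(content=inner_thoughts, tool_calls=tool_calls)
    return SimpleNamespace(choices=[SimpleNamespace(message=message)])

interface = StreamingRefreshCLIInterface()
interface.stream_start()  # starts the rich Live context and renders the placeholder
# Each call rewrites the entire render, so repeating earlier fields is harmless;
# this is exactly the property the chunk-based interface's NOTE says it lacks.
interface.process_refresh(fake_response(inner_thoughts="Composing a greeting"))
interface.process_refresh(
    fake_response(
        inner_thoughts="Composing a greeting",
        name="send_message",
        arguments='{"message": "Hello!"}',  # complete JSON, so the send_message special case parses it
    )
)
interface.stream_end()  # stops Live and re-creates it for the next stream
```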