langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (107)
  1. langroid/__init__.py +95 -0
  2. langroid/agent/__init__.py +40 -0
  3. langroid/agent/base.py +222 -91
  4. langroid/agent/batch.py +264 -0
  5. langroid/agent/callbacks/chainlit.py +608 -0
  6. langroid/agent/chat_agent.py +247 -101
  7. langroid/agent/chat_document.py +41 -4
  8. langroid/agent/openai_assistant.py +842 -0
  9. langroid/agent/special/__init__.py +50 -0
  10. langroid/agent/special/doc_chat_agent.py +837 -141
  11. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  12. langroid/agent/special/lance_rag/__init__.py +9 -0
  13. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  14. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  15. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  16. langroid/agent/special/lance_tools.py +44 -0
  17. langroid/agent/special/neo4j/__init__.py +0 -0
  18. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  20. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  21. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  22. langroid/agent/special/relevance_extractor_agent.py +127 -0
  23. langroid/agent/special/retriever_agent.py +32 -198
  24. langroid/agent/special/sql/__init__.py +11 -0
  25. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  26. langroid/agent/special/sql/utils/__init__.py +22 -0
  27. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  28. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  29. langroid/agent/special/table_chat_agent.py +43 -9
  30. langroid/agent/task.py +475 -122
  31. langroid/agent/tool_message.py +75 -13
  32. langroid/agent/tools/__init__.py +13 -0
  33. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  34. langroid/agent/tools/google_search_tool.py +11 -0
  35. langroid/agent/tools/metaphor_search_tool.py +67 -0
  36. langroid/agent/tools/recipient_tool.py +16 -29
  37. langroid/agent/tools/run_python_code.py +60 -0
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/agent/tools/segment_extract_tool.py +36 -0
  40. langroid/cachedb/__init__.py +9 -0
  41. langroid/cachedb/base.py +22 -2
  42. langroid/cachedb/momento_cachedb.py +26 -2
  43. langroid/cachedb/redis_cachedb.py +78 -11
  44. langroid/embedding_models/__init__.py +34 -0
  45. langroid/embedding_models/base.py +21 -2
  46. langroid/embedding_models/models.py +120 -18
  47. langroid/embedding_models/protoc/embeddings.proto +19 -0
  48. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  49. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  50. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  51. langroid/embedding_models/remote_embeds.py +153 -0
  52. langroid/language_models/__init__.py +45 -0
  53. langroid/language_models/azure_openai.py +80 -27
  54. langroid/language_models/base.py +117 -12
  55. langroid/language_models/config.py +5 -0
  56. langroid/language_models/openai_assistants.py +3 -0
  57. langroid/language_models/openai_gpt.py +558 -174
  58. langroid/language_models/prompt_formatter/__init__.py +15 -0
  59. langroid/language_models/prompt_formatter/base.py +4 -6
  60. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  61. langroid/language_models/utils.py +18 -21
  62. langroid/mytypes.py +25 -8
  63. langroid/parsing/__init__.py +46 -0
  64. langroid/parsing/document_parser.py +260 -63
  65. langroid/parsing/image_text.py +32 -0
  66. langroid/parsing/parse_json.py +143 -0
  67. langroid/parsing/parser.py +122 -59
  68. langroid/parsing/repo_loader.py +114 -52
  69. langroid/parsing/search.py +68 -63
  70. langroid/parsing/spider.py +3 -2
  71. langroid/parsing/table_loader.py +44 -0
  72. langroid/parsing/url_loader.py +59 -11
  73. langroid/parsing/urls.py +85 -37
  74. langroid/parsing/utils.py +298 -4
  75. langroid/parsing/web_search.py +73 -0
  76. langroid/prompts/__init__.py +11 -0
  77. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  78. langroid/prompts/prompts_config.py +1 -1
  79. langroid/utils/__init__.py +17 -0
  80. langroid/utils/algorithms/__init__.py +3 -0
  81. langroid/utils/algorithms/graph.py +103 -0
  82. langroid/utils/configuration.py +36 -5
  83. langroid/utils/constants.py +4 -0
  84. langroid/utils/globals.py +2 -2
  85. langroid/utils/logging.py +2 -5
  86. langroid/utils/output/__init__.py +21 -0
  87. langroid/utils/output/printing.py +47 -1
  88. langroid/utils/output/status.py +33 -0
  89. langroid/utils/pandas_utils.py +30 -0
  90. langroid/utils/pydantic_utils.py +616 -2
  91. langroid/utils/system.py +98 -0
  92. langroid/vector_store/__init__.py +40 -0
  93. langroid/vector_store/base.py +203 -6
  94. langroid/vector_store/chromadb.py +59 -32
  95. langroid/vector_store/lancedb.py +463 -0
  96. langroid/vector_store/meilisearch.py +10 -7
  97. langroid/vector_store/momento.py +262 -0
  98. langroid/vector_store/qdrantdb.py +104 -22
  99. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
  100. langroid-0.1.219.dist-info/RECORD +127 -0
  101. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
  102. langroid/agent/special/recipient_validator_agent.py +0 -157
  103. langroid/parsing/json.py +0 -64
  104. langroid/utils/web/selenium_login.py +0 -36
  105. langroid-0.1.85.dist-info/RECORD +0 -94
  106. /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
  107. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
langroid/agent/chat_agent.py
@@ -1,14 +1,15 @@
+import copy
 import inspect
-import json
 import logging
 import textwrap
 from contextlib import ExitStack
-from typing import Dict, List, Optional, Set, Tuple, Type, cast, no_type_check
+from typing import Dict, List, Optional, Set, Tuple, Type, cast
 
 from rich import print
 from rich.console import Console
+from rich.markup import escape
 
-from langroid.agent.base import Agent, AgentConfig
+from langroid.agent.base import Agent, AgentConfig, noop_fn
 from langroid.agent.chat_document import ChatDocument
 from langroid.agent.tool_message import ToolMessage
 from langroid.language_models.base import (
@@ -17,7 +18,9 @@ from langroid.language_models.base import (
     Role,
     StreamingIfAllowed,
 )
+from langroid.language_models.openai_gpt import OpenAIGPT
 from langroid.utils.configuration import settings
+from langroid.utils.output import status
 
 console = Console()
 
@@ -40,8 +43,36 @@ class ChatAgentConfig(AgentConfig):
 
     system_message: str = "You are a helpful assistant."
     user_message: Optional[str] = None
-    use_tools: bool = True
-    use_functions_api: bool = False
+    use_tools: bool = False
+    use_functions_api: bool = True
+
+    def _set_fn_or_tools(self, fn_available: bool) -> None:
+        """
+        Enable Langroid Tool or OpenAI-like fn-calling,
+        depending on config settings and availability of fn-calling.
+        """
+        if self.use_functions_api and not fn_available:
+            logger.debug(
+                """
+                You have enabled `use_functions_api` but the LLM does not support it.
+                So we will enable `use_tools` instead, so we can use
+                Langroid's ToolMessage mechanism.
+                """
+            )
+            self.use_functions_api = False
+            self.use_tools = True
+
+        if not self.use_functions_api or not self.use_tools:
+            return
+        if self.use_functions_api and self.use_tools:
+            logger.debug(
+                """
+                You have enabled both `use_tools` and `use_functions_api`.
+                Turning off `use_tools`, since the LLM supports function-calling.
+                """
+            )
+            self.use_tools = False
+            self.use_functions_api = True
 
 
 class ChatAgent(Agent):
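
Illustration (not part of the diff): a minimal sketch of how the flipped defaults behave, assuming the agent's LLM is an OpenAI chat model; the final print only shows the resulting flags.

from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig

# New defaults prefer OpenAI function-calling over Langroid's JSON tools:
# use_tools=False, use_functions_api=True.
config = ChatAgentConfig(system_message="You are a helpful assistant.")
agent = ChatAgent(config)

# In __init__ (a later hunk), the agent calls
# config._set_fn_or_tools(self._fn_call_available()). If the underlying
# LLM is not an OpenAI chat model, use_functions_api is switched off and
# use_tools (Langroid's ToolMessage mechanism) is switched on instead.
print(agent.config.use_tools, agent.config.use_functions_api)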
@@ -61,7 +92,9 @@ class ChatAgent(Agent):
     """
 
     def __init__(
-        self, config: ChatAgentConfig, task: Optional[List[LLMMessage]] = None
+        self,
+        config: ChatAgentConfig = ChatAgentConfig(),
+        task: Optional[List[LLMMessage]] = None,
     ):
         """
         Chat-mode agent initialized with task spec as the initial message sequence
@@ -71,6 +104,7 @@ class ChatAgent(Agent):
         """
         super().__init__(config)
         self.config: ChatAgentConfig = config
+        self.config._set_fn_or_tools(self._fn_call_available())
         self.message_history: List[LLMMessage] = []
         self.tool_instructions_added: bool = False
         # An agent's "task" is defined by a system msg and an optional user msg;
@@ -102,8 +136,42 @@ class ChatAgent(Agent):
         self.llm_functions_usable: Set[str] = set()
         self.llm_function_force: Optional[Dict[str, str]] = None
 
+    def clone(self, i: int = 0) -> "ChatAgent":
+        """Create i'th clone of this agent, ensuring tool use/handling is cloned.
+        Important: We assume all member variables are in the __init__ method here
+        and in the Agent class.
+        TODO: We are attempting to clone an agent after its state has been
+        changed in possibly many ways. Below is an imperfect solution. Caution advised.
+        Revisit later.
+        """
+        agent_cls = type(self)
+        config_copy = copy.deepcopy(self.config)
+        config_copy.name = f"{config_copy.name}-{i}"
+        new_agent = agent_cls(config_copy)
+        new_agent.system_tool_instructions = self.system_tool_instructions
+        new_agent.system_json_tool_instructions = self.system_json_tool_instructions
+        new_agent.llm_tools_map = self.llm_tools_map
+        new_agent.llm_functions_map = self.llm_functions_map
+        new_agent.llm_functions_handled = self.llm_functions_handled
+        new_agent.llm_functions_usable = self.llm_functions_usable
+        new_agent.llm_function_force = self.llm_function_force
+        # Caution - we are copying the vector-db, maybe we don't always want this?
+        new_agent.vecdb = self.vecdb
+        return new_agent
+
+    def _fn_call_available(self) -> bool:
+        """Does this agent's LLM support function calling?"""
+        return (
+            self.llm is not None
+            and isinstance(self.llm, OpenAIGPT)
+            and self.llm.is_openai_chat_model()
+        )
+
     def set_system_message(self, msg: str) -> None:
         self.system_message = msg
+        if len(self.message_history) > 0:
+            # if there is message history, update the system message in it
+            self.message_history[0].content = msg
 
     def set_user_message(self, msg: str) -> None:
         self.user_message = msg
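
Illustration (not part of the diff): a hedged sketch of the new clone method in use; the agent name and loop bound are made up for this example.

# clone() deep-copies the config, carries over tool/function state,
# shares the vector-db reference, and suffixes the name with the index.
base = ChatAgent(ChatAgentConfig(name="assistant"))
clones = [base.clone(i) for i in range(3)]
print([c.config.name for c in clones])  # ['assistant-0', 'assistant-1', 'assistant-2']

# set_system_message now also patches message_history[0] in place
# when a conversation has already started.
base.set_system_message("You are a terse assistant.")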
@@ -160,46 +228,24 @@ class ChatAgent(Agent):
         enabled_classes: List[Type[ToolMessage]] = list(self.llm_tools_map.values())
         if len(enabled_classes) == 0:
             return "You can ask questions in natural language."
-
         json_instructions = "\n\n".join(
             [
-                textwrap.dedent(
-                    f"""
-                    TOOL: {msg_cls.default_value("request")}
-                    PURPOSE: {msg_cls.default_value("purpose")}
-                    JSON FORMAT: {
-                        json.dumps(
-                            msg_cls.llm_function_schema(request=True).parameters,
-                            indent=4,
-                        )
-                    }
-                    {"EXAMPLE: " + msg_cls.usage_example() if msg_cls.examples() else ""}
-                    """.lstrip()
-                )
-                for i, msg_cls in enumerate(enabled_classes)
+                msg_cls.json_instructions(tool=self.config.use_tools)
+                for _, msg_cls in enumerate(enabled_classes)
                 if msg_cls.default_value("request") in self.llm_tools_usable
             ]
         )
-        return textwrap.dedent(
-            f"""
-            === ALL AVAILABLE TOOLS and THEIR JSON FORMAT INSTRUCTIONS ===
-            You have access to the following TOOLS to accomplish your task:
-
-            {json_instructions}
-
-            When one of the above TOOLs is applicable, you must express your
-            request as "TOOL:" followed by the request in the above JSON format.
-            """
-            + """
-            The JSON format will be:
-            \\{
-            "request": "<tool_name>",
-            "<arg1>": <value1>,
-            "<arg2>": <value2>,
-            ...
-            \\}
-            ----------------------------
-            """.lstrip()
+        # if any of the enabled classes has json_group_instructions, then use that,
+        # else fall back to ToolMessage.json_group_instructions
+        for msg_cls in enabled_classes:
+            if hasattr(msg_cls, "json_group_instructions") and callable(
+                getattr(msg_cls, "json_group_instructions")
+            ):
+                return msg_cls.json_group_instructions().format(
+                    json_instructions=json_instructions
+                )
+        return ToolMessage.json_group_instructions().format(
+            json_instructions=json_instructions
         )
 
     def tool_instructions(self) -> str:
@@ -260,13 +306,20 @@ class ChatAgent(Agent):
         """
         self.system_message += "\n\n" + message
 
+    def last_message_with_role(self, role: Role) -> LLMMessage | None:
+        """from `message_history`, return the last message with role `role`"""
+        for i in range(len(self.message_history) - 1, -1, -1):
+            if self.message_history[i].role == role:
+                return self.message_history[i]
+        return None
+
     def update_last_message(self, message: str, role: str = Role.USER) -> None:
         """
         Update the last message that has role `role` in the message history.
         Useful when we want to replace a long user prompt, that may contain context
         documents plus a question, with just the question.
         Args:
-            message (str): user message
+            message (str): new message to replace with
             role (str): role of message to replace
         """
         if len(self.message_history) == 0:
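
Illustration (not part of the diff): continuing the sketch agent from earlier, the new last_message_with_role helper pairs naturally with update_last_message; the question string is made up.

from langroid.language_models.base import Role

# Scan message_history from the end for the last user message, if any.
last_user = agent.last_message_with_role(Role.USER)
if last_user is not None:
    # Replace a long context-laden prompt with just the question.
    agent.update_last_message("What is the capital of France?", role=Role.USER)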
@@ -302,7 +355,8 @@ class ChatAgent(Agent):
 
             """.lstrip()
         )
-        return LLMMessage(role=Role.SYSTEM, content=content)
+        # remove leading and trailing newlines and other whitespace
+        return LLMMessage(role=Role.SYSTEM, content=content.strip())
 
     def enable_message(
         self,
@@ -311,6 +365,7 @@ class ChatAgent(Agent):
         handle: bool = True,
         force: bool = False,
         require_recipient: bool = False,
+        include_defaults: bool = True,
     ) -> None:
         """
         Add the tool (message class) to the agent, and enable either
@@ -331,7 +386,11 @@ class ChatAgent(Agent):
                `force` is ignored if `message_class` is None.
            require_recipient: whether to require that recipient be specified
                when using the tool message (only applies if `use` is True).
-
+            include_defaults: whether to include fields that have default values,
+                in the "properties" section of the JSON format instructions.
+                (Normally the OpenAI completion API ignores these fields,
+                but the Assistant fn-calling seems to pay attention to these,
+                and if we don't want this, we should set this to False.)
         """
         super().enable_message_handling(message_class)  # enables handling only
         tools = self._get_tool_list(message_class)
@@ -339,7 +398,7 @@ class ChatAgent(Agent):
         if require_recipient:
             message_class = message_class.require_recipient()
         request = message_class.default_value("request")
-        llm_function = message_class.llm_function_schema()
+        llm_function = message_class.llm_function_schema(defaults=include_defaults)
         self.llm_functions_map[request] = llm_function
         if force:
             self.llm_function_force = dict(name=request)
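
Illustration (not part of the diff): a sketch of the new include_defaults flag, with a hypothetical tool class; CityTemperatureTool and its fields are invented for this example.

from langroid.agent.tool_message import ToolMessage

class CityTemperatureTool(ToolMessage):
    request: str = "city_temperature"
    purpose: str = "To get the current temperature in a given <city>"
    city: str
    units: str = "celsius"  # a field with a default value

agent.enable_message(
    CityTemperatureTool,
    use=True,
    handle=True,
    # Omit defaulted fields (like `units`) from the generated function
    # schema; per the docstring above, Assistant fn-calling may
    # otherwise pay attention to them.
    include_defaults=False,
)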
@@ -403,12 +462,11 @@ class ChatAgent(Agent):
             message_class: The only ToolMessage class to allow
         """
         request = message_class.__fields__["request"].default
-        for r in self.llm_functions_usable:
-            if r != request:
-                self.llm_tools_usable.discard(r)
-                self.llm_functions_usable.discard(r)
+        to_remove = [r for r in self.llm_tools_usable if r != request]
+        for r in to_remove:
+            self.llm_tools_usable.discard(r)
+            self.llm_functions_usable.discard(r)
 
-    @no_type_check
     def llm_response(
         self, message: Optional[str | ChatDocument] = None
     ) -> Optional[ChatDocument]:
@@ -421,32 +479,51 @@ class ChatAgent(Agent):
         Returns:
             LLM response as a ChatDocument object
         """
+        if self.llm is None:
+            return None
         hist, output_len = self._prep_llm_messages(message)
+        if len(hist) == 0:
+            return None
         with StreamingIfAllowed(self.llm, self.llm.get_stream()):
             response = self.llm_response_messages(hist, output_len)
         # TODO - when response contains function_call we should include
         # that (and related fields) in the message_history
         self.message_history.append(ChatDocument.to_LLMMessage(response))
+        # Preserve trail of tool_ids for OpenAI Assistant fn-calls
+        response.metadata.tool_ids = (
+            []
+            if isinstance(message, str)
+            else message.metadata.tool_ids if message is not None else []
+        )
         return response
 
-    @no_type_check
     async def llm_response_async(
         self, message: Optional[str | ChatDocument] = None
     ) -> Optional[ChatDocument]:
         """
         Async version of `llm_response`. See there for details.
         """
+        if self.llm is None:
+            return None
+
         hist, output_len = self._prep_llm_messages(message)
         with StreamingIfAllowed(self.llm, self.llm.get_stream()):
             response = await self.llm_response_messages_async(hist, output_len)
         # TODO - when response contains function_call we should include
         # that (and related fields) in the message_history
         self.message_history.append(ChatDocument.to_LLMMessage(response))
+        # Preserve trail of tool_ids for OpenAI Assistant fn-calls
+        response.metadata.tool_ids = (
+            []
+            if isinstance(message, str)
+            else message.metadata.tool_ids if message is not None else []
+        )
         return response
 
-    @no_type_check
     def _prep_llm_messages(
-        self, message: Optional[str | ChatDocument] = None
+        self,
+        message: Optional[str | ChatDocument] = None,
+        truncate: bool = True,
     ) -> Tuple[List[LLMMessage], int]:
         """
         Prepare messages to be sent to self.llm_response_messages,
@@ -458,12 +535,21 @@ class ChatAgent(Agent):
             output_len = max expected number of tokens in response
         """
 
-        if not self.llm_can_respond(message):
-            return None
+        if (
+            not self.llm_can_respond(message)
+            or self.config.llm is None
+            or self.llm is None
+        ):
+            return [], 0
 
-        assert (
-            message is not None or len(self.message_history) == 0
-        ), "message can be None only if message_history is empty, i.e. at start."
+        if message is None and len(self.message_history) > 0:
+            # this means agent has been used to get LLM response already,
+            # and so the last message is an "assistant" response.
+            # We delete this last assistant response and re-generate it.
+            self.clear_history(-1)
+            logger.warning(
+                "Re-generating the last assistant response since message is None"
+            )
 
         if len(self.message_history) == 0:
             # initial messages have not yet been loaded, so load them
@@ -477,8 +563,9 @@ class ChatAgent(Agent):
         if settings.debug:
             print(
                 f"""
-                [red]LLM Initial Msg History:
-                {self.message_history_str()}
+                [grey37]LLM Initial Msg History:
+                {escape(self.message_history_str())}
+                [/grey37]
                 """
             )
         else:
@@ -493,7 +580,8 @@ class ChatAgent(Agent):
         hist = self.message_history
         output_len = self.config.llm.max_output_tokens
         if (
-            self.chat_num_tokens(hist)
+            truncate
+            and self.chat_num_tokens(hist)
             > self.llm.chat_context_length() - self.config.llm.max_output_tokens
         ):
             # chat + output > max context length,
@@ -517,7 +605,10 @@ class ChatAgent(Agent):
                 raise ValueError(
                     """
                     The message history is longer than the max chat context
-                    length allowed, and we have run out of messages to drop."""
+                    length allowed, and we have run out of messages to drop.
+                    HINT: In your `OpenAIGPTConfig` object, try increasing
+                    `chat_context_length` or decreasing `max_output_tokens`.
+                    """
                 )
             # drop the second message, i.e. first msg after the sys msg
             # (typically user msg).
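
Illustration (not part of the diff): the HINT added above refers to config fields like these; the numbers are made up, and other OpenAIGPTConfig fields are left at their defaults.

from langroid.language_models.openai_gpt import OpenAIGPTConfig

llm_config = OpenAIGPTConfig(
    chat_context_length=8192,  # raise this if you hit the ValueError above
    max_output_tokens=256,     # or lower this
)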
@@ -559,6 +650,18 @@ class ChatAgent(Agent):
             )
         return hist, output_len
 
+    def _function_args(
+        self,
+    ) -> Tuple[Optional[List[LLMFunctionSpec]], str | Dict[str, str]]:
+        functions: Optional[List[LLMFunctionSpec]] = None
+        fun_call: str | Dict[str, str] = "none"
+        if self.config.use_functions_api and len(self.llm_functions_usable) > 0:
+            functions = [self.llm_functions_map[f] for f in self.llm_functions_usable]
+            fun_call = (
+                "auto" if self.llm_function_force is None else self.llm_function_force
+            )
+        return functions, fun_call
+
     def llm_response_messages(
         self, messages: List[LLMMessage], output_len: Optional[int] = None
     ) -> ChatDocument:
@@ -573,24 +676,21 @@ class ChatAgent(Agent):
         """
         assert self.config.llm is not None and self.llm is not None
         output_len = output_len or self.config.llm.max_output_tokens
+        streamer = noop_fn
+        if self.llm.get_stream():
+            streamer = self.callbacks.start_llm_stream()
+        self.llm.config.streamer = streamer
         with ExitStack() as stack:  # for conditionally using rich spinner
             if not self.llm.get_stream():
                 # show rich spinner only if not streaming!
-                cm = console.status("LLM responding to messages...")
+                cm = status(
+                    "LLM responding to messages...",
+                    log_if_quiet=False,
+                )
                 stack.enter_context(cm)
-            if self.llm.get_stream():
+            if self.llm.get_stream() and not settings.quiet:
                 console.print(f"[green]{self.indent}", end="")
-            functions: Optional[List[LLMFunctionSpec]] = None
-            fun_call: str | Dict[str, str] = "none"
-            if self.config.use_functions_api and len(self.llm_functions_usable) > 0:
-                functions = [
-                    self.llm_functions_map[f] for f in self.llm_functions_usable
-                ]
-                fun_call = (
-                    "auto"
-                    if self.llm_function_force is None
-                    else self.llm_function_force
-                )
+            functions, fun_call = self._function_args()
             assert self.llm is not None
             response = self.llm.chat(
                 messages,
@@ -598,22 +698,39 @@ class ChatAgent(Agent):
                 functions=functions,
                 function_call=fun_call,
             )
-        displayed = False
+        if self.llm.get_stream():
+            self.callbacks.finish_llm_stream(
+                content=str(response),
+                is_tool=self.has_tool_message_attempt(
+                    ChatDocument.from_LLMResponse(response, displayed=True)
+                ),
+            )
+            self.llm.config.streamer = noop_fn
+            if response.cached:
+                self.callbacks.cancel_llm_stream()
+
         if not self.llm.get_stream() or response.cached:
-            displayed = True
+            # We would have already displayed the msg "live" ONLY if
+            # streaming was enabled, AND we did not find a cached response.
+            # If we are here, it means the response has not yet been displayed.
             cached = f"[red]{self.indent}(cached)[/red]" if response.cached else ""
-            if response.function_call is not None:
-                response_str = str(response.function_call)
-            else:
-                response_str = response.message
-            print(cached + "[green]" + response_str)
+            if not settings.quiet:
+                print(cached + "[green]" + escape(str(response)))
+            self.callbacks.show_llm_response(
+                content=str(response),
+                is_tool=self.has_tool_message_attempt(
+                    ChatDocument.from_LLMResponse(response, displayed=True)
+                ),
+                cached=response.cached,
+            )
         self.update_token_usage(
             response,
             messages,
             self.llm.get_stream(),
-            print_response_stats=True,
+            chat=True,
+            print_response_stats=self.config.show_stats and not settings.quiet,
         )
-        return ChatDocument.from_LLMResponse(response, displayed)
+        return ChatDocument.from_LLMResponse(response, displayed=True)
 
     async def llm_response_messages_async(
         self, messages: List[LLMMessage], output_len: Optional[int] = None
@@ -631,26 +748,51 @@ class ChatAgent(Agent):
             "auto" if self.llm_function_force is None else self.llm_function_force
         )
         assert self.llm is not None
+
+        streamer = noop_fn
+        if self.llm.get_stream():
+            streamer = self.callbacks.start_llm_stream()
+        self.llm.config.streamer = streamer
+
         response = await self.llm.achat(
             messages,
             output_len,
             functions=functions,
             function_call=fun_call,
         )
-        displayed = True
-        cached = f"[red]{self.indent}(cached)[/red]" if response.cached else ""
-        if response.function_call is not None:
-            response_str = str(response.function_call)
-        else:
-            response_str = response.message
-        print(cached + "[green]" + response_str)
+        if self.llm.get_stream():
+            self.callbacks.finish_llm_stream(
+                content=str(response),
+                is_tool=self.has_tool_message_attempt(
+                    ChatDocument.from_LLMResponse(response, displayed=True)
+                ),
+            )
+            self.llm.config.streamer = noop_fn
+            if response.cached:
+                self.callbacks.cancel_llm_stream()
+        if not self.llm.get_stream() or response.cached:
+            # We would have already displayed the msg "live" ONLY if
+            # streaming was enabled, AND we did not find a cached response.
+            # If we are here, it means the response has not yet been displayed.
+            cached = f"[red]{self.indent}(cached)[/red]" if response.cached else ""
+            if not settings.quiet:
+                print(cached + "[green]" + escape(str(response)))
+            self.callbacks.show_llm_response(
+                content=str(response),
+                is_tool=self.has_tool_message_attempt(
+                    ChatDocument.from_LLMResponse(response, displayed=True)
+                ),
+                cached=response.cached,
+            )
+
         self.update_token_usage(
             response,
             messages,
             self.llm.get_stream(),
-            print_response_stats=True,
+            chat=True,
+            print_response_stats=self.config.show_stats and not settings.quiet,
         )
-        return ChatDocument.from_LLMResponse(response, displayed)
+        return ChatDocument.from_LLMResponse(response, displayed=True)
 
     def _llm_response_temp_context(self, message: str, prompt: str) -> ChatDocument:
@@ -703,12 +845,14 @@ class ChatAgent(Agent):
         """
         # explicitly call THIS class's respond method,
         # not a derived class's (or else there would be infinite recursion!)
+        n_msgs = len(self.message_history)
         with StreamingIfAllowed(self.llm, self.llm.get_stream()):  # type: ignore
             response = cast(ChatDocument, ChatAgent.llm_response(self, message))
-        # clear the last two messages, which are the
-        # user message and the assistant response
-        self.message_history.pop()
-        self.message_history.pop()
+        # If there is a response, then we will have two additional
+        # messages in the message history, i.e. the user message and the
+        # assistant response. We want to (carefully) remove these two messages.
+        self.message_history.pop() if len(self.message_history) > n_msgs else None
+        self.message_history.pop() if len(self.message_history) > n_msgs else None
         return response
 
     async def llm_response_forget_async(self, message: str) -> ChatDocument:
@@ -717,14 +861,16 @@ class ChatAgent(Agent):
         """
         # explicitly call THIS class's respond method,
         # not a derived class's (or else there would be infinite recursion!)
+        n_msgs = len(self.message_history)
         with StreamingIfAllowed(self.llm, self.llm.get_stream()):  # type: ignore
             response = cast(
                 ChatDocument, await ChatAgent.llm_response_async(self, message)
             )
-        # clear the last two messages, which are the
-        # user message and the assistant response
-        self.message_history.pop()
-        self.message_history.pop()
+        # If there is a response, then we will have two additional
+        # messages in the message history, i.e. the user message and the
+        # assistant response. We want to (carefully) remove these two messages.
+        self.message_history.pop() if len(self.message_history) > n_msgs else None
+        self.message_history.pop() if len(self.message_history) > n_msgs else None
         return response
 
     def chat_num_tokens(self, messages: Optional[List[LLMMessage]] = None) -> int:
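
Illustration (not part of the diff): a sketch of the hardened llm_response_forget, reusing the sketch agent from earlier. The history length is now captured before the call, so the two pops only run if a user message and an assistant response were actually appended.

n_before = len(agent.message_history)
reply = agent.llm_response_forget("Summarize our chat in one line.")
# The user message and the assistant reply are popped again, so the
# visible history is unchanged by this call.
assert len(agent.message_history) == n_before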