langroid 0.53.6__py3-none-any.whl → 0.53.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
@@ -502,6 +502,17 @@ class ChatAgent(Agent):
         idx = self.nth_message_idx_with_role(role, n_role_msgs)
         return self.message_history[idx]

+    def last_message_idx_with_role(self, role: Role) -> int:
+        """Index of last message in message_history, with specified role.
+        Return -1 if not found. Index = 0 is the first message in the history.
+        """
+        indices_with_role = [
+            i for i, m in enumerate(self.message_history) if m.role == role
+        ]
+        if len(indices_with_role) == 0:
+            return -1
+        return indices_with_role[-1]
+
     def nth_message_idx_with_role(self, role: Role, n: int) -> int:
         """Index of `n`th message in message_history, with specified role.
         (n is assumed to be 1-based, i.e. 1 is the first message with that role).
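The new helper complements `nth_message_idx_with_role` by scanning for the latest occurrence of a role. A minimal standalone sketch of the same lookup logic, using plain role strings instead of langroid's `Role` enum (the history values are illustrative):

    def last_idx_with_role(roles: list[str], role: str) -> int:
        """Return index of the last occurrence of `role`, or -1 if absent."""
        indices = [i for i, r in enumerate(roles) if r == role]
        return indices[-1] if indices else -1

    history_roles = ["system", "user", "assistant", "user"]
    assert last_idx_with_role(history_roles, "user") == 3       # last user msg
    assert last_idx_with_role(history_roles, "function") == -1  # role not present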
@@ -1229,9 +1240,18 @@ class ChatAgent(Agent):
         idx: int,
         tokens: int = 5,
         warning: str = "...[Contents truncated!]",
+        inplace: bool = True,
     ) -> LLMMessage:
-        """Truncate message at idx in msg history to `tokens` tokens"""
-        llm_msg = self.message_history[idx]
+        """
+        Truncate message at idx in msg history to `tokens` tokens.
+
+        If inplace is True, the message is truncated in place, else
+        it LEAVES the original message INTACT and returns a new message
+        """
+        if inplace:
+            llm_msg = self.message_history[idx]
+        else:
+            llm_msg = copy.deepcopy(self.message_history[idx])
         orig_content = llm_msg.content
         new_content = (
             self.parser.truncate_tokens(orig_content, tokens)
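A brief usage sketch of the new `inplace` flag, assuming an existing `ChatAgent` instance named `agent` (the index and token count are illustrative):

    # inplace=False: message_history[1] is left intact; a deep-copied,
    # truncated LLMMessage is returned for the caller to use.
    shortened = agent.truncate_message(1, tokens=5, inplace=False)

    # inplace=True (the default): message_history[1] itself is shortened
    # to roughly `tokens` tokens plus the warning suffix.
    agent.truncate_message(1, tokens=5)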
@@ -1463,6 +1483,10 @@ class ChatAgent(Agent):
         """
         Prepare messages to be sent to self.llm_response_messages,
         which is the main method that calls the LLM API to get a response.
+        If desired output tokens + message history exceeds the model context length,
+        then first the max output tokens is reduced to fit, and if that is not
+        possible, older messages may be truncated to accommodate at least
+        self.config.llm.min_output_tokens of output.

         Returns:
             Tuple[List[LLMMessage], int]: (messages, output_len)
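A worked example of the policy described in the docstring, with hypothetical numbers (the 300-token margin corresponds to the `CHAT_HISTORY_BUFFER` introduced below):

    context_len = 16_000   # model context length (hypothetical)
    hist_tokens = 14_000   # tokens currently in the message history (hypothetical)
    buffer = 300           # safety margin
    min_output = 100       # config.llm.min_output_tokens (hypothetical)

    output_len = context_len - hist_tokens - buffer  # 1_700
    # 1_700 > min_output, so the output is merely capped; no history truncation needed.
    # Only if output_len fell below min_output would older messages be truncated.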
@@ -1530,17 +1554,42 @@ class ChatAgent(Agent):
             truncate
             and output_len > self.llm.chat_context_length() - self.chat_num_tokens(hist)
         ):
+            CHAT_HISTORY_BUFFER = 300
             # chat + output > max context length,
             # so first try to shorten requested output len to fit;
-            # use an extra margin of 300 tokens in case our calcs are off
+            # use an extra margin of CHAT_HISTORY_BUFFER tokens
+            # in case our calcs are off (and to allow for some extra tokens)
             output_len = (
-                self.llm.chat_context_length() - self.chat_num_tokens(hist) - 300
+                self.llm.chat_context_length()
+                - self.chat_num_tokens(hist)
+                - CHAT_HISTORY_BUFFER
             )
-            if output_len < self.config.llm.min_output_tokens:
-                # unacceptably small output len, so drop early parts of conv history
-                # if output_len is still too long, then drop early parts of conv history
+            if output_len > self.config.llm.min_output_tokens:
+                logger.warning(
+                    f"""
+                    Chat Model context length is {self.llm.chat_context_length()},
+                    but the current message history is {self.chat_num_tokens(hist)}
+                    tokens long, which does not allow
+                    {self.config.llm.model_max_output_tokens} output tokens.
+                    Therefore we reduced `max_output_tokens` to {output_len} tokens,
+                    so they can fit within the model's context length
+                    """
+                )
+            else:
+                # unacceptably small output len, so compress early parts of conv
+                # history if output_len is still too long.
                 # TODO we should really be doing summarization or other types of
                 # prompt-size reduction
+                msg_idx_to_compress = 1  # don't touch system msg
+                # we will try compressing msg indices up to but not including
+                # last user msg
+                last_msg_idx_to_compress = (
+                    self.last_message_idx_with_role(
+                        role=Role.USER,
+                    )
+                    - 1
+                )
+                n_truncated = 0
                 while (
                     self.chat_num_tokens(hist)
                     > self.llm.chat_context_length() - self.config.llm.min_output_tokens
@@ -1548,14 +1597,14 @@ class ChatAgent(Agent):
                     # try dropping early parts of conv history
                     # TODO we should really be doing summarization or other types of
                     # prompt-size reduction
-                    if len(hist) <= 2:
+                    if msg_idx_to_compress > last_msg_idx_to_compress:
                         # We want to preserve the first message (typically system msg)
                         # and last message (user msg).
                         raise ValueError(
                             """
                             The (message history + max_output_tokens) is longer than the
                             max chat context length of this model, and we have tried
-                            reducing the requested max output tokens, as well as dropping
+                            reducing the requested max output tokens, as well as truncating
                             early parts of the message history, to accommodate the model
                             context length, but we have run out of msgs to drop.

@@ -1566,51 +1615,59 @@ class ChatAgent(Agent):
                             - decreasing `max_output_tokens`
                             """
                         )
-                    # drop the second message, i.e. first msg after the sys msg
-                    # (typically user msg).
-                    ChatDocument.delete_id(hist[1].chat_document_id)
-                    hist = hist[:1] + hist[2:]
+                    n_truncated += 1
+                    # compress the msg at idx `msg_idx_to_compress`
+                    hist[msg_idx_to_compress] = self.truncate_message(
+                        msg_idx_to_compress,
+                        tokens=30,
+                        warning="... [Contents truncated!]",
+                    )

-                if len(hist) < len(self.message_history):
+                    msg_idx_to_compress += 1
+
+                output_len = min(
+                    self.config.llm.model_max_output_tokens,
+                    self.llm.chat_context_length()
+                    - self.chat_num_tokens(hist)
+                    - CHAT_HISTORY_BUFFER,
+                )
+                if output_len < self.config.llm.min_output_tokens:
+                    raise ValueError(
+                        f"""
+                        Tried to shorten prompt history for chat mode
+                        but even after truncating all messages except system msg and
+                        last (user) msg,
+                        the history token len {self.chat_num_tokens(hist)} is
+                        too long to accommodate the desired minimum output tokens
+                        {self.config.llm.min_output_tokens} within the
+                        model's context length {self.llm.chat_context_length()}.
+                        Please try shortening the system msg or user prompts,
+                        or adjust `config.llm.min_output_tokens` to be smaller.
+                        """
+                    )
+                else:
+                    # we MUST have truncated at least one msg
                     msg_tokens = self.chat_num_tokens()
                     logger.warning(
                         f"""
                         Chat Model context length is {self.llm.chat_context_length()}
-                        tokens, but the current message history is {msg_tokens} tokens long.
-                        Dropped the {len(self.message_history) - len(hist)} messages
-                        from early in the conversation history so that history token
-                        length is {self.chat_num_tokens(hist)}.
-                        This may still not be low enough to allow minimum output length of
-                        {self.config.llm.min_output_tokens} tokens.
+                        tokens, but the current message history is {msg_tokens} tokens long,
+                        which does not allow {self.config.llm.model_max_output_tokens}
+                        output tokens.
+                        Therefore we truncated the first {n_truncated} messages
+                        in the conversation history so that history token
+                        length is reduced to {self.chat_num_tokens(hist)}, and
+                        we use `max_output_tokens = {output_len}`,
+                        so they can fit within the model's context length
+                        of {self.llm.chat_context_length()} tokens.
                         """
                     )

-        if output_len < 0:
-            raise ValueError(
-                f"""
-                Tried to shorten prompt history for chat mode
-                but even after dropping all messages except system msg and last (
-                user) msg, the history token len {self.chat_num_tokens(hist)} is longer
-                than the model's max context length {self.llm.chat_context_length()}.
-                Please try shortening the system msg or user prompts.
-                """
-            )
-        if output_len < self.config.llm.min_output_tokens:
-            logger.warning(
-                f"""
-                Tried to shorten prompt history for chat mode
-                but the feasible output length {output_len} is still
-                less than the minimum output length {self.config.llm.min_output_tokens}.
-                Your chat history is too long for this model,
-                and the response may be truncated.
-                """
-            )
         if isinstance(message, ChatDocument):
             # record the position of the corresponding LLMMessage in
             # the message_history
             message.metadata.msg_idx = len(hist) - 1
             message.metadata.agent_id = self.id
-
         return hist, output_len

     def _function_args(
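The hunks above replace the old drop-whole-messages strategy with per-message truncation: starting just after the system message, each message up to (but not including) the last user message is squeezed to about 30 tokens until the history leaves room for at least `min_output_tokens`. A simplified, self-contained sketch of that strategy (not langroid code; a whitespace split stands in for the real tokenizer):

    from typing import List, Tuple

    def compress_history(
        hist: List[str],        # toy "messages" as plain strings
        last_user_idx: int,     # index of the last user message
        ctx_len: int,           # model context length
        min_output: int,        # minimum acceptable output tokens
        buffer: int = 300,      # safety margin, like CHAT_HISTORY_BUFFER
    ) -> Tuple[List[str], int]:
        def n_tokens(msgs: List[str]) -> int:
            return sum(len(m.split()) for m in msgs)  # crude stand-in tokenizer

        idx = 1  # never touch the system msg at index 0
        while n_tokens(hist) > ctx_len - min_output:
            if idx >= last_user_idx:  # ran out of messages we may compress
                raise ValueError("history too long even after truncating messages")
            hist[idx] = " ".join(hist[idx].split()[:30]) + " ...[Contents truncated!]"
            idx += 1
        return hist, ctx_len - n_tokens(hist) - buffer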
@@ -620,33 +620,31 @@ class LanguageModel(ABC):
     def __call__(self, prompt: str, max_tokens: int) -> LLMResponse:
         return self.generate(prompt, max_tokens)

+    @staticmethod
+    def _fallback_model_names(model: str) -> List[str]:
+        parts = model.split("/")
+        fallbacks = []
+        for i in range(1, len(parts)):
+            fallbacks.append("/".join(parts[i:]))
+        return fallbacks
+
     def info(self) -> ModelInfo:
         """Info of relevant chat model"""
-        model = (
-            self.config.completion_model
-            if self.config.use_completion_for_chat
-            else self.config.chat_model
-        )
         orig_model = (
             self.config.completion_model
             if self.config.use_completion_for_chat
             else self.chat_model_orig
         )
-        return get_model_info(orig_model, model)
+        return get_model_info(orig_model, self._fallback_model_names(orig_model))

     def completion_info(self) -> ModelInfo:
         """Info of relevant completion model"""
-        model = (
-            self.config.chat_model
-            if self.config.use_chat_for_completion
-            else self.config.completion_model
-        )
         orig_model = (
             self.chat_model_orig
             if self.config.use_chat_for_completion
             else self.config.completion_model
         )
-        return get_model_info(orig_model, model)
+        return get_model_info(orig_model, self._fallback_model_names(orig_model))

     def supports_functions_or_tools(self) -> bool:
         """
@@ -0,0 +1,128 @@
+"""
+An API for an Agent in an MCP Server to use for chat-completions
+"""
+
+from typing import Awaitable, Callable, Dict, List, Optional, Union
+
+from fastmcp.server import Context
+
+import langroid.language_models as lm
+from langroid.language_models import LLMResponse
+from langroid.language_models.base import (
+    LanguageModel,
+    LLMConfig,
+    OpenAIJsonSchemaSpec,
+    OpenAIToolSpec,
+    ToolChoiceTypes,
+)
+from langroid.utils.types import to_string
+
+
+def none_fn(x: str) -> None | str:
+    return None
+
+
+class MCPClientLMConfig(LLMConfig):
+    """
+    Mock Language Model Configuration.
+
+    Attributes:
+        response_dict (Dict[str, str]): A "response rule-book", in the form of a
+            dictionary; if last msg in dialog is x,then respond with response_dict[x]
+    """
+
+    response_dict: Dict[str, str] = {}
+    response_fn: Callable[[str], None | str] = none_fn
+    response_fn_async: Optional[Callable[[str], Awaitable[Optional[str]]]] = None
+    default_response: str = "Mock response"
+
+    type: str = "mock"
+
+
+class MockLM(LanguageModel):
+
+    def __init__(self, config: MockLMConfig = MockLMConfig()):
+        super().__init__(config)
+        self.config: MockLMConfig = config
+
+    def _response(self, msg: str) -> LLMResponse:
+        # response is based on this fallback order:
+        # - response_dict
+        # - response_fn
+        # - default_response
+        mapped_response = self.config.response_dict.get(
+            msg, self.config.response_fn(msg) or self.config.default_response
+        )
+        return lm.LLMResponse(
+            message=to_string(mapped_response),
+            cached=False,
+        )
+
+    async def _response_async(self, msg: str) -> LLMResponse:
+        # response is based on this fallback order:
+        # - response_dict
+        # - response_fn_async
+        # - response_fn
+        # - default_response
+        if self.config.response_fn_async is not None:
+            response = await self.config.response_fn_async(msg)
+        else:
+            response = self.config.response_fn(msg)
+
+        mapped_response = self.config.response_dict.get(
+            msg, response or self.config.default_response
+        )
+        return lm.LLMResponse(
+            message=to_string(mapped_response),
+            cached=False,
+        )
+
+    def chat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        tools: Optional[List[OpenAIToolSpec]] = None,
+        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+        response_format: Optional[OpenAIJsonSchemaSpec] = None,
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return self._response(last_msg)
+
+    async def achat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        tools: Optional[List[OpenAIToolSpec]] = None,
+        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+        response_format: Optional[OpenAIJsonSchemaSpec] = None,
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return await self._response_async(last_msg)
+
+    def generate(self, prompt: str, max_tokens: int = 200) -> lm.LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return self._response(prompt)
+
+    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return await self._response_async(prompt)
+
+    def get_stream(self) -> bool:
+        return False
+
+    def set_stream(self, stream: bool) -> bool:
+        return False
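The `_response` fallback order in this new module (response_dict, then response_fn / response_fn_async, then default_response) mirrors langroid's existing mock LM. A small usage sketch against that established `MockLM`/`MockLMConfig` API, with illustrative values:

    from langroid.language_models.mock_lm import MockLM, MockLMConfig

    mock = MockLM(
        MockLMConfig(
            response_dict={"ping": "pong"},      # exact-match "rule book"
            default_response="no rule matched",  # used when no dict entry or fn applies
        )
    )
    print(mock.chat("ping").message)   # -> "pong"
    print(mock.chat("hello").message)  # -> "no rule matched"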
@@ -27,6 +27,7 @@ class MockLMConfig(LLMConfig):
             dictionary; if last msg in dialog is x,then respond with response_dict[x]
     """

+    chat_context_length: int = 1_000_000_000  # infinite
     response_dict: Dict[str, str] = {}
     response_fn: Callable[[str], None | str] = none_fn
     response_fn_async: Optional[Callable[[str], Awaitable[Optional[str]]]] = None
@@ -406,10 +406,21 @@ MODEL_INFO: Dict[str, ModelInfo] = {

 def get_model_info(
     model: str | ModelName,
-    fallback_model: str | ModelName = "",
+    fallback_models: List[str] = [],
 ) -> ModelInfo:
     """Get model information by name or enum value"""
-    return _get_model_info(model) or _get_model_info(fallback_model) or ModelInfo()
+    # Sequence of models to try, starting with the primary model
+    models_to_try = [model] + fallback_models
+
+    # Find the first model in the sequence that has info defined using next()
+    # on a generator expression that filters out None results from _get_model_info
+    found_info = next(
+        (info for m in models_to_try if (info := _get_model_info(m)) is not None),
+        None,  # Default value if the iterator is exhausted (no valid info found)
+    )
+
+    # Return the found info, or a default ModelInfo if none was found
+    return found_info or ModelInfo()


 def _get_model_info(model: str | ModelName) -> ModelInfo | None:
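Combined with `_fallback_model_names` in base.py above, lookup now walks a list of progressively shorter names instead of a single fallback. An illustrative call (the unprefixed "gpt-4o" is the form expected to have an entry in MODEL_INFO):

    from langroid.language_models.model_info import get_model_info

    # Tries "openrouter/openai/gpt-4o", then "openai/gpt-4o", then "gpt-4o",
    # returning info for the first name with an entry (else a default ModelInfo).
    info = get_model_info("openrouter/openai/gpt-4o", ["openai/gpt-4o", "gpt-4o"])
    print(info)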
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.53.6
+Version: 0.53.8
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -5,7 +5,7 @@ langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
 langroid/agent/base.py,sha256=zHwhNU403H-ZvogH4QhKTzaZn5_jt0ZdPHzSEmycDoc,80035
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
-langroid/agent/chat_agent.py,sha256=igo7wl3tOig7yae8NokEEqXS5AYuAeWJGq1YZhpzOho,85739
+langroid/agent/chat_agent.py,sha256=2HIYzYxkrGkRIS97ioKfIqjaW3RbX89M39LjzBobBEY,88381
 langroid/agent/chat_document.py,sha256=6O20Fp4QrquykaF2jFtwNHkvcoDte1LLwVZNk9mVH9c,18057
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
 langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
@@ -71,10 +71,11 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=Axj8U9o9r7ovpCYqhNJ4SaVYLvufLRQXnr51IyIYJKY,28493
+langroid/language_models/base.py,sha256=253xcwXZ0yxSQ1W4SR50tAPZKCDc35yyU1o35EqB9b8,28484
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
-langroid/language_models/mock_lm.py,sha256=5BgHKDVRWFbUwDT_PFgTZXz9-k8wJSA2e3PZmyDgQ1k,4022
-langroid/language_models/model_info.py,sha256=7Fv5YByZjsRXKhkaa6okOM8jhDVpWZu6xlYAN3WTSCk,14453
+langroid/language_models/mcp_client_lm.py,sha256=wyDvlc26E_En5u_ZNZxajCHm8KBNi4jzG-dL76QCdt4,4098
+langroid/language_models/mock_lm.py,sha256=tA9JpURznsMZ59iRhFYMmaYQzAc0D0BT-PiJIV58sAk,4079
+langroid/language_models/model_info.py,sha256=0e011vJZMi7XU9OkKT6doxlybrNJfMlP54klLDDNgFg,14939
 langroid/language_models/openai_gpt.py,sha256=F28jqTEerN32m14q3K0oc3vnvBT8J7Q9xqXGZNKUjKU,85938
 langroid/language_models/utils.py,sha256=n55Oe2_V_4VNGhytvPWLYC-0tFS07RTjN83KWl-p_MI,6032
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
132
133
  langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
133
134
  langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
134
135
  langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
135
- langroid-0.53.6.dist-info/METADATA,sha256=kOJSlrle7MZXPeosRhCuqg25rdUJFF21wif68zBUkcQ,64945
136
- langroid-0.53.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
137
- langroid-0.53.6.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
138
- langroid-0.53.6.dist-info/RECORD,,
136
+ langroid-0.53.8.dist-info/METADATA,sha256=e4tCH-lXJE0OYlybGv2EIE84o68OQEc3HIxAmYj7BSc,64945
137
+ langroid-0.53.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
138
+ langroid-0.53.8.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
139
+ langroid-0.53.8.dist-info/RECORD,,