langroid 0.1.59__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/chat_agent.py
@@ -393,11 +393,13 @@ class ChatAgent(Agent):
             )
 
         if output_len < self.config.llm.min_output_tokens:
-            raise ValueError(
+            logger.warning(
                 f"""
                 Tried to shorten prompt history for chat mode
                 but the feasible output length {output_len} is still
                 less than the minimum output length {self.config.llm.min_output_tokens}.
+                Your chat history is too long for this model,
+                and the response may be truncated.
                 """
             )
         with StreamingIfAllowed(self.llm):
langroid/language_models/base.py
@@ -1,5 +1,6 @@
 import asyncio
 import json
+import logging
 from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
@@ -9,6 +10,7 @@ from pydantic import BaseModel, BaseSettings
 
 from langroid.cachedb.momento_cachedb import MomentoCacheConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.config import Llama2FormatterConfig, PromptFormatterConfig
 from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.json import top_level_json_field
@@ -20,9 +22,60 @@ from langroid.prompts.templates import (
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import show_if_debug
 
+logger = logging.getLogger(__name__)
+
+
+class LocalModelConfig(BaseModel):
+    """
+    Configuration for local model available via
+    an OpenAI-compatible API.
+
+    Support local LLM endpoint that spoofs the OpenAI API.
+    Examples of libraries that enable this are:
+    - llama-cpp-python (LCP)
+    - text-generation-webui (TGW) (from oobabooga or "ooba" for short)
+    Typically these allow spinning up a server that listens on
+    http://localhost:8000/v1
+    and we can continue using our OpenAI-API-based python code, except we
+    set openai.api_base to this URL.
+
+    These endpoints usually support both /completions and /chat/completions requests.
+    Supporting /chat/completions is more complex because each family of local model
+    has its own (finicky) formatting for turns and roles in a chat.
+    The TGW lib has an extensive set of templates for various model families,
+    and the template is auto-detected from the model, at least for common models,
+    so we can directly use the /chat/completions endpoint, and it works well (at
+    least on llama2 models). However, when in doubt,
+    we can always do our own formatting of the chat history and use the /completions
+    endpoint instead. This is what we do for LCP models. In this case,
+    we need to set `use_completion_for_chat` to True.
+    With a model served via TGW, for chats we can set this to either True or False
+    (in which case we rely on the TGW templates correctly formatting the chat history).
+
+    Both of the above libs assume a single model is available at the endpoint.
+    As far as I know, they do not support run-time switching of models.
+    There is another library that we can potentially integrate, `localAI`,
+    which does have model switching, and can be very useful, e.g.
+    when we want different agents to use different models.
+
+    All of the above considerations are outside of this interface, however.
+    All we care about here is the endpoint url.
+    """
+
+    # OPENAI_LOCAL.* env vars can be used to set these in .env file or environment
+
+    api_base: str = "http://localhost:8000/v1"
+    model: str = "local"  # usually not needed
+    model_type: str = "llama2"
+    formatter: None | PromptFormatterConfig = Llama2FormatterConfig()
+    context_length: int = 2048  # default for llama-cpp-python
+    use_chat_for_completion: bool = False
+    use_completion_for_chat: bool = True
+
 
 class LLMConfig(BaseSettings):
     type: str = "openai"
+    local: None | LocalModelConfig = None
     timeout: int = 20  # timeout for API requests
     chat_model: Optional[str] = None
     completion_model: Optional[str] = None
@@ -32,6 +85,7 @@ class LLMConfig(BaseSettings):
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64
+    use_completion_for_chat: bool = False  # use completion model for chat?
    use_chat_for_completion: bool = True  # use chat model for completion?
    stream: bool = False  # stream output from API?
    cache_config: None | RedisCacheConfig | MomentoCacheConfig = None
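
(Illustrative sketch, not part of the diff: the two modes described in the LocalModelConfig docstring above. Field names come from this release; the import path for LocalModelConfig/LLMConfig is inferred from these hunks, and the URLs/ports are placeholders.)

    from langroid.language_models.base import LLMConfig, LocalModelConfig

    # text-generation-webui: its chat templates do the formatting, so we can keep
    # hitting /chat/completions and skip our own formatter.
    tgw_local = LocalModelConfig(
        api_base="http://localhost:5000/v1",
        use_completion_for_chat=False,
    )

    # llama-cpp-python: format the chat history ourselves (Llama2FormatterConfig
    # is the default `formatter`) and send a single prompt to /completions instead.
    lcp_local = LocalModelConfig(
        api_base="http://localhost:8000/v1",
        use_completion_for_chat=True,
    )

    config = LLMConfig(local=lcp_local)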
@@ -223,6 +277,73 @@ class LanguageModel(ABC):
         ).get(config.type, openai)
         return cls(config)  # type: ignore
 
+    @staticmethod
+    def user_assistant_pairs(lst: List[str]) -> List[Tuple[str, str]]:
+        """
+        Given an even-length sequence of strings, split into a sequence of pairs
+
+        Args:
+            lst (List[str]): sequence of strings
+
+        Returns:
+            List[Tuple[str,str]]: sequence of pairs of strings
+        """
+        evens = lst[::2]
+        odds = lst[1::2]
+        return list(zip(evens, odds))
+
+    @staticmethod
+    def get_chat_history_components(
+        messages: List[LLMMessage],
+    ) -> Tuple[str, List[Tuple[str, str]], str]:
+        """
+        From the chat history, extract system prompt, user-assistant turns, and
+        final user msg.
+
+        Args:
+            messages (List[LLMMessage]): List of messages in the chat history
+
+        Returns:
+            Tuple[str, List[Tuple[str,str]], str]:
+                system prompt, user-assistant turns, final user msg
+
+        """
+        # Handle various degenerate cases
+        messages = [m for m in messages]  # copy
+        DUMMY_SYS_PROMPT = "You are a helpful assistant."
+        DUMMY_USER_PROMPT = "Follow the instructions above."
+        if len(messages) == 0 or messages[0].role != Role.SYSTEM:
+            logger.warning("No system msg, creating dummy system prompt")
+            messages.insert(0, LLMMessage(content=DUMMY_SYS_PROMPT, role=Role.SYSTEM))
+        system_prompt = messages[0].content
+
+        # now we have messages = [Sys,...]
+        if len(messages) == 1:
+            logger.warning(
+                "Got only system message in chat history, creating dummy user prompt"
+            )
+            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, msg, ...]
+
+        if messages[1].role != Role.USER:
+            messages.insert(1, LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, user, ...]
+        if messages[-1].role != Role.USER:
+            logger.warning(
+                "Last message in chat history is not a user message,"
+                " creating dummy user prompt"
+            )
+            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, user, ..., user]
+        # so we omit the first and last elements and make pairs of user-asst messages
+        conversation = [m.content for m in messages[1:-1]]
+        user_prompt = messages[-1].content
+        pairs = LanguageModel.user_assistant_pairs(conversation)
+        return system_prompt, pairs, user_prompt
+
     @abstractmethod
     def set_stream(self, stream: bool) -> bool:
         """Enable or disable streaming output from API.

langroid/language_models/config.py (new file)
@@ -0,0 +1,13 @@
+from pydantic import BaseSettings
+
+
+class PromptFormatterConfig(BaseSettings):
+    type: str = "llama2"
+
+    class Config:
+        env_prefix = "FORMAT_"
+        case_sensitive = False
+
+
+class Llama2FormatterConfig(PromptFormatterConfig):
+    use_bos_eos: bool = False
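
(Since PromptFormatterConfig is a pydantic BaseSettings, the env_prefix above lets its fields be set from the environment; a small assumed example, the env var name is inferred from pydantic's prefix+field convention.)

    import os

    from langroid.language_models.config import PromptFormatterConfig

    os.environ["FORMAT_TYPE"] = "llama2"  # matched via env_prefix="FORMAT_" (case-insensitive)
    cfg = PromptFormatterConfig()         # cfg.type == "llama2", read from the environment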

langroid/language_models/openai_gpt.py
@@ -22,6 +22,9 @@ from langroid.language_models.base import (
     LLMTokenUsage,
     Role,
 )
+from langroid.language_models.prompt_formatter.base import (
+    PromptFormatter,
+)
 from langroid.language_models.utils import (
     async_retry_with_exponential_backoff,
     retry_with_exponential_backoff,
@@ -50,45 +53,26 @@ class OpenAICompletionModel(str, Enum):
     LOCAL = "local"  # dummy for any local model
 
 
-class LocalModelConfig(BaseModel):
-    """
-    Configuration for local model available via
-    an OpenAI-compatible API.
-    """
-
-    # OPENAI_LOCAL.API_BASE env var can be used to set this
-    api_base: str = "http://localhost:8000/v1"
-    # OPENAI_LOCAL.CONTEXT_LENGTH env var can be used to set this
-    context_length: int = 2048  # default for llama-cpp-python
-
-
 class OpenAIGPTConfig(LLMConfig):
     type: str = "openai"
-    # This allows local configs to be set via OPENAI_LOCAL.* env vars
-    local: LocalModelConfig = LocalModelConfig()
     api_base: str | None = None  # used for local or other non-OpenAI models
     max_output_tokens: int = 1024
     min_output_tokens: int = 64
     timeout: int = 20
     temperature: float = 0.2
-    chat_model: OpenAIChatModel = OpenAIChatModel.GPT4
-    completion_model: OpenAICompletionModel = OpenAICompletionModel.GPT4
+    chat_model: str | OpenAIChatModel = OpenAIChatModel.GPT4
+    completion_model: str | OpenAICompletionModel = OpenAICompletionModel.GPT4
     context_length: Dict[str, int] = {
         OpenAIChatModel.GPT3_5_TURBO: 4096,
         OpenAIChatModel.GPT4: 8192,
         OpenAIChatModel.GPT4_NOFUNC: 8192,
         OpenAICompletionModel.TEXT_DA_VINCI_003: 4096,
-        # 2048 is default in llama-cpp-python, but can be set
-        # via cmd line, e.g.
-        # python3 -m llama-cpp.server --n_ctx 4096
-        OpenAICompletionModel.LOCAL: 2048,
     }
     cost_per_1k_tokens: Dict[str, Tuple[float, float]] = {
         # (input/prompt cost, output/completion cost)
         OpenAIChatModel.GPT3_5_TURBO: (0.0015, 0.002),
         OpenAIChatModel.GPT4: (0.03, 0.06),  # 8K context
         OpenAIChatModel.GPT4_NOFUNC: (0.03, 0.06),
-        OpenAIChatModel.LOCAL: (0.0, 0.0),
     }
 
     # all of the non-dict vars above can be set via env vars,
@@ -122,10 +106,16 @@ class OpenAIGPT(LanguageModel):
         if settings.nofunc:
             self.chat_model = OpenAIChatModel.GPT4_NOFUNC
         self.api_base: str | None = None
-        if config.chat_model == OpenAIChatModel.LOCAL:
+        if config.local:
+            self.config.chat_model = config.local.model
+            self.config.use_completion_for_chat = config.local.use_completion_for_chat
+            self.config.use_chat_for_completion = config.local.use_chat_for_completion
             self.api_key = "sx-xxx"
             self.api_base = config.local.api_base
-            config.context_length = {OpenAIChatModel.LOCAL: config.local.context_length}
+            config.context_length = {config.local.model: config.local.context_length}
+            config.cost_per_1k_tokens = {
+                config.local.model: (0.0, 0.0),
+            }
         else:
             # TODO: get rid of this and add `api_key` to the OpenAIGPTConfig
             # so we can get it from the OPENAI_API_KEY env var
@@ -433,6 +423,24 @@ class OpenAIGPT(LanguageModel):
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
+        if self.config.use_completion_for_chat:
+            # only makes sense for local models
+            if self.config.local is None or self.config.local.formatter is None:
+                raise ValueError(
+                    """
+                    `formatter` must be specified in config to use completion for chat.
+                    """
+                )
+            formatter = PromptFormatter.create(self.config.local.formatter)
+            if isinstance(messages, str):
+                messages = [
+                    LLMMessage(
+                        role=Role.SYSTEM, content="You are a helpful assistant."
+                    ),
+                    LLMMessage(role=Role.USER, content=messages),
+                ]
+            prompt = formatter.format(messages)
+            return self.generate(prompt=prompt, max_tokens=max_tokens)
         try:
             return self._chat(messages, max_tokens, functions, function_call)
         except Exception as e:
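
(A hedged end-to-end sketch of the new chat-via-completion path above; only the class and field names come from this diff, the import paths are inferred from its hunks, and the values are illustrative.)

    from langroid.language_models.base import LocalModelConfig
    from langroid.language_models.config import Llama2FormatterConfig
    from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

    cfg = OpenAIGPTConfig(
        local=LocalModelConfig(
            api_base="http://localhost:8000/v1",  # e.g. a llama-cpp-python server
            context_length=4096,                  # match the server's --n_ctx
            use_completion_for_chat=True,         # route chat() through /completions
            formatter=Llama2FormatterConfig(),
        ),
    )
    llm = OpenAIGPT(cfg)
    # A plain-string message is wrapped as [system, user]; the formatter collapses
    # the chat history into one llama2-style prompt, which goes to generate().
    response = llm.chat("What is a neutron star?", max_tokens=128)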

langroid/language_models/prompt_formatter/base.py (new file)
@@ -0,0 +1,42 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import List
+
+from langroid.language_models.base import LLMMessage
+from langroid.language_models.config import PromptFormatterConfig
+
+logger = logging.getLogger(__name__)
+
+
+class PromptFormatter(ABC):
+    """
+    Abstract base class for a prompt formatter
+    """
+
+    def __init__(self, config: PromptFormatterConfig):
+        self.config = config
+
+    @staticmethod
+    def create(config: PromptFormatterConfig) -> "PromptFormatter":
+        from langroid.language_models.prompt_formatter.llama2_formatter import (
+            Llama2Formatter,
+        )
+
+        formatter_class = dict(llama2=Llama2Formatter).get(config.type, Llama2Formatter)
+        return formatter_class(config)
+
+    @abstractmethod
+    def format(self, messages: List[LLMMessage]) -> str:
+        """
+        Convert sequence of messages (system, user, assistant, user, assistant...user)
+        to a single prompt formatted according to the specific format type,
+        to be used in a /completions endpoint.
+
+        Args:
+            messages (List[LLMMessage]): chat history as a sequence of messages
+
+        Returns:
+            (str): formatted version of chat history
+
+        """
+        pass
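
(The ABC above is meant to be subclassed; a minimal hypothetical formatter, shown only to illustrate the contract and the LanguageModel.get_chat_history_components helper added earlier in this diff. SimpleFormatter and SimpleFormatterConfig are invented names, not part of the package.)

    from typing import List

    from langroid.language_models.base import LanguageModel, LLMMessage
    from langroid.language_models.config import PromptFormatterConfig
    from langroid.language_models.prompt_formatter.base import PromptFormatter


    class SimpleFormatterConfig(PromptFormatterConfig):  # hypothetical
        type: str = "simple"


    class SimpleFormatter(PromptFormatter):  # hypothetical
        def format(self, messages: List[LLMMessage]) -> str:
            # Split the history into (system, turns, final user msg), then emit a
            # plain "System/User/Assistant" transcript ending with an open turn.
            sys_msg, turns, user_msg = LanguageModel.get_chat_history_components(messages)
            lines = [f"System: {sys_msg}"]
            for user, assistant in turns:
                lines += [f"User: {user}", f"Assistant: {assistant}"]
            lines += [f"User: {user_msg}", "Assistant:"]
            return "\n".join(lines)

Note that PromptFormatter.create() above only dispatches on type="llama2", so a custom formatter like this would have to be instantiated directly.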

langroid/language_models/prompt_formatter/llama2_formatter.py (new file)
@@ -0,0 +1,75 @@
+import logging
+from typing import List, Tuple
+
+from langroid.language_models.base import LanguageModel, LLMMessage
+from langroid.language_models.config import Llama2FormatterConfig
+from langroid.language_models.prompt_formatter.base import PromptFormatter
+
+logger = logging.getLogger(__name__)
+
+
+BOS: str = "<s>"
+EOS: str = "</s>"
+B_INST: str = "[INST]"
+E_INST: str = "[/INST]"
+B_SYS: str = "<<SYS>>\n"
+E_SYS: str = "\n<</SYS>>\n\n"
+SPECIAL_TAGS: List[str] = [B_INST, E_INST, BOS, EOS, "<<SYS>>", "<</SYS>>"]
+
+
+class Llama2Formatter(PromptFormatter):
+    def __int__(self, config: Llama2FormatterConfig) -> None:
+        super().__init__(config)
+        self.config: Llama2FormatterConfig = config
+
+    def format(self, messages: List[LLMMessage]) -> str:
+        sys_msg, chat_msgs, user_msg = LanguageModel.get_chat_history_components(
+            messages
+        )
+        return self._get_prompt_from_components(sys_msg, chat_msgs, user_msg)
+
+    def _get_prompt_from_components(
+        self,
+        system_prompt: str,
+        chat_history: List[Tuple[str, str]],
+        user_message: str,
+    ) -> str:
+        """
+        For llama2 models, convert chat history into a single
+        prompt for Llama2 models, for use in the /completions endpoint
+        (as opposed to the /chat/completions endpoint).
+        See:
+        https://www.reddit.com/r/LocalLLaMA/comments/155po2p/get_llama_2_prompt_format_right/
+        https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L44
+
+        Args:
+            system_prompt (str): system prompt, typically specifying role/task.
+            chat_history (List[Tuple[str,str]]): List of (user, assistant) pairs
+            user_message (str): user message, at the end of the chat, i.e. the message
+                for which we want to generate a response.
+
+        Returns:
+            str: Prompt for Llama2 models
+
+        Typical structure of the formatted prompt:
+        Note important that the first [INST], [/INST] surrounds the system prompt,
+        together with the first user message. A lot of libs seem to miss this detail.
+
+        <s>[INST] <<SYS>>
+        You are are a helpful... bla bla.. assistant
+        <</SYS>>
+
+        Hi there! [/INST] Hello! How can I help you today? </s><s>[INST]
+        What is a neutron star? [/INST] A neutron star is a ... </s><s>
+        [INST] Okay cool, thank you! [/INST] You're welcome! </s><s>
+        [INST] Ah, I have one more question.. [/INST]
+        """
+        bos = BOS if self.config.use_bos_eos else ""
+        eos = EOS if self.config.use_bos_eos else ""
+        text = f"{bos}{B_INST} {B_SYS}{system_prompt}{E_SYS}"
+        for user_input, response in chat_history:
+            text += (
+                f"{user_input.strip()} {E_INST} {response.strip()} {eos}{bos} {B_INST} "
+            )
+        text += f"{user_message.strip()} {E_INST}"
+        return text
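
(A hedged illustration of what Llama2Formatter.format produces for a short history. Role.ASSISTANT is assumed from the existing Role enum, and the whitespace in the comment is approximate; with use_bos_eos at its False default, no <s>/</s> markers are emitted.)

    from langroid.language_models.base import LLMMessage, Role
    from langroid.language_models.config import Llama2FormatterConfig
    from langroid.language_models.prompt_formatter.llama2_formatter import Llama2Formatter

    fmt = Llama2Formatter(Llama2FormatterConfig())
    prompt = fmt.format(
        [
            LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
            LLMMessage(role=Role.USER, content="Hi there"),
            LLMMessage(role=Role.ASSISTANT, content="Hello! How can I help you today?"),
            LLMMessage(role=Role.USER, content="What is a neutron star?"),
        ]
    )
    # prompt is roughly:
    # [INST] <<SYS>>
    # You are a helpful assistant.
    # <</SYS>>
    #
    # Hi there [/INST] Hello! How can I help you today? [INST] What is a neutron star? [/INST]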

langroid-0.1.60.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.59
+Version: 0.1.60
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani

langroid-0.1.60.dist-info/RECORD
@@ -1,7 +1,7 @@
 langroid/__init__.py,sha256=sEKJ_5WJBAMZApevfeE3gxLK-eotVzJMJlT83G0rAko,30
 langroid/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/base.py,sha256=bnqa_PZsw1_RWDv1w67g1rMrhbGTdt_mTPWcZ_uAZIk,26530
-langroid/agent/chat_agent.py,sha256=Sma0-5XPHDzBOcduthwwlWBmkBgqpk8gGzStF8rcrps,22643
+langroid/agent/chat_agent.py,sha256=eTXkF8ENugOhr39uoaat8yGqtDpkZGcugM36gveSEek,22755
 langroid/agent/chat_document.py,sha256=apaYj38sDu7ALCnsA8tJwoj3Z8zLNmIsNPd4-IujnGk,6153
 langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
@@ -33,8 +33,12 @@ langroid/embedding_models/clustering.py,sha256=tZWElUqXl9Etqla0FAa7og96iDKgjqWju
 langroid/embedding_models/models.py,sha256=1xcv9hqmCTsbUbS8v7XeZRsf25Tu79JUoSipIYpvNoo,2765
 langroid/language_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/language_models/azure_openai.py,sha256=9NLr9s9l7JlCHSuMooxYLLgs1d04IwE_bO7r22bhrg8,3458
-langroid/language_models/base.py,sha256=CHSMWJd9kFwMsI38pLmFcPtgkBUUQ3a47sj77kD8-bw,14743
-langroid/language_models/openai_gpt.py,sha256=uTIa30d-ilo4VNlBdUdr9iE3fZo3szz-goFWiKQykNM,22953
+langroid/language_models/base.py,sha256=zHCZIEmIk-sFMq7GWooZe8qq4GjaJ3YRhTzTC4irgGM,19931
+langroid/language_models/config.py,sha256=PXcmEUq52GCDj2sekt8F9E1flWyyNjP2S0LTRs7T6Kg,269
+langroid/language_models/openai_gpt.py,sha256=f9oegEQ8jeQ6emS-Oh5LE6lbuzRsrsOns_QGvdMuQKk,23486
+langroid/language_models/prompt_formatter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langroid/language_models/prompt_formatter/base.py,sha256=2y_GcwhstvB5ih3haS7l5Fv79jVnFJ_vEw1jqWJzB9k,1247
+langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=2et_OIaDbFRf5fzBUki3E4_Di9xH-HwTxt9MMNINoXs,2892
 langroid/language_models/utils.py,sha256=rmnSn-sJ3aKl_wBdeLPkck0Li4Ed6zkCxZYYl7n1V34,4668
 langroid/mytypes.py,sha256=YA42IJcooJnTxAwk-B4FmZ1hqzIIF1ZZKcpUKzBTGGo,1537
 langroid/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -78,7 +82,7 @@ langroid/vector_store/base.py,sha256=QZx3NUNwf2I0r3A7iuoUHIRGbqt_pFGD0hq1R-Yg8iM
 langroid/vector_store/chromadb.py,sha256=s5pQkKjaMP-Tt5A8M10EInFzttaALPbJAq7q4gf0TKg,5235
 langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
 langroid/vector_store/qdrantdb.py,sha256=KRvIIj1IZG2zFqejofMnRs2hT86B-27LgBEnuczdqOU,9072
-langroid-0.1.59.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.1.59.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
-langroid-0.1.59.dist-info/METADATA,sha256=GyR3K0U7ocxuI_I19-hD3mnsuAC3HNt1x8ebD3fAEPE,35745
-langroid-0.1.59.dist-info/RECORD,,
+langroid-0.1.60.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.1.60.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
+langroid-0.1.60.dist-info/METADATA,sha256=-DEq-l5sI_DEuNmYzEQ58cbxak_MNtwoVS-Dw1lreBE,35745
+langroid-0.1.60.dist-info/RECORD,,