langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- langroid/__init__.py +95 -0
- langroid/agent/__init__.py +40 -0
- langroid/agent/base.py +222 -91
- langroid/agent/batch.py +264 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +247 -101
- langroid/agent/chat_document.py +41 -4
- langroid/agent/openai_assistant.py +842 -0
- langroid/agent/special/__init__.py +50 -0
- langroid/agent/special/doc_chat_agent.py +837 -141
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +127 -0
- langroid/agent/special/retriever_agent.py +32 -198
- langroid/agent/special/sql/__init__.py +11 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +22 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +475 -122
- langroid/agent/tool_message.py +75 -13
- langroid/agent/tools/__init__.py +13 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +16 -29
- langroid/agent/tools/run_python_code.py +60 -0
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/agent/tools/segment_extract_tool.py +36 -0
- langroid/cachedb/__init__.py +9 -0
- langroid/cachedb/base.py +22 -2
- langroid/cachedb/momento_cachedb.py +26 -2
- langroid/cachedb/redis_cachedb.py +78 -11
- langroid/embedding_models/__init__.py +34 -0
- langroid/embedding_models/base.py +21 -2
- langroid/embedding_models/models.py +120 -18
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +45 -0
- langroid/language_models/azure_openai.py +80 -27
- langroid/language_models/base.py +117 -12
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_assistants.py +3 -0
- langroid/language_models/openai_gpt.py +558 -174
- langroid/language_models/prompt_formatter/__init__.py +15 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +18 -21
- langroid/mytypes.py +25 -8
- langroid/parsing/__init__.py +46 -0
- langroid/parsing/document_parser.py +260 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +122 -59
- langroid/parsing/repo_loader.py +114 -52
- langroid/parsing/search.py +68 -63
- langroid/parsing/spider.py +3 -2
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -11
- langroid/parsing/urls.py +85 -37
- langroid/parsing/utils.py +298 -4
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +11 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +17 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/algorithms/graph.py +103 -0
- langroid/utils/configuration.py +36 -5
- langroid/utils/constants.py +4 -0
- langroid/utils/globals.py +2 -2
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +21 -0
- langroid/utils/output/printing.py +47 -1
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +616 -2
- langroid/utils/system.py +98 -0
- langroid/vector_store/__init__.py +40 -0
- langroid/vector_store/base.py +203 -6
- langroid/vector_store/chromadb.py +59 -32
- langroid/vector_store/lancedb.py +463 -0
- langroid/vector_store/meilisearch.py +10 -7
- langroid/vector_store/momento.py +262 -0
- langroid/vector_store/qdrantdb.py +104 -22
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
- langroid-0.1.219.dist-info/RECORD +127 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.85.dist-info/RECORD +0 -94
- /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
langroid/language_models/openai_gpt.py

@@ -1,16 +1,31 @@
 import ast
 import hashlib
+import json
 import logging
+import os
 import sys
+import warnings
 from enum import Enum
-from
+from functools import cache
+from itertools import chain
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    no_type_check,
+)

-import litellm
 import openai
-from
-from
+from httpx import Timeout
+from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel
 from rich import print
+from rich.markup import escape

 from langroid.cachedb.momento_cachedb import MomentoCache, MomentoCacheConfig
 from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
@@ -24,8 +39,10 @@ from langroid.language_models.base import (
     LLMTokenUsage,
     Role,
 )
-from langroid.language_models.
-
+from langroid.language_models.config import HFPromptFormatterConfig
+from langroid.language_models.prompt_formatter.hf_formatter import (
+    HFFormatter,
+    find_hf_formatter,
 )
 from langroid.language_models.utils import (
     async_retry_with_exponential_backoff,
@@ -33,44 +50,157 @@ from langroid.language_models.utils import (
 )
 from langroid.utils.configuration import settings
 from langroid.utils.constants import NO_ANSWER, Colors
+from langroid.utils.system import friendly_error

 logging.getLogger("openai").setLevel(logging.ERROR)
-
+
+if "OLLAMA_HOST" in os.environ:
+    OLLAMA_BASE_URL = f"http://{os.environ['OLLAMA_HOST']}/v1"
+else:
+    OLLAMA_BASE_URL = "http://localhost:11434/v1"
+
+OLLAMA_API_KEY = "ollama"
+DUMMY_API_KEY = "xxx"


 class OpenAIChatModel(str, Enum):
     """Enum for OpenAI Chat models"""

-    GPT3_5_TURBO = "gpt-3.5-turbo-
-    GPT4_NOFUNC = "gpt-4" # before function_call API
+    GPT3_5_TURBO = "gpt-3.5-turbo-1106"
     GPT4 = "gpt-4"
+    GPT4_32K = "gpt-4-32k"
+    GPT4_TURBO = "gpt-4-turbo-preview"


 class OpenAICompletionModel(str, Enum):
     """Enum for OpenAI Completion models"""

     TEXT_DA_VINCI_003 = "text-davinci-003" # deprecated
-
-    GPT4 = "gpt-4" # only works on chat-completion endpoint
+    GPT3_5_TURBO_INSTRUCT = "gpt-3.5-turbo-instruct"


 _context_length: Dict[str, int] = {
     # can add other non-openAI models here
-    OpenAIChatModel.GPT3_5_TURBO:
+    OpenAIChatModel.GPT3_5_TURBO: 16_385,
     OpenAIChatModel.GPT4: 8192,
-    OpenAIChatModel.
+    OpenAIChatModel.GPT4_32K: 32_768,
+    OpenAIChatModel.GPT4_TURBO: 128_000,
     OpenAICompletionModel.TEXT_DA_VINCI_003: 4096,
 }

 _cost_per_1k_tokens: Dict[str, Tuple[float, float]] = {
     # can add other non-openAI models here.
     # model => (prompt cost, generation cost) in USD
-    OpenAIChatModel.GPT3_5_TURBO: (0.
+    OpenAIChatModel.GPT3_5_TURBO: (0.001, 0.002),
     OpenAIChatModel.GPT4: (0.03, 0.06), # 8K context
-    OpenAIChatModel.
+    OpenAIChatModel.GPT4_TURBO: (0.01, 0.03), # 128K context
 }


+openAIChatModelPreferenceList = [
+    OpenAIChatModel.GPT4_TURBO,
+    OpenAIChatModel.GPT4,
+    OpenAIChatModel.GPT3_5_TURBO,
+]
+
+openAICompletionModelPreferenceList = [
+    OpenAICompletionModel.GPT3_5_TURBO_INSTRUCT,
+    OpenAICompletionModel.TEXT_DA_VINCI_003,
+]
+
+
+if "OPENAI_API_KEY" in os.environ:
+    try:
+        available_models = set(map(lambda m: m.id, OpenAI().models.list()))
+    except openai.AuthenticationError as e:
+        if settings.debug:
+            logging.warning(
+                f"""
+                OpenAI Authentication Error: {e}.
+                ---
+                If you intended to use an OpenAI Model, you should fix this,
+                otherwise you can ignore this warning.
+                """
+            )
+        available_models = set()
+    except Exception as e:
+        if settings.debug:
+            logging.warning(
+                f"""
+                Error while fetching available OpenAI models: {e}.
+                Proceeding with an empty set of available models.
+                """
+            )
+        available_models = set()
+else:
+    available_models = set()
+
+defaultOpenAIChatModel = next(
+    chain(
+        filter(
+            lambda m: m.value in available_models,
+            openAIChatModelPreferenceList,
+        ),
+        [OpenAIChatModel.GPT4_TURBO],
+    )
+)
+defaultOpenAICompletionModel = next(
+    chain(
+        filter(
+            lambda m: m.value in available_models,
+            openAICompletionModelPreferenceList,
+        ),
+        [OpenAICompletionModel.GPT3_5_TURBO_INSTRUCT],
+    )
+)
+
+
+class AccessWarning(Warning):
+    pass
+
+
+@cache
+def gpt_3_5_warning() -> None:
+    warnings.warn(
+        """
+        GPT-4 is not available, falling back to GPT-3.5.
+        Examples may not work properly and unexpected behavior may occur.
+        Adjustments to prompts may be necessary.
+        """,
+        AccessWarning,
+    )
+
+
+def noop() -> None:
+    """Does nothing."""
+    return None
+
+
+class OpenAICallParams(BaseModel):
+    """
+    Various params that can be sent to an OpenAI API chat-completion call.
+    When specified, any param here overrides the one with same name in the
+    OpenAIGPTConfig.
+    """
+
+    max_tokens: int = 1024
+    temperature: float = 0.2
+    frequency_penalty: float | None = 0.0 # between -2 and 2
+    presence_penalty: float | None = 0.0 # between -2 and 2
+    response_format: Dict[str, str] | None = None
+    logit_bias: Dict[int, float] | None = None # token_id -> bias
+    logprobs: bool = False
+    top_p: int | None = 1
+    top_logprobs: int | None = None # if int, requires logprobs=True
+    n: int = 1 # how many completions to generate (n > 1 is NOT handled now)
+    stop: str | List[str] | None = None # (list of) stop sequence(s)
+    seed: int | None = 42
+    user: str | None = None # user id for tracking
+
+    def to_dict_exclude_none(self) -> Dict[str, Any]:
+        return {k: v for k, v in self.dict().items() if v is not None}
+
+
 class OpenAIGPTConfig(LLMConfig):
     """
     Class for any LLM with an OpenAI-like API: besides the OpenAI models this includes:
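The hunk above picks the default chat and completion models by walking a preference list and falling back to GPT-4 Turbo when nothing on the list is available. A minimal standalone sketch of the same `next(chain(filter(...)))` pattern; the `available` set below is hypothetical, not fetched from the API:

```python
from itertools import chain

# Toy illustration of the preference-list fallback used in the hunk above.
preference = ["gpt-4-turbo-preview", "gpt-4", "gpt-3.5-turbo-1106"]
available = {"gpt-4", "gpt-3.5-turbo-1106"}  # hypothetical set of reachable models

default_model = next(
    chain(
        (m for m in preference if m in available),  # first available, in preference order
        ["gpt-4-turbo-preview"],                    # fallback if none are available
    )
)
print(default_model)  # -> "gpt-4"
```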
@@ -81,17 +211,51 @@ class OpenAIGPTConfig(LLMConfig):
     """

     type: str = "openai"
-    api_key: str =
+    api_key: str = DUMMY_API_KEY # CAUTION: set this ONLY via env var OPENAI_API_KEY
+    organization: str = ""
     api_base: str | None = None # used for local or other non-OpenAI models
     litellm: bool = False # use litellm api?
+    ollama: bool = False # use ollama's OpenAI-compatible endpoint?
     max_output_tokens: int = 1024
-    min_output_tokens: int =
+    min_output_tokens: int = 1
     use_chat_for_completion = True # do not change this, for OpenAI models!
     timeout: int = 20
     temperature: float = 0.2
+    seed: int | None = 42
+    params: OpenAICallParams | None = None
     # these can be any model name that is served at an OpenAI-compatible API end point
-    chat_model: str =
-    completion_model: str =
+    chat_model: str = defaultOpenAIChatModel
+    completion_model: str = defaultOpenAICompletionModel
+    run_on_first_use: Callable[[], None] = noop
+    # a string that roughly matches a HuggingFace chat_template,
+    # e.g. "mistral-instruct-v0.2 (a fuzzy search is done to find the closest match)
+    formatter: str | None = None
+    hf_formatter: HFFormatter | None = None
+
+    def __init__(self, **kwargs) -> None: # type: ignore
+        local_model = "api_base" in kwargs and kwargs["api_base"] is not None
+
+        chat_model = kwargs.get("chat_model", "")
+        local_prefixes = ["local/", "litellm/", "ollama/"]
+        if any(chat_model.startswith(prefix) for prefix in local_prefixes):
+            local_model = True
+
+        warn_gpt_3_5 = (
+            "chat_model" not in kwargs.keys()
+            and not local_model
+            and defaultOpenAIChatModel == OpenAIChatModel.GPT3_5_TURBO
+        )
+
+        if warn_gpt_3_5:
+            existing_hook = kwargs.get("run_on_first_use", noop)
+
+            def with_warning() -> None:
+                existing_hook()
+                gpt_3_5_warning()
+
+            kwargs["run_on_first_use"] = with_warning
+
+        super().__init__(**kwargs)

     # all of the vars above can be set via env vars,
     # by upper-casing the name and prefixing with OPENAI_, e.g.
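Together with the new `OpenAICallParams` class, the config fields added above let per-call parameters be set declaratively. A minimal usage sketch; the class and field names come from this hunk, while the model name and parameter values are illustrative:

```python
from langroid.language_models.openai_gpt import (
    OpenAICallParams,
    OpenAIGPT,
    OpenAIGPTConfig,
)

# Sketch: values in `params` override same-named config values,
# as described in the OpenAICallParams docstring above.
config = OpenAIGPTConfig(
    chat_model="gpt-4-turbo-preview",        # illustrative model name
    max_output_tokens=512,
    params=OpenAICallParams(temperature=0.0, seed=123),
)
llm = OpenAIGPT(config)
```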
@@ -108,6 +272,20 @@ class OpenAIGPTConfig(LLMConfig):
         """
         if not self.litellm:
             return
+        try:
+            import litellm
+        except ImportError:
+            raise ImportError(
+                """
+                litellm not installed. Please install it via:
+                pip install litellm.
+                Or when installing langroid, install it with the `litellm` extra:
+                pip install langroid[litellm]
+                """
+            )
+        litellm.telemetry = False
+        litellm.drop_params = True # drop un-supported params without crashing
+        self.seed = None # some local mdls don't support seed
         keys_dict = litellm.validate_environment(self.chat_model)
         missing_keys = keys_dict.get("missing_keys", [])
         if len(missing_keys) > 0:
@@ -148,57 +326,194 @@ class OpenAIResponse(BaseModel):
     usage: Dict # type: ignore


-
+def litellm_logging_fn(model_call_dict: Dict[str, Any]) -> None:
+    """Logging function for litellm"""
+    try:
+        api_input_dict = model_call_dict.get("additional_args", {}).get(
+            "complete_input_dict"
+        )
+        if api_input_dict is not None:
+            text = escape(json.dumps(api_input_dict, indent=2))
+            print(
+                f"[grey37]LITELLM: {text}[/grey37]",
+            )
+    except Exception:
+        pass
+
+
+# Define a class for OpenAI GPT models that extends the base class
 class OpenAIGPT(LanguageModel):
     """
     Class for OpenAI LLMs
     """

-    def __init__(self, config: OpenAIGPTConfig):
+    def __init__(self, config: OpenAIGPTConfig = OpenAIGPTConfig()):
         """
         Args:
             config: configuration for openai-gpt model
         """
+        # copy the config to avoid modifying the original
+        config = config.copy()
         super().__init__(config)
         self.config: OpenAIGPTConfig = config
-
-
+
+        # Run the first time the model is used
+        self.run_on_first_use = cache(self.config.run_on_first_use)

         # global override of chat_model,
         # to allow quick testing with other models
         if settings.chat_model != "":
             self.config.chat_model = settings.chat_model
+            self.config.completion_model = settings.chat_model
+
+        if len(parts := self.config.chat_model.split("//")) > 1:
+            # there is a formatter specified, e.g.
+            # "litellm/ollama/mistral//hf" or
+            # "local/localhost:8000/v1//mistral-instruct-v0.2"
+            formatter = parts[1]
+            self.config.chat_model = parts[0]
+            if formatter == "hf":
+                # e.g. "litellm/ollama/mistral//hf" -> "litellm/ollama/mistral"
+                formatter = find_hf_formatter(self.config.chat_model)
+                if formatter != "":
+                    # e.g. "mistral"
+                    self.config.formatter = formatter
+                    logging.warning(
+                        f"""
+                        Using completions (not chat) endpoint with HuggingFace
+                        chat_template for {formatter} for
+                        model {self.config.chat_model}
+                        """
+                    )
+            else:
+                # e.g. "local/localhost:8000/v1//mistral-instruct-v0.2"
+                self.config.formatter = formatter
+
+        if self.config.formatter is not None:
+            self.config.hf_formatter = HFFormatter(
+                HFPromptFormatterConfig(model_name=self.config.formatter)
+            )

         # if model name starts with "litellm",
         # set the actual model name by stripping the "litellm/" prefix
         # and set the litellm flag to True
-        if self.config.chat_model.startswith("litellm"):
+        if self.config.chat_model.startswith("litellm/") or self.config.litellm:
+            # e.g. litellm/ollama/mistral
             self.config.litellm = True
-            self.
-
-
+            self.api_base = self.config.api_base
+            if self.config.chat_model.startswith("litellm/"):
+                # strip the "litellm/" prefix
+                # e.g. litellm/ollama/llama2 => ollama/llama2
+                self.config.chat_model = self.config.chat_model.split("/", 1)[1]
+        elif self.config.chat_model.startswith("local/"):
+            # expect this to be of the form "local/localhost:8000/v1",
+            # depending on how the model is launched locally.
+            # In this case the model served locally behind an OpenAI-compatible API
+            # so we can just use `openai.*` methods directly,
+            # and don't need a adaptor library like litellm
+            self.config.litellm = False
+            self.config.seed = None # some models raise an error when seed is set
+            # Extract the api_base from the model name after the "local/" prefix
+            self.api_base = self.config.chat_model.split("/", 1)[1]
+            if not self.api_base.startswith("http"):
+                self.api_base = "http://" + self.api_base
+        elif self.config.chat_model.startswith("ollama/"):
+            self.config.ollama = True
+            self.api_base = OLLAMA_BASE_URL
+            self.api_key = OLLAMA_API_KEY
+            self.config.chat_model = self.config.chat_model.replace("ollama/", "")
+        else:
+            self.api_base = self.config.api_base
+
+        if settings.chat_model != "":
+            # if we're overriding chat model globally, set completion model to same
+            self.config.completion_model = self.config.chat_model
+
+        if self.config.formatter is not None:
+            # we want to format chats -> completions using this specific formatter
+            self.config.use_completion_for_chat = True
+            self.config.completion_model = self.config.chat_model
+
+        if self.config.use_completion_for_chat:
+            self.config.use_chat_for_completion = False

         # NOTE: The api_key should be set in the .env file, or via
         # an explicit `export OPENAI_API_KEY=xxx` or `setenv OPENAI_API_KEY xxx`
         # Pydantic's BaseSettings will automatically pick it up from the
         # .env file
-
+        # The config.api_key is ignored when not using an OpenAI model
+        if self.is_openai_completion_model() or self.is_openai_chat_model():
+            self.api_key = config.api_key
+            if self.api_key == DUMMY_API_KEY:
+                self.api_key = os.getenv("OPENAI_API_KEY", DUMMY_API_KEY)
+        else:
+            self.api_key = DUMMY_API_KEY
+        self.client = OpenAI(
+            api_key=self.api_key,
+            base_url=self.api_base,
+            organization=self.config.organization,
+            timeout=Timeout(self.config.timeout),
+        )
+        self.async_client = AsyncOpenAI(
+            api_key=self.api_key,
+            organization=self.config.organization,
+            base_url=self.api_base,
+            timeout=Timeout(self.config.timeout),
+        )

         self.cache: MomentoCache | RedisCache
         if settings.cache_type == "momento":
-            config.cache_config
+            if config.cache_config is None or isinstance(
+                config.cache_config, RedisCacheConfig
+            ):
+                # switch to fresh momento config if needed
+                config.cache_config = MomentoCacheConfig()
             self.cache = MomentoCache(config.cache_config)
-
-            config.cache_config
+        elif "redis" in settings.cache_type:
+            if config.cache_config is None or isinstance(
+                config.cache_config, MomentoCacheConfig
+            ):
+                # switch to fresh redis config if needed
+                config.cache_config = RedisCacheConfig(
+                    fake="fake" in settings.cache_type
+                )
+            if "fake" in settings.cache_type:
+                # force use of fake redis if global cache_type is "fakeredis"
+                config.cache_config.fake = True
             self.cache = RedisCache(config.cache_config)
+        else:
+            raise ValueError(
+                f"Invalid cache type {settings.cache_type}. "
+                "Valid types are momento, redis, fakeredis"
+            )

         self.config._validate_litellm()

-    def
+    def _openai_api_call_params(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Prep the params to be sent to the OpenAI API
+        (or any OpenAI-compatible API, e.g. from Ooba or LmStudio)
+        for chat-completion.
+
+        Order of priority:
+        - (1) Params (mainly max_tokens) in the chat/achat/generate/agenerate call
+            (these are passed in via kwargs)
+        - (2) Params in OpenAIGPTConfi.params (of class OpenAICallParams)
+        - (3) Specific Params in OpenAIGPTConfig (just temperature for now)
+        """
+        params = dict(
+            temperature=self.config.temperature,
+        )
+        if self.config.params is not None:
+            params.update(self.config.params.to_dict_exclude_none())
+        params.update(kwargs)
+        return params
+
+    def is_openai_chat_model(self) -> bool:
         openai_chat_models = [e.value for e in OpenAIChatModel]
         return self.config.chat_model in openai_chat_models

-    def
+    def is_openai_completion_model(self) -> bool:
         openai_completion_models = [e.value for e in OpenAICompletionModel]
         return self.config.completion_model in openai_completion_models

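The constructor above routes requests based on prefixes in `chat_model` (`litellm/`, `local/`, `ollama/`) and an optional `//<formatter>` suffix. A hedged sketch of config values this parsing is designed to accept; the model names and the local port are illustrative:

```python
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

# Ollama's OpenAI-compatible endpoint: "ollama/" is stripped and the base URL
# defaults to http://localhost:11434/v1 (or $OLLAMA_HOST), per the hunk above.
ollama_cfg = OpenAIGPTConfig(chat_model="ollama/mistral")

# A locally served OpenAI-compatible API: the part after "local/" becomes the
# api_base, and "//mistral-instruct-v0.2" selects a HuggingFace chat template.
local_cfg = OpenAIGPTConfig(
    chat_model="local/localhost:8000/v1//mistral-instruct-v0.2"
)

# Route through litellm; appending "//hf" triggers a fuzzy HuggingFace
# chat_template lookup based on the model name.
litellm_cfg = OpenAIGPTConfig(chat_model="litellm/ollama/mistral//hf")

llm = OpenAIGPT(ollama_cfg)
```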
@@ -266,44 +581,60 @@ class OpenAIGPT(LanguageModel):
         - function_name: name of the function
         - function_args: args of the function
         """
+        # convert event obj (of type ChatCompletionChunk) to dict so rest of code,
+        # which expects dicts, works as it did before switching to openai v1.x
+        if not isinstance(event, dict):
+            event = event.model_dump()
+
+        choices = event.get("choices", [{}])
+        if len(choices) == 0:
+            choices = [{}]
         event_args = ""
         event_fn_name = ""
+
+        # The first two events in the stream of Azure OpenAI is useless.
+        # In the 1st: choices list is empty, in the 2nd: the dict delta has null content
         if chat:
-            delta =
-            if "function_call" in delta:
-                if "name" in delta.function_call:
-                    event_fn_name = delta.function_call["name"]
-                if "arguments" in delta.function_call:
-                    event_args = delta.function_call["arguments"]
+            delta = choices[0].get("delta", {})
             event_text = delta.get("content", "")
+            if "function_call" in delta and delta["function_call"] is not None:
+                if "name" in delta["function_call"]:
+                    event_fn_name = delta["function_call"]["name"]
+                if "arguments" in delta["function_call"]:
+                    event_args = delta["function_call"]["arguments"]
         else:
-            event_text =
+            event_text = choices[0]["text"]
         if event_text:
             completion += event_text
             if not is_async:
                 sys.stdout.write(Colors().GREEN + event_text)
                 sys.stdout.flush()
+            self.config.streamer(event_text)
         if event_fn_name:
             function_name = event_fn_name
             has_function = True
             if not is_async:
                 sys.stdout.write(Colors().GREEN + "FUNC: " + event_fn_name + ": ")
                 sys.stdout.flush()
+            self.config.streamer(event_fn_name)
+
         if event_args:
             function_args += event_args
             if not is_async:
                 sys.stdout.write(Colors().GREEN + event_args)
                 sys.stdout.flush()
-
+            self.config.streamer(event_args)
+        if choices[0].get("finish_reason", "") in ["stop", "function_call"]:
             # for function_call, finish_reason does not necessarily
             # contain "function_call" as mentioned in the docs.
             # So we check for "stop" or "function_call" here.
             return True, has_function, function_name, function_args, completion
         return False, has_function, function_name, function_args, completion

+    @retry_with_exponential_backoff
     def _stream_response( # type: ignore
         self, response, chat: bool = False
-    ) -> Tuple[LLMResponse,
+    ) -> Tuple[LLMResponse, Dict[str, Any]]:
         """
         Grab and print streaming response from API.
         Args:
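The stream handler above normalizes each chunk to a dict and then reads `choices[0]["delta"]`. A sketch of the chunk shape that code expects after `model_dump()`; all field values below are illustrative:

```python
# Illustrative streamed chunk, shaped as the handler above expects.
event = {
    "choices": [
        {
            "delta": {
                "content": "Hello",             # streamed text fragment (may be empty)
                "function_call": {               # present only for function calls
                    "name": "search",            # hypothetical function name
                    "arguments": '{"query": ',   # arguments arrive in fragments
                },
            },
            "finish_reason": None,               # "stop" or "function_call" at the end
        }
    ]
}

delta = event.get("choices", [{}])[0].get("delta", {})
text = delta.get("content", "") or ""
```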
@@ -312,7 +643,7 @@ class OpenAIGPT(LanguageModel):
         Returns:
             Tuple consisting of:
                 LLMResponse object (with message, usage),
-                OpenAIResponse object (with choices, usage)
+                Dict version of OpenAIResponse object (with choices, usage)

         """
         completion = ""
@@ -352,9 +683,10 @@ class OpenAIGPT(LanguageModel):
             is_async=False,
         )

+    @async_retry_with_exponential_backoff
     async def _stream_response_async( # type: ignore
         self, response, chat: bool = False
-    ) -> Tuple[LLMResponse,
+    ) -> Tuple[LLMResponse, Dict[str, Any]]:
         """
         Grab and print streaming response from API.
         Args:
@@ -411,7 +743,7 @@ class OpenAIGPT(LanguageModel):
         function_args: str = "",
         function_name: str = "",
         is_async: bool = False,
-    ) -> Tuple[LLMResponse,
+    ) -> Tuple[LLMResponse, Dict[str, Any]]:
         # check if function_call args are valid, if not,
         # treat this as a normal msg, not a function call
         args = {}
@@ -446,7 +778,7 @@ class OpenAIGPT(LanguageModel):
             choices=[msg],
             usage=dict(total_tokens=0),
         )
-        return (
+        return (
             LLMResponse(
                 message=completion,
                 cached=False,
@@ -455,6 +787,13 @@ class OpenAIGPT(LanguageModel):
             openai_response.dict(),
         )

+    def _cache_store(self, k: str, v: Any) -> None:
+        try:
+            self.cache.store(k, v)
+        except Exception as e:
+            logging.error(f"Error in OpenAIGPT._cache_store: {e}")
+            pass
+
     def _cache_lookup(self, fn_name: str, **kwargs: Dict[str, Any]) -> Tuple[str, Any]:
         # Use the kwargs as the cache key
         sorted_kwargs_str = str(sorted(kwargs.items()))
@@ -467,7 +806,12 @@ class OpenAIGPT(LanguageModel):
             # when caching disabled, return the hashed_key and none result
             return hashed_key, None
         # Try to get the result from the cache
-
+        try:
+            cached_val = self.cache.retrieve(hashed_key)
+        except Exception as e:
+            logging.error(f"Error in OpenAIGPT._cache_lookup: {e}")
+            return hashed_key, None
+        return hashed_key, cached_val

     def _cost_chat_model(self, prompt: int, completion: int) -> float:
         price = self.chat_cost()
@@ -497,24 +841,22 @@ class OpenAIGPT(LanguageModel):
             prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, cost=cost
         )

-    def generate(self, prompt: str, max_tokens: int) -> LLMResponse:
+    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+        self.run_on_first_use()
+
         try:
             return self._generate(prompt, max_tokens)
         except Exception as e:
             # capture exceptions not handled by retry, so we don't crash
-
-            logging.error(f"OpenAI API error: {err_msg}")
+            logging.error(friendly_error(e, "Error in OpenAIGPT.generate: "))
             return LLMResponse(message=NO_ANSWER, cached=False)

     def _generate(self, prompt: str, max_tokens: int) -> LLMResponse:
         if self.config.use_chat_for_completion:
             return self.chat(messages=prompt, max_tokens=max_tokens)
-        openai.api_key = self.api_key
-        if self.api_base:
-            openai.api_base = self.api_base

         if settings.debug:
-            print(f"[
+            print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")

         @retry_with_exponential_backoff
         def completions_with_backoff(**kwargs): # type: ignore
@@ -523,128 +865,148 @@ class OpenAIGPT(LanguageModel):
             if result is not None:
                 cached = True
                 if settings.debug:
-                    print("[
+                    print("[grey37]CACHED[/grey37]")
             else:
+                if self.config.litellm:
+                    from litellm import completion as litellm_completion
+                completion_call = (
+                    litellm_completion
+                    if self.config.litellm
+                    else self.client.completions.create
+                )
+                if self.config.litellm and settings.debug:
+                    kwargs["logger_fn"] = litellm_logging_fn
                 # If it's not in the cache, call the API
-                result =
+                result = completion_call(**kwargs)
                 if self.get_stream():
-                    llm_response, openai_response = self._stream_response(
-
+                    llm_response, openai_response = self._stream_response(
+                        result,
+                        chat=self.config.litellm,
+                    )
+                    self._cache_store(hashed_key, openai_response)
                     return cached, hashed_key, openai_response
                 else:
-                    self.
+                    self._cache_store(hashed_key, result.model_dump())
             return cached, hashed_key, result

-
-
-
-            prompt
+        kwargs: Dict[str, Any] = dict(model=self.config.completion_model)
+        if self.config.litellm:
+            # TODO this is a temp fix, we should really be using a proper completion fn
+            # that takes a pre-formatted prompt, rather than mocking it as a sys msg.
+            kwargs["messages"] = [dict(content=prompt, role=Role.SYSTEM)]
+        else:  # any other OpenAI-compatible endpoint
+            kwargs["prompt"] = prompt
+        args = dict(
+            **kwargs,
             max_tokens=max_tokens, # for output/completion
-            request_timeout=self.config.timeout,
-            temperature=self.config.temperature,
-            echo=False,
             stream=self.get_stream(),
         )
-
-
+        args = self._openai_api_call_params(args)
+        cached, hashed_key, response = completions_with_backoff(**args)
+        if not isinstance(response, dict):
+            response = response.dict()
+        if "message" in response["choices"][0]:
+            msg = response["choices"][0]["message"]["content"].strip()
+        else:
+            msg = response["choices"][0]["text"].strip()
         return LLMResponse(message=msg, cached=cached)

-    async def agenerate(self, prompt: str, max_tokens: int) -> LLMResponse:
+    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+        self.run_on_first_use()
+
         try:
             return await self._agenerate(prompt, max_tokens)
         except Exception as e:
             # capture exceptions not handled by retry, so we don't crash
-
-            logging.error(f"OpenAI API error: {err_msg}")
+            logging.error(friendly_error(e, "Error in OpenAIGPT.agenerate: "))
             return LLMResponse(message=NO_ANSWER, cached=False)

     async def _agenerate(self, prompt: str, max_tokens: int) -> LLMResponse:
-        openai.api_key = self.api_key
-        if self.api_base:
-            openai.api_base = self.api_base
         # note we typically will not have self.config.stream = True
         # when issuing several api calls concurrently/asynchronously.
         # The calling fn should use the context `with Streaming(..., False)` to
         # disable streaming.
         if self.config.use_chat_for_completion:
-            messages =
-                LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
-                LLMMessage(role=Role.USER, content=prompt),
-            ]
+            return await self.achat(messages=prompt, max_tokens=max_tokens)

-
-
-            **kwargs: Dict[str, Any]
-        ) -> Tuple[bool, str, Any]:
-            cached = False
-            hashed_key, result = self._cache_lookup("AsyncChatCompletion", **kwargs)
-            if result is not None:
-                cached = True
-            else:
-                completion_call = (
-                    litellm_acompletion
-                    if self.config.litellm
-                    else openai.ChatCompletion.acreate
-                )
+        if settings.debug:
+            print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")

-
-
-
-
-
-            cached
-
-
-
-
-
-
-
+        # WARNING: .Completion.* endpoints are deprecated,
+        # and as of Sep 2023 only legacy models will work here,
+        # e.g. text-davinci-003, text-ada-001.
+        @async_retry_with_exponential_backoff
+        async def completions_with_backoff(**kwargs): # type: ignore
+            cached = False
+            hashed_key, result = self._cache_lookup("AsyncCompletion", **kwargs)
+            if result is not None:
+                cached = True
+                if settings.debug:
+                    print("[grey37]CACHED[/grey37]")
+            else:
+                if self.config.litellm:
+                    from litellm import acompletion as litellm_acompletion
+                # TODO this may not work: text_completion is not async,
+                # and we didn't find an async version in litellm
+                acompletion_call = (
+                    litellm_acompletion
+                    if self.config.litellm
+                    else self.async_client.completions.create
+                )
+                if self.config.litellm and settings.debug:
+                    kwargs["logger_fn"] = litellm_logging_fn
+                # If it's not in the cache, call the API
+                result = await acompletion_call(**kwargs)
+                self._cache_store(hashed_key, result.model_dump())
+            return cached, hashed_key, result
+
+        kwargs: Dict[str, Any] = dict(model=self.config.completion_model)
+        if self.config.litellm:
+            # TODO this is a temp fix, we should really be using a proper completion fn
+            # that takes a pre-formatted prompt, rather than mocking it as a sys msg.
+            kwargs["messages"] = [dict(content=prompt, role=Role.SYSTEM)]
+        else:  # any other OpenAI-compatible endpoint
+            kwargs["prompt"] = prompt
+        cached, hashed_key, response = await completions_with_backoff(
+            **kwargs,
+            max_tokens=max_tokens,
+            stream=False,
+        )
+        if not isinstance(response, dict):
+            response = response.dict()
+        if "message" in response["choices"][0]:
             msg = response["choices"][0]["message"]["content"].strip()
         else:
-            # WARNING: openai.Completion.* endpoints are deprecated,
-            # and as of Sep 2023 only legacy models will work here,
-            # e.g. text-davinci-003, text-ada-001.
-            @retry_with_exponential_backoff
-            async def completions_with_backoff(**kwargs): # type: ignore
-                cached = False
-                hashed_key, result = self._cache_lookup("AsyncCompletion", **kwargs)
-                if result is not None:
-                    cached = True
-                else:
-                    # If it's not in the cache, call the API
-                    result = await openai.Completion.acreate(**kwargs) # type: ignore
-                    self.cache.store(hashed_key, result)
-                return cached, hashed_key, result
-
-            cached, hashed_key, response = await completions_with_backoff(
-                model=self.config.completion_model,
-                prompt=prompt,
-                max_tokens=max_tokens,
-                request_timeout=self.config.timeout,
-                temperature=self.config.temperature,
-                echo=False,
-                stream=False,
-            )
             msg = response["choices"][0]["text"].strip()
         return LLMResponse(message=msg, cached=cached)

     def chat(
         self,
         messages: Union[str, List[LLMMessage]],
-        max_tokens: int,
+        max_tokens: int = 200,
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
-
+        self.run_on_first_use()
+
+        if functions is not None and not self.is_openai_chat_model():
+            raise ValueError(
+                f"""
+                `functions` can only be specified for OpenAI chat models;
+                {self.config.chat_model} does not support function-calling.
+                Instead, please use Langroid's ToolMessages, which are equivalent.
+                In the ChatAgentConfig, set `use_functions_api=False`
+                and `use_tools=True`, this will enable ToolMessages.
+                """
+            )
+        if self.config.use_completion_for_chat and not self.is_openai_chat_model():
             # only makes sense for non-OpenAI models
-            if self.config.formatter is None:
+            if self.config.formatter is None or self.config.hf_formatter is None:
                 raise ValueError(
                     """
                     `formatter` must be specified in config to use completion for chat.
                     """
                 )
-            formatter = PromptFormatter.create(self.config.formatter)
             if isinstance(messages, str):
                 messages = [
                     LLMMessage(
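With the signature changes above, `max_tokens` now defaults to 200 and passing `functions` to a non-OpenAI chat model raises an error pointing at Langroid's ToolMessages instead. A minimal calling sketch; it assumes an OpenAI API key is set in the environment, and the prompt text is illustrative:

```python
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

llm = OpenAIGPT(OpenAIGPTConfig())          # defaults chosen by the preference list
resp = llm.generate("Say hello.")            # max_tokens now defaults to 200
chat_resp = llm.chat("What is 2 + 2?", max_tokens=50)
print(resp.message, chat_resp.message)
```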
@@ -652,33 +1014,51 @@ class OpenAIGPT(LanguageModel):
                     ),
                     LLMMessage(role=Role.USER, content=messages),
                 ]
-            prompt =
+            prompt = self.config.hf_formatter.format(messages)
             return self.generate(prompt=prompt, max_tokens=max_tokens)
         try:
             return self._chat(messages, max_tokens, functions, function_call)
         except Exception as e:
             # capture exceptions not handled by retry, so we don't crash
-
-            logging.error(f"OpenAI API error: {err_msg}")
+            logging.error(friendly_error(e, "Error in OpenAIGPT.chat: "))
             return LLMResponse(message=NO_ANSWER, cached=False)

     async def achat(
         self,
         messages: Union[str, List[LLMMessage]],
-        max_tokens: int,
+        max_tokens: int = 200,
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
+        self.run_on_first_use()
+
+        if functions is not None and not self.is_openai_chat_model():
+            raise ValueError(
+                f"""
+                `functions` can only be specified for OpenAI chat models;
+                {self.config.chat_model} does not support function-calling.
+                Instead, please use Langroid's ToolMessages, which are equivalent.
+                In the ChatAgentConfig, set `use_functions_api=False`
+                and `use_tools=True`, this will enable ToolMessages.
+                """
+            )
         # turn off streaming for async calls
-        if
-
+        if (
+            self.config.use_completion_for_chat
+            and not self.is_openai_chat_model()
+            and not self.is_openai_completion_model()
+        ):
+            # only makes sense for local models, where we are trying to
+            # convert a chat dialog msg-sequence to a simple completion prompt.
             if self.config.formatter is None:
                 raise ValueError(
                     """
                     `formatter` must be specified in config to use completion for chat.
                     """
                 )
-            formatter =
+            formatter = HFFormatter(
+                HFPromptFormatterConfig(model_name=self.config.formatter)
+            )
             if isinstance(messages, str):
                 messages = [
                     LLMMessage(
@@ -693,8 +1073,7 @@ class OpenAIGPT(LanguageModel):
             return result
         except Exception as e:
             # capture exceptions not handled by retry, so we don't crash
-
-            logging.error(f"OpenAI API error: {err_msg}")
+            logging.error(friendly_error(e, "Error in OpenAIGPT.achat: "))
             return LLMResponse(message=NO_ANSWER, cached=False)

     @retry_with_exponential_backoff
@@ -704,36 +1083,49 @@ class OpenAIGPT(LanguageModel):
         if result is not None:
             cached = True
             if settings.debug:
-                print("[
+                print("[grey37]CACHED[/grey37]")
         else:
+            if self.config.litellm:
+                from litellm import completion as litellm_completion
            # If it's not in the cache, call the API
             completion_call = (
                 litellm_completion
                 if self.config.litellm
-                else
+                else self.client.chat.completions.create
             )
+            if self.config.litellm and settings.debug:
+                kwargs["logger_fn"] = litellm_logging_fn
             result = completion_call(**kwargs)
             if not self.get_stream():
                 # if streaming, cannot cache result
                 # since it is a generator. Instead,
                 # we hold on to the hashed_key and
                 # cache the result later
-                self.
+                self._cache_store(hashed_key, result.model_dump())
             return cached, hashed_key, result

-    @
+    @async_retry_with_exponential_backoff
     async def _achat_completions_with_backoff(self, **kwargs): # type: ignore
         cached = False
         hashed_key, result = self._cache_lookup("Completion", **kwargs)
         if result is not None:
             cached = True
             if settings.debug:
-                print("[
+                print("[grey37]CACHED[/grey37]")
         else:
+            if self.config.litellm:
+                from litellm import acompletion as litellm_acompletion
+            acompletion_call = (
+                litellm_acompletion
+                if self.config.litellm
+                else self.async_client.chat.completions.create
+            )
+            if self.config.litellm and settings.debug:
+                kwargs["logger_fn"] = litellm_logging_fn
             # If it's not in the cache, call the API
-            result = await
+            result = await acompletion_call(**kwargs)
             if not self.get_stream():
-                self.
+                self._cache_store(hashed_key, result.model_dump())
             return cached, hashed_key, result

     def _prep_chat_completion(
@@ -743,9 +1135,6 @@ class OpenAIGPT(LanguageModel):
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> Dict[str, Any]:
-        openai.api_key = self.api_key
-        if self.api_base:
-            openai.api_base = self.api_base
         if isinstance(messages, str):
             llm_messages = [
                 LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
@@ -757,22 +1146,17 @@ class OpenAIGPT(LanguageModel):
         # Azure uses different parameters. It uses ``engine`` instead of ``model``
         # and the value should be the deployment_name not ``self.config.chat_model``
         chat_model = self.config.chat_model
-        key_name = "model"
         if self.config.type == "azure":
-            key_name = "engine"
             if hasattr(self, "deployment_name"):
                 chat_model = self.deployment_name

         args: Dict[str, Any] = dict(
-
+            model=chat_model,
             messages=[m.api_dict() for m in llm_messages],
             max_tokens=max_tokens,
-            n=1,
-            stop=None,
-            temperature=self.config.temperature,
-            request_timeout=self.config.timeout,
             stream=self.get_stream(),
         )
+        args.update(self._openai_api_call_params(args))
         # only include functions-related args if functions are provided
         # since the OpenAI API will throw an error if `functions` is None or []
         if functions is not None:
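`_prep_chat_completion` now builds a lean argument dict and merges it through `_openai_api_call_params`, so per-call arguments win over `config.params`, which in turn wins over plain config fields. A toy illustration of that layering, with hypothetical values:

```python
config_temperature = 0.2                              # plain config field (lowest priority)
config_params = {"temperature": 0.7, "seed": 42}      # OpenAICallParams-style overrides
call_args = {"max_tokens": 100, "temperature": 0.0}   # per-call args (highest priority)

merged = {"temperature": config_temperature}
merged.update(config_params)   # config.params overrides plain config fields
merged.update(call_args)       # per-call args override everything
print(merged)  # {'temperature': 0.0, 'seed': 42, 'max_tokens': 100}
```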
@@ -823,14 +1207,8 @@ class OpenAIGPT(LanguageModel):
         if message.get("function_call") is None:
             fun_call = None
         else:
-            fun_call = LLMFunctionCall(name=message["function_call"]["name"])
             try:
-
-                # sometimes may be malformed with invalid indents,
-                # so we try to be safe by removing newlines.
-                fun_args_str = fun_args_str.replace("\n", "").strip()
-                fun_args = ast.literal_eval(fun_args_str)
-                fun_call.arguments = fun_args
+                fun_call = LLMFunctionCall.from_dict(message["function_call"])
             except (ValueError, SyntaxError):
                 logging.warning(
                     "Could not parse function arguments: "
@@ -884,10 +1262,13 @@ class OpenAIGPT(LanguageModel):
         cached, hashed_key, response = self._chat_completions_with_backoff(**args)
         if self.get_stream() and not cached:
             llm_response, openai_response = self._stream_response(response, chat=True)
-            self.
-            return llm_response
-
-
+            self._cache_store(hashed_key, openai_response)
+            return llm_response # type: ignore
+        if isinstance(response, dict):
+            response_dict = response
+        else:
+            response_dict = response.model_dump()
+        return self._process_chat_completion_response(cached, response_dict)

     async def _achat(
         self,
@@ -899,7 +1280,6 @@ class OpenAIGPT(LanguageModel):
         """
         Async version of _chat(). See that function for details.
         """
-
         args = self._prep_chat_completion(
             messages,
             max_tokens,
@@ -913,6 +1293,10 @@ class OpenAIGPT(LanguageModel):
             llm_response, openai_response = await self._stream_response_async(
                 response, chat=True
             )
-            self.
-            return llm_response
-
+            self._cache_store(hashed_key, openai_response)
+            return llm_response # type: ignore
+        if isinstance(response, dict):
+            response_dict = response
+        else:
+            response_dict = response.model_dump()
+        return self._process_chat_completion_response(cached, response_dict)