langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. langroid/__init__.py +70 -0
  2. langroid/agent/__init__.py +22 -0
  3. langroid/agent/base.py +120 -33
  4. langroid/agent/batch.py +134 -35
  5. langroid/agent/callbacks/__init__.py +0 -0
  6. langroid/agent/callbacks/chainlit.py +608 -0
  7. langroid/agent/chat_agent.py +164 -100
  8. langroid/agent/chat_document.py +19 -2
  9. langroid/agent/openai_assistant.py +20 -10
  10. langroid/agent/special/__init__.py +33 -10
  11. langroid/agent/special/doc_chat_agent.py +521 -108
  12. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  13. langroid/agent/special/lance_rag/__init__.py +9 -0
  14. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  15. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  16. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  17. langroid/agent/special/lance_tools.py +44 -0
  18. langroid/agent/special/neo4j/__init__.py +0 -0
  19. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  20. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  21. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  22. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  23. langroid/agent/special/relevance_extractor_agent.py +23 -7
  24. langroid/agent/special/retriever_agent.py +29 -174
  25. langroid/agent/special/sql/__init__.py +7 -0
  26. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  27. langroid/agent/special/sql/utils/__init__.py +11 -0
  28. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  29. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  30. langroid/agent/special/table_chat_agent.py +43 -9
  31. langroid/agent/task.py +423 -114
  32. langroid/agent/tool_message.py +67 -10
  33. langroid/agent/tools/__init__.py +8 -0
  34. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  35. langroid/agent/tools/google_search_tool.py +11 -0
  36. langroid/agent/tools/metaphor_search_tool.py +67 -0
  37. langroid/agent/tools/recipient_tool.py +6 -24
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/cachedb/__init__.py +6 -0
  40. langroid/embedding_models/__init__.py +24 -0
  41. langroid/embedding_models/base.py +9 -1
  42. langroid/embedding_models/models.py +117 -17
  43. langroid/embedding_models/protoc/embeddings.proto +19 -0
  44. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  45. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  46. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  47. langroid/embedding_models/remote_embeds.py +153 -0
  48. langroid/language_models/__init__.py +22 -0
  49. langroid/language_models/azure_openai.py +47 -4
  50. langroid/language_models/base.py +26 -10
  51. langroid/language_models/config.py +5 -0
  52. langroid/language_models/openai_gpt.py +407 -121
  53. langroid/language_models/prompt_formatter/__init__.py +9 -0
  54. langroid/language_models/prompt_formatter/base.py +4 -6
  55. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  56. langroid/language_models/utils.py +10 -9
  57. langroid/mytypes.py +10 -4
  58. langroid/parsing/__init__.py +33 -1
  59. langroid/parsing/document_parser.py +259 -63
  60. langroid/parsing/image_text.py +32 -0
  61. langroid/parsing/parse_json.py +143 -0
  62. langroid/parsing/parser.py +20 -7
  63. langroid/parsing/repo_loader.py +108 -46
  64. langroid/parsing/search.py +8 -0
  65. langroid/parsing/table_loader.py +44 -0
  66. langroid/parsing/url_loader.py +59 -13
  67. langroid/parsing/urls.py +18 -9
  68. langroid/parsing/utils.py +130 -9
  69. langroid/parsing/web_search.py +73 -0
  70. langroid/prompts/__init__.py +7 -0
  71. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  72. langroid/prompts/prompts_config.py +1 -1
  73. langroid/utils/__init__.py +10 -0
  74. langroid/utils/algorithms/__init__.py +3 -0
  75. langroid/utils/configuration.py +0 -1
  76. langroid/utils/constants.py +4 -0
  77. langroid/utils/logging.py +2 -5
  78. langroid/utils/output/__init__.py +15 -2
  79. langroid/utils/output/status.py +33 -0
  80. langroid/utils/pandas_utils.py +30 -0
  81. langroid/utils/pydantic_utils.py +446 -4
  82. langroid/utils/system.py +36 -1
  83. langroid/vector_store/__init__.py +34 -2
  84. langroid/vector_store/base.py +33 -2
  85. langroid/vector_store/chromadb.py +42 -13
  86. langroid/vector_store/lancedb.py +226 -60
  87. langroid/vector_store/meilisearch.py +7 -6
  88. langroid/vector_store/momento.py +3 -2
  89. langroid/vector_store/qdrantdb.py +82 -11
  90. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
  91. langroid-0.1.219.dist-info/RECORD +127 -0
  92. langroid/agent/special/recipient_validator_agent.py +0 -157
  93. langroid/parsing/json.py +0 -64
  94. langroid/utils/web/selenium_login.py +0 -36
  95. langroid-0.1.139.dist-info/RECORD +0 -103
  96. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
  97. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
@@ -1,14 +1,31 @@
  import ast
  import hashlib
+ import json
  import logging
+ import os
  import sys
+ import warnings
  from enum import Enum
- from typing import Any, Dict, List, Optional, Tuple, Type, Union, no_type_check
+ from functools import cache
+ from itertools import chain
+ from typing import (
+     Any,
+     Callable,
+     Dict,
+     List,
+     Optional,
+     Tuple,
+     Type,
+     Union,
+     no_type_check,
+ )

+ import openai
  from httpx import Timeout
  from openai import AsyncOpenAI, OpenAI
  from pydantic import BaseModel
  from rich import print
+ from rich.markup import escape

  from langroid.cachedb.momento_cachedb import MomentoCache, MomentoCacheConfig
  from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
@@ -22,8 +39,10 @@ from langroid.language_models.base import (
      LLMTokenUsage,
      Role,
  )
- from langroid.language_models.prompt_formatter.base import (
-     PromptFormatter,
+ from langroid.language_models.config import HFPromptFormatterConfig
+ from langroid.language_models.prompt_formatter.hf_formatter import (
+     HFFormatter,
+     find_hf_formatter,
  )
  from langroid.language_models.utils import (
      async_retry_with_exponential_backoff,
@@ -35,14 +54,22 @@ from langroid.utils.system import friendly_error

  logging.getLogger("openai").setLevel(logging.ERROR)

+ if "OLLAMA_HOST" in os.environ:
+     OLLAMA_BASE_URL = f"http://{os.environ['OLLAMA_HOST']}/v1"
+ else:
+     OLLAMA_BASE_URL = "http://localhost:11434/v1"
+
+ OLLAMA_API_KEY = "ollama"
+ DUMMY_API_KEY = "xxx"
+

  class OpenAIChatModel(str, Enum):
      """Enum for OpenAI Chat models"""

      GPT3_5_TURBO = "gpt-3.5-turbo-1106"
-     GPT4_NOFUNC = "gpt-4" # before function_call API
      GPT4 = "gpt-4"
-     GPT4_TURBO = "gpt-4-1106-preview"
+     GPT4_32K = "gpt-4-32k"
+     GPT4_TURBO = "gpt-4-turbo-preview"


  class OpenAICompletionModel(str, Enum):
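The `OLLAMA_HOST` lookup added above happens at module import time. A minimal sketch of pointing langroid at a non-default Ollama endpoint; the host:port value is purely illustrative:

```python
import os

# Must be set before the module is imported, since OLLAMA_BASE_URL is
# computed at import time (see the hunk above).
os.environ["OLLAMA_HOST"] = "192.168.1.50:11434"  # hypothetical host:port

from langroid.language_models.openai_gpt import OLLAMA_BASE_URL

print(OLLAMA_BASE_URL)  # -> http://192.168.1.50:11434/v1
```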
@@ -54,9 +81,9 @@ class OpenAICompletionModel(str, Enum):

  _context_length: Dict[str, int] = {
      # can add other non-openAI models here
-     OpenAIChatModel.GPT3_5_TURBO: 4096,
+     OpenAIChatModel.GPT3_5_TURBO: 16_385,
      OpenAIChatModel.GPT4: 8192,
-     OpenAIChatModel.GPT4_NOFUNC: 8192,
+     OpenAIChatModel.GPT4_32K: 32_768,
      OpenAIChatModel.GPT4_TURBO: 128_000,
      OpenAICompletionModel.TEXT_DA_VINCI_003: 4096,
  }
@@ -64,13 +91,116 @@ _context_length: Dict[str, int] = {
  _cost_per_1k_tokens: Dict[str, Tuple[float, float]] = {
      # can add other non-openAI models here.
      # model => (prompt cost, generation cost) in USD
-     OpenAIChatModel.GPT3_5_TURBO: (0.0015, 0.002),
+     OpenAIChatModel.GPT3_5_TURBO: (0.001, 0.002),
      OpenAIChatModel.GPT4: (0.03, 0.06), # 8K context
      OpenAIChatModel.GPT4_TURBO: (0.01, 0.03), # 128K context
-     OpenAIChatModel.GPT4_NOFUNC: (0.03, 0.06),
  }


+ openAIChatModelPreferenceList = [
+     OpenAIChatModel.GPT4_TURBO,
+     OpenAIChatModel.GPT4,
+     OpenAIChatModel.GPT3_5_TURBO,
+ ]
+
+ openAICompletionModelPreferenceList = [
+     OpenAICompletionModel.GPT3_5_TURBO_INSTRUCT,
+     OpenAICompletionModel.TEXT_DA_VINCI_003,
+ ]
+
+
+ if "OPENAI_API_KEY" in os.environ:
+     try:
+         available_models = set(map(lambda m: m.id, OpenAI().models.list()))
+     except openai.AuthenticationError as e:
+         if settings.debug:
+             logging.warning(
+                 f"""
+                 OpenAI Authentication Error: {e}.
+                 ---
+                 If you intended to use an OpenAI Model, you should fix this,
+                 otherwise you can ignore this warning.
+                 """
+             )
+         available_models = set()
+     except Exception as e:
+         if settings.debug:
+             logging.warning(
+                 f"""
+                 Error while fetching available OpenAI models: {e}.
+                 Proceeding with an empty set of available models.
+                 """
+             )
+         available_models = set()
+ else:
+     available_models = set()
+
+ defaultOpenAIChatModel = next(
+     chain(
+         filter(
+             lambda m: m.value in available_models,
+             openAIChatModelPreferenceList,
+         ),
+         [OpenAIChatModel.GPT4_TURBO],
+     )
+ )
+ defaultOpenAICompletionModel = next(
+     chain(
+         filter(
+             lambda m: m.value in available_models,
+             openAICompletionModelPreferenceList,
+         ),
+         [OpenAICompletionModel.GPT3_5_TURBO_INSTRUCT],
+     )
+ )
+
+
+ class AccessWarning(Warning):
+     pass
+
+
+ @cache
+ def gpt_3_5_warning() -> None:
+     warnings.warn(
+         """
+         GPT-4 is not available, falling back to GPT-3.5.
+         Examples may not work properly and unexpected behavior may occur.
+         Adjustments to prompts may be necessary.
+         """,
+         AccessWarning,
+     )
+
+
+ def noop() -> None:
+     """Does nothing."""
+     return None
+
+
+ class OpenAICallParams(BaseModel):
+     """
+     Various params that can be sent to an OpenAI API chat-completion call.
+     When specified, any param here overrides the one with same name in the
+     OpenAIGPTConfig.
+     """
+
+     max_tokens: int = 1024
+     temperature: float = 0.2
+     frequency_penalty: float | None = 0.0 # between -2 and 2
+     presence_penalty: float | None = 0.0 # between -2 and 2
+     response_format: Dict[str, str] | None = None
+     logit_bias: Dict[int, float] | None = None # token_id -> bias
+     logprobs: bool = False
+     top_p: int | None = 1
+     top_logprobs: int | None = None # if int, requires logprobs=True
+     n: int = 1 # how many completions to generate (n > 1 is NOT handled now)
+     stop: str | List[str] | None = None # (list of) stop sequence(s)
+     seed: int | None = 42
+     user: str | None = None # user id for tracking
+
+     def to_dict_exclude_none(self) -> Dict[str, Any]:
+         return {k: v for k, v in self.dict().items() if v is not None}
+
+
  class OpenAIGPTConfig(LLMConfig):
      """
      Class for any LLM with an OpenAI-like API: besides the OpenAI models this includes:
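A hedged sketch of how the `OpenAICallParams` model shown above behaves: fields left as `None` are dropped by `to_dict_exclude_none()`, so they are never sent along with the chat-completion request. The field values here are illustrative:

```python
from langroid.language_models.openai_gpt import OpenAICallParams

# Illustrative values; any field explicitly set to None is omitted from the
# dict that eventually becomes part of the API call.
params = OpenAICallParams(temperature=0.0, seed=None, stop=["DONE"])
api_kwargs = params.to_dict_exclude_none()

assert "seed" not in api_kwargs          # dropped because it was None
assert api_kwargs["stop"] == ["DONE"]
assert api_kwargs["max_tokens"] == 1024  # default from the class above
```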
@@ -81,19 +211,51 @@ class OpenAIGPTConfig(LLMConfig):
      """

      type: str = "openai"
-     api_key: str = "" # CAUTION: set this ONLY via env var OPENAI_API_KEY
+     api_key: str = DUMMY_API_KEY # CAUTION: set this ONLY via env var OPENAI_API_KEY
      organization: str = ""
      api_base: str | None = None # used for local or other non-OpenAI models
      litellm: bool = False # use litellm api?
+     ollama: bool = False # use ollama's OpenAI-compatible endpoint?
      max_output_tokens: int = 1024
-     min_output_tokens: int = 64
+     min_output_tokens: int = 1
      use_chat_for_completion = True # do not change this, for OpenAI models!
      timeout: int = 20
      temperature: float = 0.2
      seed: int | None = 42
+     params: OpenAICallParams | None = None
      # these can be any model name that is served at an OpenAI-compatible API end point
-     chat_model: str = OpenAIChatModel.GPT4
-     completion_model: str = OpenAICompletionModel.GPT3_5_TURBO_INSTRUCT
+     chat_model: str = defaultOpenAIChatModel
+     completion_model: str = defaultOpenAICompletionModel
+     run_on_first_use: Callable[[], None] = noop
+     # a string that roughly matches a HuggingFace chat_template,
+     # e.g. "mistral-instruct-v0.2 (a fuzzy search is done to find the closest match)
+     formatter: str | None = None
+     hf_formatter: HFFormatter | None = None
+
+     def __init__(self, **kwargs) -> None: # type: ignore
+         local_model = "api_base" in kwargs and kwargs["api_base"] is not None
+
+         chat_model = kwargs.get("chat_model", "")
+         local_prefixes = ["local/", "litellm/", "ollama/"]
+         if any(chat_model.startswith(prefix) for prefix in local_prefixes):
+             local_model = True
+
+         warn_gpt_3_5 = (
+             "chat_model" not in kwargs.keys()
+             and not local_model
+             and defaultOpenAIChatModel == OpenAIChatModel.GPT3_5_TURBO
+         )
+
+         if warn_gpt_3_5:
+             existing_hook = kwargs.get("run_on_first_use", noop)
+
+             def with_warning() -> None:
+                 existing_hook()
+                 gpt_3_5_warning()
+
+             kwargs["run_on_first_use"] = with_warning
+
+         super().__init__(**kwargs)

      # all of the vars above can be set via env vars,
      # by upper-casing the name and prefixing with OPENAI_, e.g.
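Based on the config fields and the env-var comment in this hunk, a minimal sketch of constructing the config programmatically. The specific values, and the example env-var name derived from the comment's upper-casing rule, are illustrative:

```python
from langroid.language_models.openai_gpt import (
    OpenAICallParams,
    OpenAIGPTConfig,
)

# Illustrative: per the comment above, any of these fields can also come from
# env vars named by upper-casing the field and prefixing with OPENAI_,
# e.g. OPENAI_CHAT_MODEL for the chat_model field.
config = OpenAIGPTConfig(
    chat_model="gpt-4-turbo-preview",  # value of OpenAIChatModel.GPT4_TURBO above
    max_output_tokens=512,
    params=OpenAICallParams(seed=123, presence_penalty=0.5),
)
```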
@@ -122,6 +284,7 @@ class OpenAIGPTConfig(LLMConfig):
                """
            )
        litellm.telemetry = False
+       litellm.drop_params = True # drop un-supported params without crashing
        self.seed = None # some local mdls don't support seed
        keys_dict = litellm.validate_environment(self.chat_model)
        missing_keys = keys_dict.get("missing_keys", [])
@@ -163,37 +326,85 @@ class OpenAIResponse(BaseModel):
      usage: Dict # type: ignore


- # Define a class for OpenAI GPT-3 that extends the base class
+ def litellm_logging_fn(model_call_dict: Dict[str, Any]) -> None:
+     """Logging function for litellm"""
+     try:
+         api_input_dict = model_call_dict.get("additional_args", {}).get(
+             "complete_input_dict"
+         )
+         if api_input_dict is not None:
+             text = escape(json.dumps(api_input_dict, indent=2))
+             print(
+                 f"[grey37]LITELLM: {text}[/grey37]",
+             )
+     except Exception:
+         pass
+
+
+ # Define a class for OpenAI GPT models that extends the base class
  class OpenAIGPT(LanguageModel):
      """
      Class for OpenAI LLMs
      """

-     def __init__(self, config: OpenAIGPTConfig):
+     def __init__(self, config: OpenAIGPTConfig = OpenAIGPTConfig()):
          """
          Args:
              config: configuration for openai-gpt model
          """
+         # copy the config to avoid modifying the original
+         config = config.copy()
          super().__init__(config)
          self.config: OpenAIGPTConfig = config
-         if settings.nofunc:
-             self.config.chat_model = OpenAIChatModel.GPT4_NOFUNC
+
+         # Run the first time the model is used
+         self.run_on_first_use = cache(self.config.run_on_first_use)

          # global override of chat_model,
          # to allow quick testing with other models
          if settings.chat_model != "":
              self.config.chat_model = settings.chat_model
+             self.config.completion_model = settings.chat_model
+
+         if len(parts := self.config.chat_model.split("//")) > 1:
+             # there is a formatter specified, e.g.
+             # "litellm/ollama/mistral//hf" or
+             # "local/localhost:8000/v1//mistral-instruct-v0.2"
+             formatter = parts[1]
+             self.config.chat_model = parts[0]
+             if formatter == "hf":
+                 # e.g. "litellm/ollama/mistral//hf" -> "litellm/ollama/mistral"
+                 formatter = find_hf_formatter(self.config.chat_model)
+                 if formatter != "":
+                     # e.g. "mistral"
+                     self.config.formatter = formatter
+                     logging.warning(
+                         f"""
+                         Using completions (not chat) endpoint with HuggingFace
+                         chat_template for {formatter} for
+                         model {self.config.chat_model}
+                         """
+                     )
+             else:
+                 # e.g. "local/localhost:8000/v1//mistral-instruct-v0.2"
+                 self.config.formatter = formatter
+
+         if self.config.formatter is not None:
+             self.config.hf_formatter = HFFormatter(
+                 HFPromptFormatterConfig(model_name=self.config.formatter)
+             )

          # if model name starts with "litellm",
          # set the actual model name by stripping the "litellm/" prefix
          # and set the litellm flag to True
          if self.config.chat_model.startswith("litellm/") or self.config.litellm:
+             # e.g. litellm/ollama/mistral
              self.config.litellm = True
              self.api_base = self.config.api_base
              if self.config.chat_model.startswith("litellm/"):
                  # strip the "litellm/" prefix
+                 # e.g. litellm/ollama/llama2 => ollama/llama2
                  self.config.chat_model = self.config.chat_model.split("/", 1)[1]
-                 # litellm/ollama/llama2 => ollama/llama2 for example
          elif self.config.chat_model.startswith("local/"):
              # expect this to be of the form "local/localhost:8000/v1",
              # depending on how the model is launched locally.
@@ -203,15 +414,40 @@ class OpenAIGPT(LanguageModel):
              self.config.litellm = False
              self.config.seed = None # some models raise an error when seed is set
              # Extract the api_base from the model name after the "local/" prefix
-             self.api_base = "http://" + self.config.chat_model.split("/", 1)[1]
+             self.api_base = self.config.chat_model.split("/", 1)[1]
+             if not self.api_base.startswith("http"):
+                 self.api_base = "http://" + self.api_base
+         elif self.config.chat_model.startswith("ollama/"):
+             self.config.ollama = True
+             self.api_base = OLLAMA_BASE_URL
+             self.api_key = OLLAMA_API_KEY
+             self.config.chat_model = self.config.chat_model.replace("ollama/", "")
          else:
              self.api_base = self.config.api_base

+         if settings.chat_model != "":
+             # if we're overriding chat model globally, set completion model to same
+             self.config.completion_model = self.config.chat_model
+
+         if self.config.formatter is not None:
+             # we want to format chats -> completions using this specific formatter
+             self.config.use_completion_for_chat = True
+             self.config.completion_model = self.config.chat_model
+
+         if self.config.use_completion_for_chat:
+             self.config.use_chat_for_completion = False
+
          # NOTE: The api_key should be set in the .env file, or via
          # an explicit `export OPENAI_API_KEY=xxx` or `setenv OPENAI_API_KEY xxx`
          # Pydantic's BaseSettings will automatically pick it up from the
          # .env file
-         self.api_key = config.api_key or "xxx"
+         # The config.api_key is ignored when not using an OpenAI model
+         if self.is_openai_completion_model() or self.is_openai_chat_model():
+             self.api_key = config.api_key
+             if self.api_key == DUMMY_API_KEY:
+                 self.api_key = os.getenv("OPENAI_API_KEY", DUMMY_API_KEY)
+         else:
+             self.api_key = DUMMY_API_KEY
          self.client = OpenAI(
              api_key=self.api_key,
              base_url=self.api_base,
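The constructor logic in the hunks above routes on prefixes of `chat_model`. A hedged usage sketch; the model names are illustrative, and the `ollama/` case assumes a local Ollama server is running:

```python
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

# Prefixes recognized above (model names illustrative):
#   "litellm/ollama/mistral"   -> routed through litellm
#   "ollama/mistral"           -> Ollama's OpenAI-compatible endpoint at OLLAMA_BASE_URL
#   "local/localhost:8000/v1"  -> any locally served OpenAI-compatible API
# An optional "//<formatter>" suffix (e.g. "...//hf") selects a HuggingFace
# chat template so chat messages are rendered into a completion prompt.
llm = OpenAIGPT(OpenAIGPTConfig(chat_model="ollama/mistral"))
```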
@@ -241,8 +477,10 @@ class OpenAIGPT(LanguageModel):
              config.cache_config = RedisCacheConfig(
                  fake="fake" in settings.cache_type
              )
+             if "fake" in settings.cache_type:
+                 # force use of fake redis if global cache_type is "fakeredis"
+                 config.cache_config.fake = True
              self.cache = RedisCache(config.cache_config)
-             config.cache_config.fake = "fake" in settings.cache_type
          else:
              raise ValueError(
                  f"Invalid cache type {settings.cache_type}. "
@@ -251,11 +489,31 @@ class OpenAIGPT(LanguageModel):

          self.config._validate_litellm()

+     def _openai_api_call_params(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Prep the params to be sent to the OpenAI API
+         (or any OpenAI-compatible API, e.g. from Ooba or LmStudio)
+         for chat-completion.
+
+         Order of priority:
+         - (1) Params (mainly max_tokens) in the chat/achat/generate/agenerate call
+               (these are passed in via kwargs)
+         - (2) Params in OpenAIGPTConfi.params (of class OpenAICallParams)
+         - (3) Specific Params in OpenAIGPTConfig (just temperature for now)
+         """
+         params = dict(
+             temperature=self.config.temperature,
+         )
+         if self.config.params is not None:
+             params.update(self.config.params.to_dict_exclude_none())
+         params.update(kwargs)
+         return params
+
      def is_openai_chat_model(self) -> bool:
          openai_chat_models = [e.value for e in OpenAIChatModel]
          return self.config.chat_model in openai_chat_models

-     def _is_openai_completion_model(self) -> bool:
+     def is_openai_completion_model(self) -> bool:
          openai_completion_models = [e.value for e in OpenAICompletionModel]
          return self.config.completion_model in openai_completion_models
@@ -351,17 +609,21 @@ class OpenAIGPT(LanguageModel):
              if not is_async:
                  sys.stdout.write(Colors().GREEN + event_text)
                  sys.stdout.flush()
+             self.config.streamer(event_text)
          if event_fn_name:
              function_name = event_fn_name
              has_function = True
              if not is_async:
                  sys.stdout.write(Colors().GREEN + "FUNC: " + event_fn_name + ": ")
                  sys.stdout.flush()
+             self.config.streamer(event_fn_name)
+
          if event_args:
              function_args += event_args
              if not is_async:
                  sys.stdout.write(Colors().GREEN + event_args)
                  sys.stdout.flush()
+             self.config.streamer(event_args)
          if choices[0].get("finish_reason", "") in ["stop", "function_call"]:
              # for function_call, finish_reason does not necessarily
              # contain "function_call" as mentioned in the docs.
@@ -369,6 +631,7 @@ class OpenAIGPT(LanguageModel):
              return True, has_function, function_name, function_args, completion
          return False, has_function, function_name, function_args, completion

+     @retry_with_exponential_backoff
      def _stream_response( # type: ignore
          self, response, chat: bool = False
      ) -> Tuple[LLMResponse, Dict[str, Any]]:
@@ -420,6 +683,7 @@ class OpenAIGPT(LanguageModel):
              is_async=False,
          )

+     @async_retry_with_exponential_backoff
      async def _stream_response_async( # type: ignore
          self, response, chat: bool = False
      ) -> Tuple[LLMResponse, Dict[str, Any]]:
@@ -524,7 +788,11 @@ class OpenAIGPT(LanguageModel):
          )

      def _cache_store(self, k: str, v: Any) -> None:
-         self.cache.store(k, v)
+         try:
+             self.cache.store(k, v)
+         except Exception as e:
+             logging.error(f"Error in OpenAIGPT._cache_store: {e}")
+             pass

      def _cache_lookup(self, fn_name: str, **kwargs: Dict[str, Any]) -> Tuple[str, Any]:
          # Use the kwargs as the cache key
@@ -538,7 +806,12 @@ class OpenAIGPT(LanguageModel):
              # when caching disabled, return the hashed_key and none result
              return hashed_key, None
          # Try to get the result from the cache
-         return hashed_key, self.cache.retrieve(hashed_key)
+         try:
+             cached_val = self.cache.retrieve(hashed_key)
+         except Exception as e:
+             logging.error(f"Error in OpenAIGPT._cache_lookup: {e}")
+             return hashed_key, None
+         return hashed_key, cached_val

      def _cost_chat_model(self, prompt: int, completion: int) -> float:
          price = self.chat_cost()
@@ -569,6 +842,8 @@ class OpenAIGPT(LanguageModel):
          )

      def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+         self.run_on_first_use()
+
          try:
              return self._generate(prompt, max_tokens)
          except Exception as e:
@@ -581,7 +856,7 @@ class OpenAIGPT(LanguageModel):
              return self.chat(messages=prompt, max_tokens=max_tokens)

          if settings.debug:
-             print(f"[red]PROMPT: {prompt}[/red]")
+             print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")

          @retry_with_exponential_backoff
          def completions_with_backoff(**kwargs): # type: ignore
@@ -590,32 +865,55 @@ class OpenAIGPT(LanguageModel):
              if result is not None:
                  cached = True
                  if settings.debug:
-                     print("[red]CACHED[/red]")
+                     print("[grey37]CACHED[/grey37]")
              else:
+                 if self.config.litellm:
+                     from litellm import completion as litellm_completion
+                 completion_call = (
+                     litellm_completion
+                     if self.config.litellm
+                     else self.client.completions.create
+                 )
+                 if self.config.litellm and settings.debug:
+                     kwargs["logger_fn"] = litellm_logging_fn
                  # If it's not in the cache, call the API
-                 result = self.client.completions.create(**kwargs)
+                 result = completion_call(**kwargs)
                  if self.get_stream():
-                     llm_response, openai_response = self._stream_response(result)
+                     llm_response, openai_response = self._stream_response(
+                         result,
+                         chat=self.config.litellm,
+                     )
                      self._cache_store(hashed_key, openai_response)
                      return cached, hashed_key, openai_response
                  else:
                      self._cache_store(hashed_key, result.model_dump())
                      return cached, hashed_key, result

-         key_name = "model"
-         cached, hashed_key, response = completions_with_backoff(
-             **{key_name: self.config.completion_model},
-             prompt=prompt,
+         kwargs: Dict[str, Any] = dict(model=self.config.completion_model)
+         if self.config.litellm:
+             # TODO this is a temp fix, we should really be using a proper completion fn
+             # that takes a pre-formatted prompt, rather than mocking it as a sys msg.
+             kwargs["messages"] = [dict(content=prompt, role=Role.SYSTEM)]
+         else: # any other OpenAI-compatible endpoint
+             kwargs["prompt"] = prompt
+         args = dict(
+             **kwargs,
              max_tokens=max_tokens, # for output/completion
-             temperature=self.config.temperature,
-             echo=False,
              stream=self.get_stream(),
          )
-
-         msg = response["choices"][0]["text"].strip()
+         args = self._openai_api_call_params(args)
+         cached, hashed_key, response = completions_with_backoff(**args)
+         if not isinstance(response, dict):
+             response = response.dict()
+         if "message" in response["choices"][0]:
+             msg = response["choices"][0]["message"]["content"].strip()
+         else:
+             msg = response["choices"][0]["text"].strip()
          return LLMResponse(message=msg, cached=cached)

      async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+         self.run_on_first_use()
+
          try:
              return await self._agenerate(prompt, max_tokens)
          except Exception as e:
@@ -629,76 +927,56 @@ class OpenAIGPT(LanguageModel):
          # The calling fn should use the context `with Streaming(..., False)` to
          # disable streaming.
          if self.config.use_chat_for_completion:
-             messages = [
-                 LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
-                 LLMMessage(role=Role.USER, content=prompt),
-             ]
+             return await self.achat(messages=prompt, max_tokens=max_tokens)

-         @async_retry_with_exponential_backoff
-         async def completions_with_backoff(
-             **kwargs: Dict[str, Any]
-         ) -> Tuple[bool, str, Any]:
-             cached = False
-             hashed_key, result = self._cache_lookup("AsyncChatCompletion", **kwargs)
-             if result is not None:
-                 cached = True
-             else:
-                 if self.config.litellm:
-                     from litellm import acompletion as litellm_acompletion
-                 acompletion_call = (
-                     litellm_acompletion
-                     if self.config.litellm
-                     else self.async_client.chat.completions.create
-                 )
+         if settings.debug:
+             print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")

-                 # If it's not in the cache, call the API
-                 result = await acompletion_call(**kwargs)
-                 self._cache_store(hashed_key, result.model_dump())
-             return cached, hashed_key, result
-
-         cached, hashed_key, response = await completions_with_backoff(
-             model=self.config.chat_model,
-             messages=[m.api_dict() for m in messages],
-             max_tokens=max_tokens,
-             temperature=self.config.temperature,
-             stream=False,
-         )
-         if isinstance(response, dict):
-             response_dict = response
+         # WARNING: .Completion.* endpoints are deprecated,
+         # and as of Sep 2023 only legacy models will work here,
+         # e.g. text-davinci-003, text-ada-001.
+         @async_retry_with_exponential_backoff
+         async def completions_with_backoff(**kwargs): # type: ignore
+             cached = False
+             hashed_key, result = self._cache_lookup("AsyncCompletion", **kwargs)
+             if result is not None:
+                 cached = True
+                 if settings.debug:
+                     print("[grey37]CACHED[/grey37]")
              else:
-             response_dict = response.model_dump()
-         msg = response_dict["choices"][0]["message"]["content"].strip()
+                 if self.config.litellm:
+                     from litellm import acompletion as litellm_acompletion
+                 # TODO this may not work: text_completion is not async,
+                 # and we didn't find an async version in litellm
+                 acompletion_call = (
+                     litellm_acompletion
+                     if self.config.litellm
+                     else self.async_client.completions.create
+                 )
+                 if self.config.litellm and settings.debug:
+                     kwargs["logger_fn"] = litellm_logging_fn
+                 # If it's not in the cache, call the API
+                 result = await acompletion_call(**kwargs)
+                 self._cache_store(hashed_key, result.model_dump())
+             return cached, hashed_key, result
+
+         kwargs: Dict[str, Any] = dict(model=self.config.completion_model)
+         if self.config.litellm:
+             # TODO this is a temp fix, we should really be using a proper completion fn
+             # that takes a pre-formatted prompt, rather than mocking it as a sys msg.
+             kwargs["messages"] = [dict(content=prompt, role=Role.SYSTEM)]
+         else: # any other OpenAI-compatible endpoint
+             kwargs["prompt"] = prompt
+         cached, hashed_key, response = await completions_with_backoff(
+             **kwargs,
+             max_tokens=max_tokens,
+             stream=False,
+         )
+         if not isinstance(response, dict):
+             response = response.dict()
+         if "message" in response["choices"][0]:
+             msg = response["choices"][0]["message"]["content"].strip()
          else:
-             # WARNING: .Completion.* endpoints are deprecated,
-             # and as of Sep 2023 only legacy models will work here,
-             # e.g. text-davinci-003, text-ada-001.
-             @retry_with_exponential_backoff
-             async def completions_with_backoff(**kwargs): # type: ignore
-                 cached = False
-                 hashed_key, result = self._cache_lookup("AsyncCompletion", **kwargs)
-                 if result is not None:
-                     cached = True
-                 else:
-                     if self.config.litellm:
-                         from litellm import acompletion as litellm_acompletion
-                     acompletion_call = (
-                         litellm_acompletion
-                         if self.config.litellm
-                         else self.async_client.completions.create
-                     )
-                     # If it's not in the cache, call the API
-                     result = await acompletion_call(**kwargs)
-                     self._cache_store(hashed_key, result.model_dump())
-                 return cached, hashed_key, result
-
-             cached, hashed_key, response = await completions_with_backoff(
-                 model=self.config.completion_model,
-                 prompt=prompt,
-                 max_tokens=max_tokens,
-                 temperature=self.config.temperature,
-                 echo=False,
-                 stream=False,
-             )
              msg = response["choices"][0]["text"].strip()
          return LLMResponse(message=msg, cached=cached)

@@ -709,6 +987,8 @@ class OpenAIGPT(LanguageModel):
          functions: Optional[List[LLMFunctionSpec]] = None,
          function_call: str | Dict[str, str] = "auto",
      ) -> LLMResponse:
+         self.run_on_first_use()
+
          if functions is not None and not self.is_openai_chat_model():
              raise ValueError(
                  f"""
@@ -721,13 +1001,12 @@ class OpenAIGPT(LanguageModel):
              )
          if self.config.use_completion_for_chat and not self.is_openai_chat_model():
              # only makes sense for non-OpenAI models
-             if self.config.formatter is None:
+             if self.config.formatter is None or self.config.hf_formatter is None:
                  raise ValueError(
                      """
                      `formatter` must be specified in config to use completion for chat.
                      """
                  )
-             formatter = PromptFormatter.create(self.config.formatter)
              if isinstance(messages, str):
                  messages = [
                      LLMMessage(
@@ -735,7 +1014,7 @@ class OpenAIGPT(LanguageModel):
                      ),
                      LLMMessage(role=Role.USER, content=messages),
                  ]
-             prompt = formatter.format(messages)
+             prompt = self.config.hf_formatter.format(messages)
              return self.generate(prompt=prompt, max_tokens=max_tokens)
          try:
              return self._chat(messages, max_tokens, functions, function_call)
@@ -751,6 +1030,8 @@ class OpenAIGPT(LanguageModel):
          functions: Optional[List[LLMFunctionSpec]] = None,
          function_call: str | Dict[str, str] = "auto",
      ) -> LLMResponse:
+         self.run_on_first_use()
+
          if functions is not None and not self.is_openai_chat_model():
              raise ValueError(
                  f"""
@@ -762,15 +1043,22 @@ class OpenAIGPT(LanguageModel):
                  """
              )
          # turn off streaming for async calls
-         if self.config.use_completion_for_chat and not self.is_openai_chat_model():
-             # only makes sense for local models
+         if (
+             self.config.use_completion_for_chat
+             and not self.is_openai_chat_model()
+             and not self.is_openai_completion_model()
+         ):
+             # only makes sense for local models, where we are trying to
+             # convert a chat dialog msg-sequence to a simple completion prompt.
              if self.config.formatter is None:
                  raise ValueError(
                      """
                      `formatter` must be specified in config to use completion for chat.
                      """
                  )
-             formatter = PromptFormatter.create(self.config.formatter)
+             formatter = HFFormatter(
+                 HFPromptFormatterConfig(model_name=self.config.formatter)
+             )
              if isinstance(messages, str):
                  messages = [
                      LLMMessage(
@@ -795,7 +1083,7 @@ class OpenAIGPT(LanguageModel):
          if result is not None:
              cached = True
              if settings.debug:
-                 print("[red]CACHED[/red]")
+                 print("[grey37]CACHED[/grey37]")
          else:
              if self.config.litellm:
                  from litellm import completion as litellm_completion
@@ -805,6 +1093,8 @@ class OpenAIGPT(LanguageModel):
                  if self.config.litellm
                  else self.client.chat.completions.create
              )
+             if self.config.litellm and settings.debug:
+                 kwargs["logger_fn"] = litellm_logging_fn
              result = completion_call(**kwargs)
              if not self.get_stream():
                  # if streaming, cannot cache result
@@ -814,14 +1104,14 @@ class OpenAIGPT(LanguageModel):
                  self._cache_store(hashed_key, result.model_dump())
          return cached, hashed_key, result

-     @retry_with_exponential_backoff
+     @async_retry_with_exponential_backoff
      async def _achat_completions_with_backoff(self, **kwargs): # type: ignore
          cached = False
          hashed_key, result = self._cache_lookup("Completion", **kwargs)
          if result is not None:
              cached = True
              if settings.debug:
-                 print("[red]CACHED[/red]")
+                 print("[grey37]CACHED[/grey37]")
          else:
              if self.config.litellm:
                  from litellm import acompletion as litellm_acompletion
@@ -830,6 +1120,8 @@ class OpenAIGPT(LanguageModel):
                  if self.config.litellm
                  else self.async_client.chat.completions.create
              )
+             if self.config.litellm and settings.debug:
+                 kwargs["logger_fn"] = litellm_logging_fn
              # If it's not in the cache, call the API
              result = await acompletion_call(**kwargs)
              if not self.get_stream():
@@ -854,22 +1146,17 @@ class OpenAIGPT(LanguageModel):
          # Azure uses different parameters. It uses ``engine`` instead of ``model``
          # and the value should be the deployment_name not ``self.config.chat_model``
          chat_model = self.config.chat_model
-         key_name = "model"
          if self.config.type == "azure":
              if hasattr(self, "deployment_name"):
                  chat_model = self.deployment_name

          args: Dict[str, Any] = dict(
-             **{key_name: chat_model},
+             model=chat_model,
              messages=[m.api_dict() for m in llm_messages],
              max_tokens=max_tokens,
-             n=1,
-             stop=None,
-             temperature=self.config.temperature,
              stream=self.get_stream(),
          )
-         if self.config.seed is not None:
-             args.update(dict(seed=self.config.seed))
+         args.update(self._openai_api_call_params(args))
          # only include functions-related args if functions are provided
          # since the OpenAI API will throw an error if `functions` is None or []
          if functions is not None:
@@ -976,7 +1263,7 @@ class OpenAIGPT(LanguageModel):
          if self.get_stream() and not cached:
              llm_response, openai_response = self._stream_response(response, chat=True)
              self._cache_store(hashed_key, openai_response)
-             return llm_response
+             return llm_response # type: ignore
          if isinstance(response, dict):
              response_dict = response
          else:
@@ -993,7 +1280,6 @@ class OpenAIGPT(LanguageModel):
          """
          Async version of _chat(). See that function for details.
          """
-
          args = self._prep_chat_completion(
              messages,
              max_tokens,
@@ -1008,7 +1294,7 @@ class OpenAIGPT(LanguageModel):
                  response, chat=True
              )
              self._cache_store(hashed_key, openai_response)
-             return llm_response
+             return llm_response # type: ignore
          if isinstance(response, dict):
              response_dict = response
          else: