langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +95 -0
- langroid/agent/__init__.py +40 -0
- langroid/agent/base.py +222 -91
- langroid/agent/batch.py +264 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +247 -101
- langroid/agent/chat_document.py +41 -4
- langroid/agent/openai_assistant.py +842 -0
- langroid/agent/special/__init__.py +50 -0
- langroid/agent/special/doc_chat_agent.py +837 -141
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +127 -0
- langroid/agent/special/retriever_agent.py +32 -198
- langroid/agent/special/sql/__init__.py +11 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +22 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +475 -122
- langroid/agent/tool_message.py +75 -13
- langroid/agent/tools/__init__.py +13 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +16 -29
- langroid/agent/tools/run_python_code.py +60 -0
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/agent/tools/segment_extract_tool.py +36 -0
- langroid/cachedb/__init__.py +9 -0
- langroid/cachedb/base.py +22 -2
- langroid/cachedb/momento_cachedb.py +26 -2
- langroid/cachedb/redis_cachedb.py +78 -11
- langroid/embedding_models/__init__.py +34 -0
- langroid/embedding_models/base.py +21 -2
- langroid/embedding_models/models.py +120 -18
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +45 -0
- langroid/language_models/azure_openai.py +80 -27
- langroid/language_models/base.py +117 -12
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_assistants.py +3 -0
- langroid/language_models/openai_gpt.py +558 -174
- langroid/language_models/prompt_formatter/__init__.py +15 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +18 -21
- langroid/mytypes.py +25 -8
- langroid/parsing/__init__.py +46 -0
- langroid/parsing/document_parser.py +260 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +122 -59
- langroid/parsing/repo_loader.py +114 -52
- langroid/parsing/search.py +68 -63
- langroid/parsing/spider.py +3 -2
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -11
- langroid/parsing/urls.py +85 -37
- langroid/parsing/utils.py +298 -4
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +11 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +17 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/algorithms/graph.py +103 -0
- langroid/utils/configuration.py +36 -5
- langroid/utils/constants.py +4 -0
- langroid/utils/globals.py +2 -2
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +21 -0
- langroid/utils/output/printing.py +47 -1
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +616 -2
- langroid/utils/system.py +98 -0
- langroid/vector_store/__init__.py +40 -0
- langroid/vector_store/base.py +203 -6
- langroid/vector_store/chromadb.py +59 -32
- langroid/vector_store/lancedb.py +463 -0
- langroid/vector_store/meilisearch.py +10 -7
- langroid/vector_store/momento.py +262 -0
- langroid/vector_store/qdrantdb.py +104 -22
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
- langroid-0.1.219.dist-info/RECORD +127 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.85.dist-info/RECORD +0 -94
- /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
langroid/embedding_models/remote_embeds.py
ADDED
@@ -0,0 +1,153 @@
+"""
+If run as a script, starts an RPC server which handles remote
+embedding requests:
+
+For example:
+python3 -m langroid.embedding_models.remote_embeds --port `port`
+
+where `port` is the port at which the service is exposed. Currently,
+supports insecure connections only, and this should NOT be exposed to
+the internet.
+"""
+
+import atexit
+import subprocess
+import time
+from typing import Callable, Optional
+
+import grpc
+from fire import Fire
+
+import langroid.embedding_models.models as em
+import langroid.embedding_models.protoc.embeddings_pb2 as embeddings_pb
+import langroid.embedding_models.protoc.embeddings_pb2_grpc as embeddings_grpc
+from langroid.mytypes import Embeddings
+
+
+class RemoteEmbeddingRPCs(embeddings_grpc.EmbeddingServicer):
+    def __init__(
+        self,
+        model_name: str,
+        batch_size: int,
+        data_parallel: bool,
+        device: Optional[str],
+        devices: Optional[list[str]],
+    ):
+        super().__init__()
+        self.embedding_fn = em.SentenceTransformerEmbeddings(
+            em.SentenceTransformerEmbeddingsConfig(
+                model_name=model_name,
+                batch_size=batch_size,
+                data_parallel=data_parallel,
+                device=device,
+                devices=devices,
+            )
+        ).embedding_fn()
+
+    def Embed(
+        self, request: embeddings_pb.EmbeddingRequest, _: grpc.RpcContext
+    ) -> embeddings_pb.BatchEmbeds:
+        embeds = self.embedding_fn(list(request.strings))
+
+        embeds_pb = [embeddings_pb.Embed(embed=e) for e in embeds]
+
+        return embeddings_pb.BatchEmbeds(embeds=embeds_pb)
+
+
+class RemoteEmbeddingsConfig(em.SentenceTransformerEmbeddingsConfig):
+    api_base: str = "localhost"
+    port: int = 50052
+    # The below are used only when waiting for server creation
+    poll_delay: float = 0.01
+    max_retries: int = 1000
+
+
+class RemoteEmbeddings(em.SentenceTransformerEmbeddings):
+    def __init__(self, config: RemoteEmbeddingsConfig = RemoteEmbeddingsConfig()):
+        super().__init__(config)
+        self.config: RemoteEmbeddingsConfig = config
+        self.have_started_server: bool = False
+
+    def embedding_fn(self) -> Callable[[list[str]], Embeddings]:
+        def fn(texts: list[str]) -> Embeddings:
+            url = f"{self.config.api_base}:{self.config.port}"
+            with grpc.insecure_channel(url) as channel:
+                stub = embeddings_grpc.EmbeddingStub(channel)  # type: ignore
+                response = stub.Embed(
+                    embeddings_pb.EmbeddingRequest(
+                        strings=texts,
+                    )
+                )
+
+                return [list(emb.embed) for emb in response.embeds]
+
+        def with_handling(texts: list[str]) -> Embeddings:
+            # In local mode, start the server if it has not already
+            # been started
+            if self.config.api_base == "localhost" and not self.have_started_server:
+                try:
+                    return fn(texts)
+                # Occurs when the server hasn't been started
+                except grpc.RpcError:
+                    self.have_started_server = True
+                    # Start the server
+                    proc = subprocess.Popen(
+                        [
+                            "python3",
+                            __file__,
+                            "--bind_address_base",
+                            self.config.api_base,
+                            "--port",
+                            str(self.config.port),
+                            "--batch_size",
+                            str(self.config.batch_size),
+                            "--model_name",
+                            self.config.model_name,
+                        ],
+                    )
+
+                    atexit.register(lambda: proc.terminate())
+
+                    for _ in range(self.config.max_retries - 1):
+                        try:
+                            return fn(texts)
+                        except grpc.RpcError:
+                            time.sleep(self.config.poll_delay)
+
+            # The remote is not local or we have exhausted retries
+            # We should now raise an error if the server is not accessible
+            return fn(texts)
+
+        return with_handling
+
+
+async def serve(
+    bind_address_base: str = "localhost",
+    port: int = 50052,
+    batch_size: int = 512,
+    data_parallel: bool = False,
+    device: Optional[str] = None,
+    devices: Optional[list[str]] = None,
+    model_name: str = "BAAI/bge-large-en-v1.5",
+) -> None:
+    """Starts the RPC server."""
+    server = grpc.aio.server()
+    embeddings_grpc.add_EmbeddingServicer_to_server(
+        RemoteEmbeddingRPCs(
+            model_name=model_name,
+            batch_size=batch_size,
+            data_parallel=data_parallel,
+            device=device,
+            devices=devices,
+        ),
+        server,
+    )  # type: ignore
+    url = f"{bind_address_base}:{port}"
+    server.add_insecure_port(url)
+    await server.start()
+    print(f"Embedding server started, listening on {url}")
+    await server.wait_for_termination()
+
+
+if __name__ == "__main__":
+    Fire(serve)
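For orientation, here is a minimal usage sketch of the new module (an illustration, not part of the diff). It assumes the sentence-transformers and grpc extras are installed, and relies on the auto-start behavior above: when `api_base` is "localhost", `RemoteEmbeddings` spawns the server on the first failed call and retries.

# Sketch only: exercising the new RemoteEmbeddings client shown above.
from langroid.embedding_models.remote_embeds import (
    RemoteEmbeddings,
    RemoteEmbeddingsConfig,
)

cfg = RemoteEmbeddingsConfig(
    model_name="BAAI/bge-large-en-v1.5",  # same default as serve()
    port=50052,  # default insecure gRPC port; keep it off the internet
)
embed = RemoteEmbeddings(cfg).embedding_fn()
# The first call may start the local server and poll until it is up.
vecs = embed(["hello world", "another sentence"])
print(len(vecs), len(vecs[0]))  # 2 vectors of the model's dimension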
langroid/language_models/__init__.py
ADDED
@@ -0,0 +1,45 @@
+from . import utils
+from . import config
+from . import base
+from . import openai_gpt
+from . import azure_openai
+from . import prompt_formatter
+
+from .base import (
+    LLMConfig,
+    LLMMessage,
+    LLMFunctionCall,
+    LLMFunctionSpec,
+    Role,
+    LLMTokenUsage,
+    LLMResponse,
+)
+from .openai_gpt import (
+    OpenAIChatModel,
+    OpenAICompletionModel,
+    OpenAIGPTConfig,
+    OpenAIGPT,
+)
+from .azure_openai import AzureConfig, AzureGPT
+
+__all__ = [
+    "utils",
+    "config",
+    "base",
+    "openai_gpt",
+    "azure_openai",
+    "prompt_formatter",
+    "LLMConfig",
+    "LLMMessage",
+    "LLMFunctionCall",
+    "LLMFunctionSpec",
+    "Role",
+    "LLMTokenUsage",
+    "LLMResponse",
+    "OpenAIChatModel",
+    "OpenAICompletionModel",
+    "OpenAIGPTConfig",
+    "OpenAIGPT",
+    "AzureConfig",
+    "AzureGPT",
+]
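The practical effect of this new `__init__.py` is that the core LLM types become importable from the subpackage root. A quick sketch (illustrative, not from the diff; the printed dict is indicative):

# Sketch: top-level imports enabled by the re-exports above.
from langroid.language_models import LLMMessage, OpenAIGPT, OpenAIGPTConfig, Role

msg = LLMMessage(role=Role.ASSISTANT, content="hello")
print(msg.api_dict())  # e.g. {'role': 'assistant', 'content': 'hello'}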
langroid/language_models/azure_openai.py
CHANGED
@@ -1,7 +1,6 @@
-import os
-
-import openai
 from dotenv import load_dotenv
+from httpx import Timeout
+from openai import AsyncAzureOpenAI, AzureOpenAI
 
 from langroid.language_models.openai_gpt import (
     OpenAIChatModel,
@@ -23,12 +22,26 @@ class AzureConfig(OpenAIGPTConfig):
            chose for your deployment when you deployed a model.
        model_name (str): can be set in the ``.env`` file as ``AZURE_GPT_MODEL_NAME``
            and should be based on the model name chosen during setup.
+       model_version (str): can be set in the ``.env`` file as
+           ``AZURE_OPENAI_MODEL_VERSION`` and should be based on the model name
+           chosen during setup.
     """
 
+    api_key: str = ""  # CAUTION: set this ONLY via env var AZURE_OPENAI_API_KEY
     type: str = "azure"
     api_version: str = "2023-05-15"
     deployment_name: str = ""
     model_name: str = ""
+    model_version: str = ""  # is used to determine the cost of using the model
+    api_base: str = ""
+
+    # all of the vars above can be set via env vars,
+    # by upper-casing the name and prefixing with `env_prefix`, e.g.
+    # AZURE_OPENAI_API_VERSION=2023-05-15
+    # This is either done in the .env file, or via an explicit
+    # `export AZURE_OPENAI_API_VERSION=...`
+    class Config:
+        env_prefix = "AZURE_OPENAI_"
 
 
 class AzureGPT(OpenAIGPT):
@@ -41,59 +54,99 @@ class AzureGPT(OpenAIGPT):
        api_base (str): Azure API base url
        api_version (str): Azure API version
        model_name (str): the name of gpt model in your deployment
+       model_version (str): the version of gpt model in your deployment
    """
 
    def __init__(self, config: AzureConfig):
+        # This will auto-populate config values from .env file
+        load_dotenv()
        super().__init__(config)
        self.config: AzureConfig = config
-        self.
-        openai.api_type = self.api_type
-        load_dotenv()
-        self.api_key = os.getenv("AZURE_API_KEY", "")
-        if self.api_key == "":
+        if self.config.api_key == "":
            raise ValueError(
                """
-
+                AZURE_OPENAI_API_KEY not set in .env file,
                please set it to your Azure API key."""
            )
 
-        self.api_base
-        if self.api_base == "":
+        if self.config.api_base == "":
            raise ValueError(
                """
                AZURE_OPENAI_API_BASE not set in .env file,
                please set it to your Azure API key."""
            )
-        # we don't need this for ``api_key`` because it's handled inside
-        # ``openai_gpt.py`` methods before invoking chat/completion calls
-        else:
-            openai.api_base = self.api_base
 
-        self.
-        os.getenv("AZURE_OPENAI_API_VERSION", "") or config.api_version
-        )
-        openai.api_version = self.api_version
-
-        self.deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "")
-        if self.deployment_name == "":
+        if self.config.deployment_name == "":
            raise ValueError(
                """
                AZURE_OPENAI_DEPLOYMENT_NAME not set in .env file,
                please set it to your Azure openai deployment name."""
            )
+        self.deployment_name = self.config.deployment_name
 
-        self.model_name
-        if self.model_name == "":
+        if self.config.model_name == "":
            raise ValueError(
                """
-
-                please set it to chat model name in
+                AZURE_OPENAI_MODEL_NAME not set in .env file,
+                please set it to chat model name in your deployment."""
            )
 
        # set the chat model to be the same as the model_name
        # This corresponds to the gpt model you chose for your deployment
        # when you deployed a model
-
+        self.set_chat_model()
+
+        self.client = AzureOpenAI(
+            api_key=self.config.api_key,
+            azure_endpoint=self.config.api_base,
+            api_version=self.config.api_version,
+            azure_deployment=self.config.deployment_name,
+        )
+        self.async_client = AsyncAzureOpenAI(
+            api_key=self.config.api_key,
+            azure_endpoint=self.config.api_base,
+            api_version=self.config.api_version,
+            azure_deployment=self.config.deployment_name,
+            timeout=Timeout(self.config.timeout),
+        )
+
+    def set_chat_model(self) -> None:
+        """
+        Sets the chat model configuration based on the model name specified in the
+        ``.env``. This function checks the `model_name` in the configuration and sets
+        the appropriate chat model in the `config.chat_model`. It supports handling for
+        '35-turbo' and 'gpt-4' models. For 'gpt-4', it further delegates the handling
+        to `handle_gpt4_model` method. If the model name does not match any predefined
+        models, it defaults to `OpenAIChatModel.GPT4`.
+        """
+        MODEL_35_TURBO = "35-turbo"
+        MODEL_GPT4 = "gpt-4"
+
+        if self.config.model_name == MODEL_35_TURBO:
            self.config.chat_model = OpenAIChatModel.GPT3_5_TURBO
+        elif self.config.model_name == MODEL_GPT4:
+            self.handle_gpt4_model()
+        else:
+            self.config.chat_model = OpenAIChatModel.GPT4
+
+    def handle_gpt4_model(self) -> None:
+        """
+        Handles the setting of the GPT-4 model in the configuration.
+        This function checks the `model_version` in the configuration.
+        If the version is not set, it raises a ValueError indicating that the model
+        version needs to be specified in the ``.env`` file.
+        It sets `OpenAIChatModel.GPT4_TURBO` if the version is
+        '1106-Preview', otherwise, it defaults to setting `OpenAIChatModel.GPT4`.
+        """
+        VERSION_1106_PREVIEW = "1106-Preview"
+
+        if self.config.model_version == "":
+            raise ValueError(
+                "AZURE_OPENAI_MODEL_VERSION not set in .env file. "
+                "Please set it to the chat model version used in your deployment."
+            )
+
+        if self.config.model_version == VERSION_1106_PREVIEW:
+            self.config.chat_model = OpenAIChatModel.GPT4_TURBO
        else:
            self.config.chat_model = OpenAIChatModel.GPT4
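Given the new `env_prefix = "AZURE_OPENAI_"`, configuration now flows entirely through environment variables (or a `.env` file). A hypothetical setup sketch with placeholder values, using only the variable names that appear in the diff above:

# Sketch: placeholder env values AzureConfig reads via env_prefix="AZURE_OPENAI_".
import os

os.environ["AZURE_OPENAI_API_KEY"] = "<your-key>"
os.environ["AZURE_OPENAI_API_BASE"] = "https://<resource>.openai.azure.com"
os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = "<deployment>"
os.environ["AZURE_OPENAI_MODEL_NAME"] = "gpt-4"
os.environ["AZURE_OPENAI_MODEL_VERSION"] = "1106-Preview"  # selects GPT4_TURBO above

from langroid.language_models.azure_openai import AzureConfig, AzureGPT

llm = AzureGPT(AzureConfig())  # fields auto-populate from the environment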
langroid/language_models/base.py
CHANGED
@@ -1,19 +1,20 @@
+import ast
 import asyncio
 import json
 import logging
 from abc import ABC, abstractmethod
+from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import aiohttp
-from pydantic import BaseModel, BaseSettings
+from pydantic import BaseModel, BaseSettings, Field
 
 from langroid.cachedb.momento_cachedb import MomentoCacheConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
-from langroid.language_models.config import Llama2FormatterConfig, PromptFormatterConfig
 from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
-from langroid.parsing.
+from langroid.parsing.parse_json import top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
 from langroid.prompts.templates import (
     EXTRACTION_PROMPT_GPT4,
@@ -25,15 +26,21 @@ from langroid.utils.output.printing import show_if_debug
 logger = logging.getLogger(__name__)
 
 
+def noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None:
+    pass
+
+
 class LLMConfig(BaseSettings):
     type: str = "openai"
-
+    streamer: Optional[Callable[[Any], None]] = noop_fn
+    api_base: str | None = None
+    formatter: None | str = None
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
     temperature: float = 0.0
-    chat_context_length: int =
-    completion_context_length: int =
+    chat_context_length: int = 8000
+    completion_context_length: int = 8000
     max_output_tokens: int = 1024  # generate at most this many tokens
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
@@ -59,6 +66,26 @@ class LLMFunctionCall(BaseModel):
     to: str = ""  # intended recipient
     arguments: Optional[Dict[str, Any]] = None
 
+    @staticmethod
+    def from_dict(message: Dict[str, Any]) -> "LLMFunctionCall":
+        """
+        Initialize from dictionary.
+        Args:
+            d: dictionary containing fields to initialize
+        """
+        fun_call = LLMFunctionCall(name=message["name"])
+        fun_args_str = message["arguments"]
+        # sometimes may be malformed with invalid indents,
+        # so we try to be safe by removing newlines.
+        if fun_args_str is not None:
+            fun_args_str = fun_args_str.replace("\n", "").strip()
+            fun_args = ast.literal_eval(fun_args_str)
+        else:
+            fun_args = None
+        fun_call.arguments = fun_args
+
+        return fun_call
+
     def __str__(self) -> str:
         return "FUNC: " + json.dumps(self.dict(), indent=2)
 
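To make the new `from_dict` concrete, here is a sketch with a hypothetical function-call dict (the `arguments` string is newline-stripped, then parsed with `ast.literal_eval`, exactly as in the hunk above):

# Sketch: parsing an OpenAI-style function-call dict via the new from_dict.
from langroid.language_models.base import LLMFunctionCall

raw = {
    "name": "get_weather",  # hypothetical function name
    "arguments": "{'city': 'Tokyo',\n 'units': 'C'}",  # stray newline gets stripped
}
fc = LLMFunctionCall.from_dict(raw)
assert fc.arguments == {"city": "Tokyo", "units": "C"}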
@@ -79,6 +106,20 @@ class LLMTokenUsage(BaseModel):
     prompt_tokens: int = 0
     completion_tokens: int = 0
     cost: float = 0.0
+    calls: int = 0  # how many API calls
+
+    def reset(self) -> None:
+        self.prompt_tokens = 0
+        self.completion_tokens = 0
+        self.cost = 0.0
+        self.calls = 0
+
+    def __str__(self) -> str:
+        return (
+            f"Tokens = "
+            f"(prompt {self.prompt_tokens}, completion {self.completion_tokens}), "
+            f"Cost={self.cost}, Calls={self.calls}"
+        )
 
     @property
     def total_tokens(self) -> int:
@@ -99,12 +140,16 @@ class LLMMessage(BaseModel):
 
     role: Role
     name: Optional[str] = None
+    tool_id: str = ""  # used by OpenAIAssistant
     content: str
     function_call: Optional[LLMFunctionCall] = None
+    timestamp: datetime = Field(default_factory=datetime.utcnow)
 
     def api_dict(self) -> Dict[str, Any]:
         """
         Convert to dictionary for API request.
+        DROP the tool_id, since it is only for use in the Assistant API,
+        not the completion API.
         Returns:
             dict: dictionary representation of LLM message
         """
@@ -120,6 +165,8 @@ class LLMMessage(BaseModel):
             dict_no_none["function_call"]["arguments"] = json.dumps(
                 dict_no_none["function_call"]["arguments"]
             )
+        dict_no_none.pop("tool_id", None)
+        dict_no_none.pop("timestamp", None)
         return dict_no_none
 
     def __str__(self) -> str:
@@ -137,10 +184,17 @@ class LLMResponse(BaseModel):
     """
 
     message: str
+    tool_id: str = ""  # used by OpenAIAssistant
     function_call: Optional[LLMFunctionCall] = None
     usage: Optional[LLMTokenUsage]
     cached: bool = False
 
+    def __str__(self) -> str:
+        if self.function_call is not None:
+            return str(self.function_call)
+        else:
+            return self.message
+
     def to_LLMMessage(self) -> LLMMessage:
         content = self.message
         role = Role.ASSISTANT if self.function_call is None else Role.FUNCTION
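The new `calls` counter, `reset()`, and `__str__` on `LLMTokenUsage` make per-model accounting directly printable; a small sketch:

# Sketch: the extended LLMTokenUsage accounting fields in action.
from langroid.language_models.base import LLMTokenUsage

u = LLMTokenUsage(prompt_tokens=120, completion_tokens=30, cost=0.0045, calls=1)
print(u)  # Tokens = (prompt 120, completion 30), Cost=0.0045, Calls=1
u.reset()  # zeroes all four counters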
@@ -204,7 +258,10 @@ class LanguageModel(ABC):
     Abstract base class for language models.
     """
 
-
+    # usage cost by model, accumulates here
+    usage_cost_dict: Dict[str, LLMTokenUsage] = {}
+
+    def __init__(self, config: LLMConfig = LLMConfig()):
         self.config = config
 
     @staticmethod
@@ -215,6 +272,16 @@ class LanguageModel(ABC):
             config: configuration for language model
         Returns: instance of language model
         """
+        if type(config) is LLMConfig:
+            raise ValueError(
+                """
+                Cannot create a Language Model object from LLMConfig.
+                Please specify a specific subclass of LLMConfig e.g.,
+                OpenAIGPTConfig. If you are creating a ChatAgent from
+                a ChatAgentConfig, please specify the `llm` field of this config
+                as a specific subclass of LLMConfig, e.g., OpenAIGPTConfig.
+                """
+            )
         from langroid.language_models.azure_openai import AzureGPT
         from langroid.language_models.openai_gpt import OpenAIGPT
 
@@ -311,18 +378,18 @@ class LanguageModel(ABC):
         pass
 
     @abstractmethod
-    def generate(self, prompt: str, max_tokens: int) -> LLMResponse:
+    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
         pass
 
     @abstractmethod
-    async def agenerate(self, prompt: str, max_tokens: int) -> LLMResponse:
+    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
         pass
 
     @abstractmethod
     def chat(
         self,
         messages: Union[str, List[LLMMessage]],
-        max_tokens: int,
+        max_tokens: int = 200,
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
@@ -332,7 +399,7 @@ class LanguageModel(ABC):
     async def achat(
         self,
         messages: Union[str, List[LLMMessage]],
-        max_tokens: int,
+        max_tokens: int = 200,
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
@@ -350,6 +417,44 @@ class LanguageModel(ABC):
     def chat_cost(self) -> Tuple[float, float]:
         return self.config.chat_cost_per_1k_tokens
 
+    def reset_usage_cost(self) -> None:
+        for mdl in [self.config.chat_model, self.config.completion_model]:
+            if mdl is None:
+                return
+            if mdl not in self.usage_cost_dict:
+                self.usage_cost_dict[mdl] = LLMTokenUsage()
+            counter = self.usage_cost_dict[mdl]
+            counter.reset()
+
+    def update_usage_cost(
+        self, chat: bool, prompts: int, completions: int, cost: float
+    ) -> None:
+        """
+        Update usage cost for this LLM.
+        Args:
+            chat (bool): whether to update for chat or completion model
+            prompts (int): number of tokens used for prompts
+            completions (int): number of tokens used for completions
+            cost (float): total token cost in USD
+        """
+        mdl = self.config.chat_model if chat else self.config.completion_model
+        if mdl is None:
+            return
+        if mdl not in self.usage_cost_dict:
+            self.usage_cost_dict[mdl] = LLMTokenUsage()
+        counter = self.usage_cost_dict[mdl]
+        counter.prompt_tokens += prompts
+        counter.completion_tokens += completions
+        counter.cost += cost
+        counter.calls += 1
+
+    @classmethod
+    def usage_cost_summary(cls) -> str:
+        s = ""
+        for model, counter in cls.usage_cost_dict.items():
+            s += f"{model}: {counter}\n"
+        return s
+
     def followup_to_standalone(
         self, chat_history: List[Tuple[str, str]], question: str
     ) -> str:
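Because `usage_cost_dict` is a class attribute, costs accumulate per model name across all `LanguageModel` instances; `update_usage_cost` is invoked internally after each API call, and `usage_cost_summary()` reports the totals. A sketch (assuming an `OpenAIGPT` instance can be constructed, e.g. with an API key in the environment):

# Sketch: class-level accumulation of usage costs across calls.
from langroid.language_models.base import LanguageModel
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

llm = OpenAIGPT(OpenAIGPTConfig())  # any concrete LanguageModel subclass
llm.update_usage_cost(chat=True, prompts=120, completions=30, cost=0.0045)
llm.update_usage_cost(chat=True, prompts=80, completions=20, cost=0.0030)
# Totals are keyed by the configured chat model, e.g.:
# gpt-4: Tokens = (prompt 200, completion 50), Cost=0.0075, Calls=2
print(LanguageModel.usage_cost_summary())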
|