PyPI - langroid - Versions diffs - 0.1.251__py3-none-any.whl → 0.1.253__py3-none-any.whl - Mend

langroid 0.1.251__py3-none-any.whl → 0.1.253__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

langroid/__init__.py +16 -15
langroid/agent/__init__.py +1 -0
langroid/agent/base.py +11 -1
langroid/agent/callbacks/chainlit.py +5 -12
langroid/agent/special/__init__.py +3 -2
langroid/agent/special/doc_chat_agent.py +36 -56
langroid/agent/special/neo4j/csv_kg_chat.py +2 -2
langroid/agent/special/sql/__init__.py +1 -2
langroid/agent/special/sql/sql_chat_agent.py +10 -4
langroid/agent/special/sql/utils/__init__.py +4 -5
langroid/agent/special/sql/utils/description_extractors.py +7 -2
langroid/agent/special/sql/utils/populate_metadata.py +6 -1
langroid/agent/special/table_chat_agent.py +2 -2
langroid/agent/task.py +25 -8
langroid/agent/tool_message.py +14 -3
langroid/agent/tools/__init__.py +2 -3
langroid/agent/tools/duckduckgo_search_tool.py +2 -2
langroid/agent/tools/google_search_tool.py +2 -2
langroid/agent/tools/metaphor_search_tool.py +2 -2
langroid/agent/tools/retrieval_tool.py +2 -2
langroid/agent/tools/run_python_code.py +2 -2
langroid/agent/tools/segment_extract_tool.py +2 -2
langroid/cachedb/base.py +10 -2
langroid/cachedb/momento_cachedb.py +10 -4
langroid/cachedb/redis_cachedb.py +2 -3
langroid/embedding_models/__init__.py +1 -0
langroid/exceptions.py +57 -0
langroid/language_models/__init__.py +1 -0
langroid/language_models/base.py +2 -3
langroid/language_models/openai_gpt.py +15 -14
langroid/language_models/prompt_formatter/__init__.py +4 -3
langroid/parsing/document_parser.py +20 -4
langroid/parsing/parser.pyi +56 -0
langroid/utils/logging.py +7 -3
langroid/utils/output/__init__.py +1 -2
langroid/utils/output/citations.py +41 -0
langroid/utils/output/printing.py +7 -2
langroid/vector_store/__init__.py +33 -17
langroid/vector_store/chromadb.py +2 -8
langroid/vector_store/lancedb.py +36 -5
langroid/vector_store/meilisearch.py +21 -11
langroid/vector_store/momento.py +31 -14
{langroid-0.1.251.dist-info → langroid-0.1.253.dist-info}/METADATA +31 -29
{langroid-0.1.251.dist-info → langroid-0.1.253.dist-info}/RECORD +46 -44
{langroid-0.1.251.dist-info → langroid-0.1.253.dist-info}/LICENSE +0 -0
{langroid-0.1.251.dist-info → langroid-0.1.253.dist-info}/WHEEL +0 -0

langroid/cachedb/momento_cachedb.py CHANGED Viewed

@@ -4,17 +4,23 @@ import os
 from datetime import timedelta
 from typing import Any, Dict, List
-import momento
+from langroid.cachedb.base import CacheDBConfig
+from langroid.exceptions import LangroidImportError
+try:
+    import momento
+    from momento.responses import CacheGet
+except ImportError:
+    raise LangroidImportError(package="momento", extra="momento")
 from dotenv import load_dotenv
-from momento.responses import CacheGet
-from pydantic import BaseModel
 from langroid.cachedb.base import CacheDB
 logger = logging.getLogger(__name__)
-class MomentoCacheConfig(BaseModel):
+class MomentoCacheConfig(CacheDBConfig):
     """Configuration model for Momento Cache."""
     ttl: int = 60 * 60 * 24 * 7  # 1 week

langroid/cachedb/redis_cachedb.py CHANGED Viewed

@@ -7,15 +7,14 @@ from typing import Any, Dict, List, TypeVar
 import fakeredis
 import redis
 from dotenv import load_dotenv
-from pydantic import BaseModel
-from langroid.cachedb.base import CacheDB
+from langroid.cachedb.base import CacheDB, CacheDBConfig
 T = TypeVar("T", bound="RedisCache")
 logger = logging.getLogger(__name__)
-class RedisCacheConfig(BaseModel):
+class RedisCacheConfig(CacheDBConfig):
     """Configuration model for RedisCache."""
     fake: bool = False

langroid/embedding_models/__init__.py CHANGED Viewed

@@ -18,6 +18,7 @@ from .remote_embeds import (
     RemoteEmbeddings,
 )
 __all__ = [
     "base",
     "models",

langroid/exceptions.py CHANGED Viewed

@@ -1,3 +1,60 @@
+from typing import Optional
 class InfiniteLoopException(Exception):
     def __init__(self, message: str = "Infinite loop detected", *args: object) -> None:
         super().__init__(message, *args)
+class LangroidImportError(ImportError):
+    def __init__(
+        self,
+        package: Optional[str] = None,
+        extra: Optional[str] = None,
+        error: str = "",
+        *args: object,
+    ) -> None:
+        """
+        Generate helpful warning when attempting to import package or module.
+        Args:
+            package (str): The name of the package to import.
+            extra (str): The name of the extras package required for this import.
+            error (str): The error message to display. Depending on context, we
+                can set this by capturing the ImportError message.
+        """
+        if error == "" and package is not None:
+            error = f"{package} is not installed by default with Langroid.\n"
+        if extra:
+            install_help = f"""
+                If you want to use it, please install langroid
+                with the `{extra}` extra, for example:
+                If you are using pip:
+                pip install "langroid[{extra}]"
+                For multiple extras, you can separate them with commas:
+                pip install "langroid[{extra},another-extra]"
+                If you are using Poetry:
+                poetry add langroid --extras "{extra}"
+                For multiple extras with Poetry, list them with spaces:
+                poetry add langroid --extras "{extra} another-extra"
+                If you are working within the langroid dev env (which uses Poetry),
+                you can do:
+                poetry install -E "{extra}"
+                or if you want to include multiple extras:
+                poetry install -E "{extra} another-extra"
+                """
+        else:
+            install_help = """
+                If you want to use it, please install it in the same
+                virtual environment as langroid.
+                """
+        msg = error + install_help
+        super().__init__(msg, *args)

langroid/language_models/__init__.py CHANGED Viewed

@@ -22,6 +22,7 @@ from .openai_gpt import (
 )
 from .azure_openai import AzureConfig, AzureGPT
 __all__ = [
     "utils",
     "config",

langroid/language_models/base.py CHANGED Viewed

@@ -10,8 +10,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 import aiohttp
 from pydantic import BaseModel, BaseSettings, Field
-from langroid.cachedb.momento_cachedb import MomentoCacheConfig
-from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.cachedb.base import CacheDBConfig
 from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import top_level_json_field
@@ -49,7 +48,7 @@ class LLMConfig(BaseSettings):
     # use chat model for completion? For OpenAI models, this MUST be set to True!
     use_chat_for_completion: bool = True
     stream: bool = True  # stream output from API?
-    cache_config: None | RedisCacheConfig | MomentoCacheConfig = None
+    cache_config: None | CacheDBConfig = None
     # Dict of model -> (input/prompt cost, output/completion cost)
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)

langroid/language_models/openai_gpt.py CHANGED Viewed

@@ -28,8 +28,9 @@ from pydantic import BaseModel
 from rich import print
 from rich.markup import escape
-from langroid.cachedb.momento_cachedb import MomentoCache, MomentoCacheConfig
+from langroid.cachedb.base import CacheDB
 from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
+from langroid.exceptions import LangroidImportError
 from langroid.language_models.base import (
     LanguageModel,
     LLMConfig,
@@ -280,14 +281,7 @@ class OpenAIGPTConfig(LLMConfig):
         try:
             import litellm
         except ImportError:
-            raise ImportError(
-                """
-                litellm not installed. Please install it via:
-                pip install litellm.
-                Or when installing langroid, install it with the `litellm` extra:
-                pip install langroid[litellm]
-                """
-            )
+            raise LangroidImportError("litellm", "litellm")
         litellm.telemetry = False
         litellm.drop_params = True  # drop un-supported params without crashing
         self.seed = None  # some local mdls don't support seed
@@ -482,17 +476,24 @@ class OpenAIGPT(LanguageModel):
                 timeout=Timeout(self.config.timeout),
             )
-        self.cache: MomentoCache | RedisCache
+        self.cache: CacheDB
         if settings.cache_type == "momento":
-            if config.cache_config is None or isinstance(
-                config.cache_config, RedisCacheConfig
+            from langroid.cachedb.momento_cachedb import (
+                MomentoCache,
+                MomentoCacheConfig,
+            )
+            if config.cache_config is None or not isinstance(
+                config.cache_config,
+                MomentoCacheConfig,
             ):
                 # switch to fresh momento config if needed
                 config.cache_config = MomentoCacheConfig()
             self.cache = MomentoCache(config.cache_config)
         elif "redis" in settings.cache_type:
-            if config.cache_config is None or isinstance(
-                config.cache_config, MomentoCacheConfig
+            if config.cache_config is None or not isinstance(
+                config.cache_config,
+                RedisCacheConfig,
             ):
                 # switch to fresh redis config if needed
                 config.cache_config = RedisCacheConfig(

langroid/language_models/prompt_formatter/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from . import base
+from . import llama2_formatter
 from .base import PromptFormatter
 from .llama2_formatter import Llama2Formatter
-from ..config import PromptFormatterConfig, Llama2FormatterConfig
+from ..config import PromptFormatterConfig
+from ..config import Llama2FormatterConfig
-from . import base
-from . import llama2_formatter
 __all__ = [
     "PromptFormatter",

langroid/parsing/document_parser.py CHANGED Viewed

@@ -5,9 +5,19 @@ from enum import Enum
 from io import BytesIO
 from typing import Any, Generator, List, Tuple
-import fitz
+from langroid.exceptions import LangroidImportError
+try:
+    import fitz
+except ImportError:
+    raise LangroidImportError("PyMuPDF", "pdf-parsers")
+try:
+    import pypdf
+except ImportError:
+    raise LangroidImportError("pypdf", "pdf-parsers")
 import pdfplumber
-import pypdf
 import requests
 from bs4 import BeautifulSoup
 from PIL import Image
@@ -456,7 +466,10 @@ class ImagePdfParser(DocumentParser):
     def iterate_pages(
         self,
     ) -> Generator[Tuple[int, "Image"], None, None]:  # type: ignore
-        from pdf2image import convert_from_bytes
+        try:
+            from pdf2image import convert_from_bytes
+        except ImportError:
+            raise LangroidImportError("pdf2image", "pdf-parsers")
         images = convert_from_bytes(self.doc_bytes.getvalue())
         for i, image in enumerate(images):
@@ -472,7 +485,10 @@ class ImagePdfParser(DocumentParser):
         Returns:
             str: Extracted text from the image.
         """
-        import pytesseract
+        try:
+            import pytesseract
+        except ImportError:
+            raise LangroidImportError("pytesseract", "pdf-parsers")
         text = pytesseract.image_to_string(page)
         return self.fix_text(text)

langroid/parsing/parser.pyi ADDED Viewed

@@ -0,0 +1,56 @@
+from enum import Enum
+from typing import Literal
+from _typeshed import Incomplete
+from pydantic import BaseSettings
+from langroid.mytypes import Document as Document
+from langroid.parsing.para_sentence_split import (
+    create_chunks as create_chunks,
+)
+from langroid.parsing.para_sentence_split import (
+    remove_extra_whitespace as remove_extra_whitespace,
+)
+logger: Incomplete
+class Splitter(str, Enum):
+    TOKENS: str
+    PARA_SENTENCE: str
+    SIMPLE: str
+class PdfParsingConfig(BaseSettings):
+    library: Literal["fitz", "pdfplumber", "pypdf", "unstructured", "pdf2image"]
+class DocxParsingConfig(BaseSettings):
+    library: Literal["python-docx", "unstructured"]
+class DocParsingConfig(BaseSettings):
+    library: Literal["unstructured"]
+class ParsingConfig(BaseSettings):
+    splitter: str
+    chunk_size: int
+    overlap: int
+    max_chunks: int
+    min_chunk_chars: int
+    discard_chunk_chars: int
+    n_similar_docs: int
+    n_neighbor_ids: int
+    separators: list[str]
+    token_encoding_model: str
+    pdf: PdfParsingConfig
+    docx: DocxParsingConfig
+    doc: DocParsingConfig
+class Parser:
+    config: Incomplete
+    tokenizer: Incomplete
+    def __init__(self, config: ParsingConfig) -> None: ...
+    def num_tokens(self, text: str) -> int: ...
+    def add_window_ids(self, chunks: list[Document]) -> None: ...
+    def split_simple(self, docs: list[Document]) -> list[Document]: ...
+    def split_para_sentence(self, docs: list[Document]) -> list[Document]: ...
+    def split_chunk_tokens(self, docs: list[Document]) -> list[Document]: ...
+    def chunk_tokens(self, text: str) -> list[str]: ...
+    def split(self, docs: list[Document]) -> list[Document]: ...

langroid/utils/logging.py CHANGED Viewed

@@ -31,7 +31,11 @@ def setup_colored_logging() -> None:
     # logger.setLevel(logging.DEBUG)
-def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
+def setup_logger(
+    name: str,
+    level: int = logging.INFO,
+    terminal: bool = False,
+) -> logging.Logger:
     """
     Set up a logger of module `name` at a desired level.
     Args:
@@ -42,7 +46,7 @@ def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
     """
     logger = logging.getLogger(name)
     logger.setLevel(level)
-    if not logger.hasHandlers():
+    if not logger.hasHandlers() and terminal:
         handler = logging.StreamHandler()
         formatter = logging.Formatter(
             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -73,7 +77,7 @@ def setup_file_logger(
 ) -> logging.Logger:
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     file_mode = "a" if append else "w"
-    logger = setup_logger(name)
+    logger = setup_logger(name, terminal=False)
     handler = logging.FileHandler(filename, mode=file_mode)
     handler.setLevel(logging.INFO)
     if log_format:

langroid/utils/output/__init__.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from . import printing
 from .printing import (
     shorten_text,
     print_long_text,
@@ -7,9 +6,9 @@ from .printing import (
     SuppressLoggerWarnings,
     PrintColored,
 )
 from .status import status
 __all__ = [
     "printing",
     "shorten_text",

langroid/utils/output/citations.py ADDED Viewed

@@ -0,0 +1,41 @@
+def extract_markdown_references(md_string: str) -> list[int]:
+    """
+    Extracts markdown references (e.g., [^1], [^2]) from a string and returns
+    them as a sorted list of integers.
+    Args:
+        md_string (str): The markdown string containing references.
+    Returns:
+        list[int]: A sorted list of unique integers from the markdown references.
+    """
+    import re
+    # Regex to find all occurrences of [^<number>]
+    matches = re.findall(r"\[\^(\d+)\]", md_string)
+    # Convert matches to integers, remove duplicates with set, and sort
+    return sorted(set(int(match) for match in matches))
+def format_footnote_text(content: str, width: int = 80) -> str:
+    """
+    Formats the content part of a footnote (i.e. not the first line that
+    appears right after the reference [^4])
+    It wraps the text so that no line is longer than the specified width and indents
+    lines as necessary for markdown footnotes.
+    Args:
+        content (str): The text of the footnote to be formatted.
+        width (int): Maximum width of the text lines.
+    Returns:
+        str: Properly formatted markdown footnote text.
+    """
+    import textwrap
+    # Wrap the text to the specified width
+    wrapped_lines = textwrap.wrap(content, width)
+    if len(wrapped_lines) == 0:
+        return ""
+    indent = "    "  # Indentation for markdown footnotes
+    return indent + ("\n" + indent).join(wrapped_lines)

langroid/utils/output/printing.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import logging
 import sys
 from contextlib import contextmanager
-from typing import Any, Iterator, Optional
+from typing import Any, Iterator, Optional, Type
 from rich import print as rprint
 from rich.text import Text
@@ -89,6 +89,11 @@ class SuppressLoggerWarnings:
         # Set the logging level to 'ERROR' to suppress warnings
         self.logger.setLevel(logging.ERROR)
-    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        traceback: Any,
+    ) -> None:
         # Reset the logging level to its original value
         self.logger.setLevel(self.original_level)

langroid/vector_store/__init__.py CHANGED Viewed

@@ -1,25 +1,9 @@
 from . import base
 from . import qdrantdb
-from . import meilisearch
-from . import lancedb
 from .base import VectorStoreConfig, VectorStore
 from .qdrantdb import QdrantDBConfig, QdrantDB
-from .meilisearch import MeiliSearch, MeiliSearchConfig
-from .lancedb import LanceDB, LanceDBConfig
-has_chromadb = False
-try:
-    from . import chromadb
-    from .chromadb import ChromaDBConfig, ChromaDB
-    chromadb  # silence linters
-    ChromaDB
-    ChromaDBConfig
-    has_chromadb = True
-except ImportError:
-    pass
 __all__ = [
     "base",
@@ -36,5 +20,37 @@ __all__ = [
     "LanceDBConfig",
 ]
-if has_chromadb:
+try:
+    from . import meilisearch
+    from .meilisearch import MeiliSearch, MeiliSearchConfig
+    meilisearch
+    MeiliSearch
+    MeiliSearchConfig
+    __all__.extend(["meilisearch", "MeiliSearch", "MeiliSearchConfig"])
+except ImportError:
+    pass
+try:
+    from . import lancedb
+    from .lancedb import LanceDB, LanceDBConfig
+    lancedb
+    LanceDB
+    LanceDBConfig
+    __all__.extend(["lancedb", "LanceDB", "LanceDBConfig"])
+except ImportError:
+    pass
+try:
+    from . import chromadb
+    from .chromadb import ChromaDBConfig, ChromaDB
+    chromadb  # silence linters
+    ChromaDB
+    ChromaDBConfig
     __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
+except ImportError:
+    pass

langroid/vector_store/chromadb.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langroid.embedding_models.base import (
     EmbeddingModelsConfig,
 )
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
+from langroid.exceptions import LangroidImportError
 from langroid.mytypes import DocMetaData, Document
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
@@ -29,14 +30,7 @@ class ChromaDB(VectorStore):
         try:
             import chromadb
         except ImportError:
-            raise ImportError(
-                """
-                ChromaDB is not installed by default with Langroid.
-                If you want to use it, please install it with the `chromadb` extra, e.g.
-                pip install "langroid[chromadb]"
-                or an equivalent command.
-                """
-            )
+            raise LangroidImportError("chromadb", "chromadb")
         self.config = config
         emb_model = EmbeddingModel.create(config.embedding)
         self.embedding_fn = emb_model.embedding_fn()

langroid/vector_store/lancedb.py CHANGED Viewed

@@ -1,18 +1,31 @@
+from __future__ import annotations
 import logging
-from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Type
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+)
-import lancedb
 import pandas as pd
 from dotenv import load_dotenv
-from lancedb.pydantic import LanceModel, Vector
-from lancedb.query import LanceVectorQueryBuilder
 from pydantic import BaseModel, ValidationError, create_model
+if TYPE_CHECKING:
+    from lancedb.query import LanceVectorQueryBuilder
 from langroid.embedding_models.base import (
     EmbeddingModel,
     EmbeddingModelsConfig,
 )
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
+from langroid.exceptions import LangroidImportError
 from langroid.mytypes import Document, EmbeddingFunction
 from langroid.utils.configuration import settings
 from langroid.utils.pydantic_utils import (
@@ -26,6 +39,14 @@ from langroid.utils.pydantic_utils import (
 )
 from langroid.vector_store.base import VectorStore, VectorStoreConfig
+try:
+    import lancedb
+    from lancedb.pydantic import LanceModel, Vector
+    has_lancedb = True
+except ImportError:
+    has_lancedb = False
 logger = logging.getLogger(__name__)
@@ -44,6 +65,9 @@ class LanceDBConfig(VectorStoreConfig):
 class LanceDB(VectorStore):
     def __init__(self, config: LanceDBConfig = LanceDBConfig()):
         super().__init__(config)
+        if not has_lancedb:
+            raise LangroidImportError("lancedb", "lancedb")
         self.config: LanceDBConfig = config
         emb_model = EmbeddingModel.create(config.embedding)
         self.embedding_fn: EmbeddingFunction = emb_model.embedding_fn()
@@ -170,6 +194,9 @@ class LanceDB(VectorStore):
         if not issubclass(doc_cls, Document):
             raise ValueError("DocClass must be a subclass of Document")
+        if not has_lancedb:
+            raise LangroidImportError("lancedb", "lancedb")
         n = self.embedding_dim
         # Prepare fields for the new model
@@ -193,6 +220,8 @@ class LanceDB(VectorStore):
         Flat version of the lance_schema, as nested Pydantic schemas are not yet
         supported by LanceDB.
         """
+        if not has_lancedb:
+            raise LangroidImportError("lancedb", "lancedb")
         lance_model = self._create_lance_schema(doc_cls)
         FlatModel = flatten_pydantic_model(lance_model, base_model=LanceModel)
         return FlatModel
@@ -368,7 +397,9 @@ class LanceDB(VectorStore):
     def delete_collection(self, collection_name: str) -> None:
         self.client.drop_table(collection_name, ignore_missing=True)
-    def _lance_result_to_docs(self, result: LanceVectorQueryBuilder) -> List[Document]:
+    def _lance_result_to_docs(
+        self, result: "LanceVectorQueryBuilder"
+    ) -> List[Document]:
         if self.is_from_dataframe:
             df = result.to_pandas()
             return dataframe_to_documents(

langroid 0.1.251__py3-none-any.whl → 0.1.253__py3-none-any.whl

langroid 0.1.251__py3-none-any.whl → 0.1.253__py3-none-any.whl