PyPI - langroid - Versions diffs - 0.1.252__py3-none-any.whl → 0.1.254__py3-none-any.whl - Mend

langroid 0.1.252__py3-none-any.whl → 0.1.254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

langroid/__init__.py +16 -15
langroid/agent/__init__.py +1 -0
langroid/agent/callbacks/chainlit.py +5 -12
langroid/agent/special/__init__.py +13 -4
langroid/agent/special/doc_chat_agent.py +39 -55
langroid/agent/special/neo4j/csv_kg_chat.py +2 -2
langroid/agent/special/sql/__init__.py +12 -6
langroid/agent/special/sql/sql_chat_agent.py +10 -4
langroid/agent/special/sql/utils/__init__.py +4 -5
langroid/agent/special/sql/utils/description_extractors.py +7 -2
langroid/agent/special/sql/utils/populate_metadata.py +6 -1
langroid/agent/special/table_chat_agent.py +2 -2
langroid/agent/tool_message.py +14 -3
langroid/agent/tools/__init__.py +2 -3
langroid/agent/tools/duckduckgo_search_tool.py +2 -2
langroid/agent/tools/google_search_tool.py +2 -2
langroid/agent/tools/metaphor_search_tool.py +2 -2
langroid/agent/tools/retrieval_tool.py +2 -2
langroid/agent/tools/run_python_code.py +2 -2
langroid/agent/tools/segment_extract_tool.py +2 -2
langroid/cachedb/__init__.py +10 -2
langroid/cachedb/base.py +10 -2
langroid/cachedb/momento_cachedb.py +10 -4
langroid/cachedb/redis_cachedb.py +2 -3
langroid/embedding_models/__init__.py +1 -0
langroid/exceptions.py +57 -0
langroid/language_models/__init__.py +1 -0
langroid/language_models/base.py +2 -3
langroid/language_models/openai_gpt.py +15 -14
langroid/language_models/prompt_formatter/__init__.py +4 -3
langroid/parsing/__init__.py +8 -2
langroid/parsing/document_parser.py +46 -10
langroid/parsing/parser.pyi +56 -0
langroid/parsing/spider.py +12 -7
langroid/utils/logging.py +7 -3
langroid/utils/output/__init__.py +1 -2
langroid/utils/output/citations.py +41 -0
langroid/utils/output/printing.py +7 -2
langroid/vector_store/__init__.py +33 -23
langroid/vector_store/chromadb.py +2 -8
langroid/vector_store/lancedb.py +36 -5
langroid/vector_store/meilisearch.py +21 -11
langroid/vector_store/momento.py +31 -14
{langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/METADATA +59 -47
{langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/RECORD +47 -45
{langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/LICENSE +0 -0
{langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/WHEEL +0 -0

langroid/exceptions.py CHANGED Viewed

@@ -1,3 +1,60 @@
+from typing import Optional
 class InfiniteLoopException(Exception):
     def __init__(self, message: str = "Infinite loop detected", *args: object) -> None:
         super().__init__(message, *args)
+class LangroidImportError(ImportError):
+    def __init__(
+        self,
+        package: Optional[str] = None,
+        extra: Optional[str] = None,
+        error: str = "",
+        *args: object,
+    ) -> None:
+        """
+        Generate helpful warning when attempting to import package or module.
+        Args:
+            package (str): The name of the package to import.
+            extra (str): The name of the extras package required for this import.
+            error (str): The error message to display. Depending on context, we
+                can set this by capturing the ImportError message.
+        """
+        if error == "" and package is not None:
+            error = f"{package} is not installed by default with Langroid.\n"
+        if extra:
+            install_help = f"""
+                If you want to use it, please install langroid
+                with the `{extra}` extra, for example:
+                If you are using pip:
+                pip install "langroid[{extra}]"
+                For multiple extras, you can separate them with commas:
+                pip install "langroid[{extra},another-extra]"
+                If you are using Poetry:
+                poetry add langroid --extras "{extra}"
+                For multiple extras with Poetry, list them with spaces:
+                poetry add langroid --extras "{extra} another-extra"
+                If you are working within the langroid dev env (which uses Poetry),
+                you can do:
+                poetry install -E "{extra}"
+                or if you want to include multiple extras:
+                poetry install -E "{extra} another-extra"
+                """
+        else:
+            install_help = """
+                If you want to use it, please install it in the same
+                virtual environment as langroid.
+                """
+        msg = error + install_help
+        super().__init__(msg, *args)

langroid/language_models/__init__.py CHANGED Viewed

@@ -22,6 +22,7 @@ from .openai_gpt import (
 )
 from .azure_openai import AzureConfig, AzureGPT
 __all__ = [
     "utils",
     "config",

langroid/language_models/base.py CHANGED Viewed

@@ -10,8 +10,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 import aiohttp
 from pydantic import BaseModel, BaseSettings, Field
-from langroid.cachedb.momento_cachedb import MomentoCacheConfig
-from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.cachedb.base import CacheDBConfig
 from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import top_level_json_field
@@ -49,7 +48,7 @@ class LLMConfig(BaseSettings):
     # use chat model for completion? For OpenAI models, this MUST be set to True!
     use_chat_for_completion: bool = True
     stream: bool = True  # stream output from API?
-    cache_config: None | RedisCacheConfig | MomentoCacheConfig = None
+    cache_config: None | CacheDBConfig = None
     # Dict of model -> (input/prompt cost, output/completion cost)
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)

langroid/language_models/openai_gpt.py CHANGED Viewed

@@ -28,8 +28,9 @@ from pydantic import BaseModel
 from rich import print
 from rich.markup import escape
-from langroid.cachedb.momento_cachedb import MomentoCache, MomentoCacheConfig
+from langroid.cachedb.base import CacheDB
 from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
+from langroid.exceptions import LangroidImportError
 from langroid.language_models.base import (
     LanguageModel,
     LLMConfig,
@@ -280,14 +281,7 @@ class OpenAIGPTConfig(LLMConfig):
         try:
             import litellm
         except ImportError:
-            raise ImportError(
-                """
-                litellm not installed. Please install it via:
-                pip install litellm.
-                Or when installing langroid, install it with the `litellm` extra:
-                pip install langroid[litellm]
-                """
-            )
+            raise LangroidImportError("litellm", "litellm")
         litellm.telemetry = False
         litellm.drop_params = True  # drop un-supported params without crashing
         self.seed = None  # some local mdls don't support seed
@@ -482,17 +476,24 @@ class OpenAIGPT(LanguageModel):
                 timeout=Timeout(self.config.timeout),
             )
-        self.cache: MomentoCache | RedisCache
+        self.cache: CacheDB
         if settings.cache_type == "momento":
-            if config.cache_config is None or isinstance(
-                config.cache_config, RedisCacheConfig
+            from langroid.cachedb.momento_cachedb import (
+                MomentoCache,
+                MomentoCacheConfig,
+            )
+            if config.cache_config is None or not isinstance(
+                config.cache_config,
+                MomentoCacheConfig,
             ):
                 # switch to fresh momento config if needed
                 config.cache_config = MomentoCacheConfig()
             self.cache = MomentoCache(config.cache_config)
         elif "redis" in settings.cache_type:
-            if config.cache_config is None or isinstance(
-                config.cache_config, MomentoCacheConfig
+            if config.cache_config is None or not isinstance(
+                config.cache_config,
+                RedisCacheConfig,
             ):
                 # switch to fresh redis config if needed
                 config.cache_config = RedisCacheConfig(

langroid/language_models/prompt_formatter/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from . import base
+from . import llama2_formatter
 from .base import PromptFormatter
 from .llama2_formatter import Llama2Formatter
-from ..config import PromptFormatterConfig, Llama2FormatterConfig
+from ..config import PromptFormatterConfig
+from ..config import Llama2FormatterConfig
-from . import base
-from . import llama2_formatter
 __all__ = [
     "PromptFormatter",

langroid/parsing/__init__.py CHANGED Viewed

@@ -11,7 +11,6 @@ from . import urls
 from . import utils
 from . import search
 from . import web_search
-from . import spider
 from .parser import (
     Splitter,
@@ -36,7 +35,6 @@ __all__ = [
     "utils",
     "search",
     "web_search",
-    "spider",
     "Splitter",
     "PdfParsingConfig",
     "DocxParsingConfig",
@@ -44,3 +42,11 @@ __all__ = [
     "ParsingConfig",
     "Parser",
 ]
+try:
+    from . import spider
+    spider
+    __all__.append("spider")
+except ImportError:
+    pass

langroid/parsing/document_parser.py CHANGED Viewed

@@ -1,16 +1,37 @@
+from __future__ import annotations
 import itertools
 import logging
 import re
 from enum import Enum
 from io import BytesIO
-from typing import Any, Generator, List, Tuple
+from typing import TYPE_CHECKING, Any, Generator, List, Tuple
+from langroid.exceptions import LangroidImportError
+try:
+    import fitz
+except ImportError:
+    if not TYPE_CHECKING:
+        fitz = None
+try:
+    import pypdf
+except ImportError:
+    if not TYPE_CHECKING:
+        pypdf = None
+try:
+    import pdfplumber
+except ImportError:
+    if not TYPE_CHECKING:
+        pdfplumber = None
-import fitz
-import pdfplumber
-import pypdf
 import requests
 from bs4 import BeautifulSoup
-from PIL import Image
+if TYPE_CHECKING:
+    from PIL import Image
 from langroid.mytypes import DocMetaData, Document
 from langroid.parsing.parser import Parser, ParsingConfig
@@ -363,19 +384,21 @@ class FitzPDFParser(DocumentParser):
     Parser for processing PDFs using the `fitz` library.
     """
-    def iterate_pages(self) -> Generator[Tuple[int, fitz.Page], None, None]:
+    def iterate_pages(self) -> Generator[Tuple[int, "fitz.Page"], None, None]:
         """
         Yield each page in the PDF using `fitz`.
         Returns:
             Generator[fitz.Page]: Generator yielding each page.
         """
+        if fitz is None:
+            raise LangroidImportError("fitz", "pdf-parsers")
         doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
         for i, page in enumerate(doc):
             yield i, page
         doc.close()
-    def extract_text_from_page(self, page: fitz.Page) -> str:
+    def extract_text_from_page(self, page: "fitz.Page") -> str:
         """
         Extract text from a given `fitz` page.
@@ -400,6 +423,8 @@ class PyPDFParser(DocumentParser):
         Returns:
             Generator[pypdf.pdf.PageObject]: Generator yielding each page.
         """
+        if pypdf is None:
+            raise LangroidImportError("pypdf", "pdf-parsers")
         reader = pypdf.PdfReader(self.doc_bytes)
         for i, page in enumerate(reader.pages):
             yield i, page
@@ -431,6 +456,8 @@ class PDFPlumberParser(DocumentParser):
         Returns:
             Generator[pdfplumber.Page]: Generator yielding each page.
         """
+        if pdfplumber is None:
+            raise LangroidImportError("pdfplumber", "pdf-parsers")
         with pdfplumber.open(self.doc_bytes) as pdf:
             for i, page in enumerate(pdf.pages):
                 yield i, page
@@ -456,7 +483,10 @@ class ImagePdfParser(DocumentParser):
     def iterate_pages(
         self,
     ) -> Generator[Tuple[int, "Image"], None, None]:  # type: ignore
-        from pdf2image import convert_from_bytes
+        try:
+            from pdf2image import convert_from_bytes
+        except ImportError:
+            raise LangroidImportError("pdf2image", "pdf-parsers")
         images = convert_from_bytes(self.doc_bytes.getvalue())
         for i, image in enumerate(images):
@@ -472,7 +502,10 @@ class ImagePdfParser(DocumentParser):
         Returns:
             str: Extracted text from the image.
         """
-        import pytesseract
+        try:
+            import pytesseract
+        except ImportError:
+            raise LangroidImportError("pytesseract", "pdf-parsers")
         text = pytesseract.image_to_string(page)
         return self.fix_text(text)
@@ -638,7 +671,10 @@ class PythonDocxParser(DocumentParser):
         In a DOCX file, pages are not explicitly defined,
         so we consider each paragraph as a separate 'page' for simplicity.
         """
-        import docx
+        try:
+            import docx
+        except ImportError:
+            raise LangroidImportError("python-docx", "docx")
         doc = docx.Document(self.doc_bytes)
         for i, para in enumerate(doc.paragraphs, start=1):

langroid/parsing/parser.pyi ADDED Viewed

@@ -0,0 +1,56 @@
+from enum import Enum
+from typing import Literal
+from _typeshed import Incomplete
+from pydantic import BaseSettings
+from langroid.mytypes import Document as Document
+from langroid.parsing.para_sentence_split import (
+    create_chunks as create_chunks,
+)
+from langroid.parsing.para_sentence_split import (
+    remove_extra_whitespace as remove_extra_whitespace,
+)
+logger: Incomplete
+class Splitter(str, Enum):
+    TOKENS: str
+    PARA_SENTENCE: str
+    SIMPLE: str
+class PdfParsingConfig(BaseSettings):
+    library: Literal["fitz", "pdfplumber", "pypdf", "unstructured", "pdf2image"]
+class DocxParsingConfig(BaseSettings):
+    library: Literal["python-docx", "unstructured"]
+class DocParsingConfig(BaseSettings):
+    library: Literal["unstructured"]
+class ParsingConfig(BaseSettings):
+    splitter: str
+    chunk_size: int
+    overlap: int
+    max_chunks: int
+    min_chunk_chars: int
+    discard_chunk_chars: int
+    n_similar_docs: int
+    n_neighbor_ids: int
+    separators: list[str]
+    token_encoding_model: str
+    pdf: PdfParsingConfig
+    docx: DocxParsingConfig
+    doc: DocParsingConfig
+class Parser:
+    config: Incomplete
+    tokenizer: Incomplete
+    def __init__(self, config: ParsingConfig) -> None: ...
+    def num_tokens(self, text: str) -> int: ...
+    def add_window_ids(self, chunks: list[Document]) -> None: ...
+    def split_simple(self, docs: list[Document]) -> list[Document]: ...
+    def split_para_sentence(self, docs: list[Document]) -> list[Document]: ...
+    def split_chunk_tokens(self, docs: list[Document]) -> list[Document]: ...
+    def chunk_tokens(self, text: str) -> list[str]: ...
+    def split(self, docs: list[Document]) -> list[Document]: ...

langroid/parsing/spider.py CHANGED Viewed

@@ -1,13 +1,18 @@
 from typing import List, Set, no_type_check
 from urllib.parse import urlparse
-from pydispatch import dispatcher
-from scrapy import signals
-from scrapy.crawler import CrawlerRunner
-from scrapy.http import Response
-from scrapy.linkextractors import LinkExtractor
-from scrapy.spiders import CrawlSpider, Rule
-from twisted.internet import defer, reactor
+from langroid.exceptions import LangroidImportError
+try:
+    from pydispatch import dispatcher
+    from scrapy import signals
+    from scrapy.crawler import CrawlerRunner
+    from scrapy.http import Response
+    from scrapy.linkextractors import LinkExtractor
+    from scrapy.spiders import CrawlSpider, Rule
+    from twisted.internet import defer, reactor
+except ImportError:
+    raise LangroidImportError("scrapy", "scrapy")
 @no_type_check

langroid/utils/logging.py CHANGED Viewed

@@ -31,7 +31,11 @@ def setup_colored_logging() -> None:
     # logger.setLevel(logging.DEBUG)
-def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
+def setup_logger(
+    name: str,
+    level: int = logging.INFO,
+    terminal: bool = False,
+) -> logging.Logger:
     """
     Set up a logger of module `name` at a desired level.
     Args:
@@ -42,7 +46,7 @@ def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
     """
     logger = logging.getLogger(name)
     logger.setLevel(level)
-    if not logger.hasHandlers():
+    if not logger.hasHandlers() and terminal:
         handler = logging.StreamHandler()
         formatter = logging.Formatter(
             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -73,7 +77,7 @@ def setup_file_logger(
 ) -> logging.Logger:
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     file_mode = "a" if append else "w"
-    logger = setup_logger(name)
+    logger = setup_logger(name, terminal=False)
     handler = logging.FileHandler(filename, mode=file_mode)
     handler.setLevel(logging.INFO)
     if log_format:

langroid/utils/output/__init__.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from . import printing
 from .printing import (
     shorten_text,
     print_long_text,
@@ -7,9 +6,9 @@ from .printing import (
     SuppressLoggerWarnings,
     PrintColored,
 )
 from .status import status
 __all__ = [
     "printing",
     "shorten_text",

langroid/utils/output/citations.py ADDED Viewed

@@ -0,0 +1,41 @@
+def extract_markdown_references(md_string: str) -> list[int]:
+    """
+    Extracts markdown references (e.g., [^1], [^2]) from a string and returns
+    them as a sorted list of integers.
+    Args:
+        md_string (str): The markdown string containing references.
+    Returns:
+        list[int]: A sorted list of unique integers from the markdown references.
+    """
+    import re
+    # Regex to find all occurrences of [^<number>]
+    matches = re.findall(r"\[\^(\d+)\]", md_string)
+    # Convert matches to integers, remove duplicates with set, and sort
+    return sorted(set(int(match) for match in matches))
+def format_footnote_text(content: str, width: int = 80) -> str:
+    """
+    Formats the content part of a footnote (i.e. not the first line that
+    appears right after the reference [^4])
+    It wraps the text so that no line is longer than the specified width and indents
+    lines as necessary for markdown footnotes.
+    Args:
+        content (str): The text of the footnote to be formatted.
+        width (int): Maximum width of the text lines.
+    Returns:
+        str: Properly formatted markdown footnote text.
+    """
+    import textwrap
+    # Wrap the text to the specified width
+    wrapped_lines = textwrap.wrap(content, width)
+    if len(wrapped_lines) == 0:
+        return ""
+    indent = "    "  # Indentation for markdown footnotes
+    return indent + ("\n" + indent).join(wrapped_lines)

langroid/utils/output/printing.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import logging
 import sys
 from contextlib import contextmanager
-from typing import Any, Iterator, Optional
+from typing import Any, Iterator, Optional, Type
 from rich import print as rprint
 from rich.text import Text
@@ -89,6 +89,11 @@ class SuppressLoggerWarnings:
         # Set the logging level to 'ERROR' to suppress warnings
         self.logger.setLevel(logging.ERROR)
-    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        traceback: Any,
+    ) -> None:
         # Reset the logging level to its original value
         self.logger.setLevel(self.original_level)

langroid/vector_store/__init__.py CHANGED Viewed

@@ -1,40 +1,50 @@
 from . import base
 from . import qdrantdb
-from . import meilisearch
-from . import lancedb
 from .base import VectorStoreConfig, VectorStore
 from .qdrantdb import QdrantDBConfig, QdrantDB
-from .meilisearch import MeiliSearch, MeiliSearchConfig
-from .lancedb import LanceDB, LanceDBConfig
-has_chromadb = False
-try:
-    from . import chromadb
-    from .chromadb import ChromaDBConfig, ChromaDB
-    chromadb  # silence linters
-    ChromaDB
-    ChromaDBConfig
-    has_chromadb = True
-except ImportError:
-    pass
 __all__ = [
     "base",
     "VectorStore",
     "VectorStoreConfig",
     "qdrantdb",
-    "meilisearch",
-    "lancedb",
     "QdrantDBConfig",
     "QdrantDB",
-    "MeiliSearch",
-    "MeiliSearchConfig",
-    "LanceDB",
-    "LanceDBConfig",
 ]
-if has_chromadb:
+try:
+    from . import meilisearch
+    from .meilisearch import MeiliSearch, MeiliSearchConfig
+    meilisearch
+    MeiliSearch
+    MeiliSearchConfig
+    __all__.extend(["meilisearch", "MeiliSearch", "MeiliSearchConfig"])
+except ImportError:
+    pass
+try:
+    from . import lancedb
+    from .lancedb import LanceDB, LanceDBConfig
+    lancedb
+    LanceDB
+    LanceDBConfig
+    __all__.extend(["lancedb", "LanceDB", "LanceDBConfig"])
+except ImportError:
+    pass
+try:
+    from . import chromadb
+    from .chromadb import ChromaDBConfig, ChromaDB
+    chromadb  # silence linters
+    ChromaDB
+    ChromaDBConfig
     __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
+except ImportError:
+    pass

langroid/vector_store/chromadb.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langroid.embedding_models.base import (
     EmbeddingModelsConfig,
 )
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
+from langroid.exceptions import LangroidImportError
 from langroid.mytypes import DocMetaData, Document
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
@@ -29,14 +30,7 @@ class ChromaDB(VectorStore):
         try:
             import chromadb
         except ImportError:
-            raise ImportError(
-                """
-                ChromaDB is not installed by default with Langroid.
-                If you want to use it, please install it with the `chromadb` extra, e.g.
-                pip install "langroid[chromadb]"
-                or an equivalent command.
-                """
-            )
+            raise LangroidImportError("chromadb", "chromadb")
         self.config = config
         emb_model = EmbeddingModel.create(config.embedding)
         self.embedding_fn = emb_model.embedding_fn()

langroid 0.1.252__py3-none-any.whl → 0.1.254__py3-none-any.whl

langroid 0.1.252__py3-none-any.whl → 0.1.254__py3-none-any.whl