langroid 0.36.0__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/special/doc_chat_agent.py +14 -10
- langroid/embedding_models/models.py +2 -2
- langroid/exceptions.py +16 -4
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/document_parser.py +167 -64
- langroid/parsing/parser.py +11 -7
- langroid/parsing/utils.py +2 -2
- langroid/utils/output/citations.py +32 -12
- langroid/vector_store/base.py +1 -1
- langroid/vector_store/chromadb.py +12 -1
- langroid/vector_store/qdrantdb.py +1 -1
- langroid/vector_store/weaviatedb.py +5 -5
- {langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/METADATA +33 -16
- {langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/RECORD +16 -16
- {langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/WHEEL +0 -0
- {langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/licenses/LICENSE +0 -0
langroid/agent/special/doc_chat_agent.py CHANGED

```diff
@@ -15,6 +15,7 @@ pip install "langroid[hf-embeddings]"
 """
 
 import logging
+import textwrap
 from collections import OrderedDict
 from functools import cache
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, no_type_check
@@ -81,7 +82,7 @@ You will be given various passages from these documents, and asked to answer questions
 about them, or summarize them into coherent answers.
 """
 
-CHUNK_ENRICHMENT_DELIMITER = "<##-##-##>"
+CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>"
 
 has_sentence_transformers = False
 try:
@@ -99,7 +100,7 @@ hf_embed_config = SentenceTransformerEmbeddingsConfig(
 
 oai_embed_config = OpenAIEmbeddingsConfig(
     model_type="openai",
-    model_name="text-embedding-ada-002",
+    model_name="text-embedding-3-small",
     dims=1536,
 )
 
```
```diff
@@ -188,8 +189,8 @@ class DocChatAgentConfig(ChatAgentConfig):
             # NOTE: PDF parsing is extremely challenging, and each library
             # has its own strengths and weaknesses.
             # Try one that works for your use case.
-            # or "unstructured", "
-            library="
+            # or "unstructured", "fitz", "pymupdf4llm", "pypdf"
+            library="pymupdf4llm",
         ),
     )
 
```
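With `pymupdf4llm` now the default PDF library in `DocChatAgentConfig`, setups that relied on a different parser can pin one explicitly. A minimal sketch, assuming the import paths below match your installed version:

```python
from langroid.agent.special.doc_chat_agent import DocChatAgentConfig
from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

# Pin the PDF parser instead of relying on the new default ("pymupdf4llm").
config = DocChatAgentConfig(
    parsing=ParsingConfig(
        pdf=PdfParsingConfig(library="fitz"),
    )
)
```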
```diff
@@ -810,9 +811,11 @@ class DocChatAgent(ChatAgent):
         return "\n".join(
             [
                 f"""
-                [{i+1}]
+                -----[EXTRACT #{i+1}]----------
                 {content}
                 {source}
+                -----END OF EXTRACT------------
+
                 """
                 for i, (content, source) in enumerate(zip(contents, sources))
             ]
```
```diff
@@ -949,12 +952,13 @@ class DocChatAgent(ChatAgent):
                 continue
 
             # Combine original content with questions in a structured way
-            combined_content =
-
-
+            combined_content = textwrap.dedent(
+                f"""\
+                {doc.content}
                 {enrichment_config.delimiter}
                 {enrichment}
-                """
+                """
+            )
 
             new_doc = doc.copy(
                 update={
```
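The `textwrap.dedent(f"""\ ...""")` pattern above strips the indentation that a triple-quoted string picks up inside a method body, and the backslash after the opening quotes suppresses the leading newline. A standalone illustration (not langroid code; names are placeholders):

```python
import textwrap

def combine(content: str, delimiter: str, enrichment: str) -> str:
    # Backslash removes the initial newline; dedent() removes the common
    # leading indentation of the literal's lines.
    return textwrap.dedent(
        f"""\
        {content}
        {delimiter}
        {enrichment}
        """
    )

print(combine("chunk text", "<##-##-##>", "generated questions"))
```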
```diff
@@ -1440,7 +1444,7 @@ class DocChatAgent(ChatAgent):
         delimiter = self.config.chunk_enrichment_config.delimiter
         return [
             (
-                doc.copy(update={"content": doc.content.split(delimiter)[0]
+                doc.copy(update={"content": doc.content.split(delimiter)[0]})
                 if doc.content and getattr(doc.metadata, "has_enrichment", False)
                 else doc
             )
```
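Together with the leading `\n` added to `CHUNK_ENRICHMENT_DELIMITER` earlier in this diff, recovering the original chunk reduces to a split on the delimiter. A rough illustration of the behavior, not the library's exact code:

```python
CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>"

enriched = "Original chunk text." + CHUNK_ENRICHMENT_DELIMITER + "\nHypothetical questions"
original = enriched.split(CHUNK_ENRICHMENT_DELIMITER)[0]
assert original == "Original chunk text."  # no trailing newline left behind
```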
langroid/embedding_models/models.py CHANGED

```diff
@@ -18,7 +18,7 @@ AzureADTokenProvider = Callable[[], str]
 
 class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "openai"
-    model_name: str = "text-embedding-ada-002"
+    model_name: str = "text-embedding-3-large"
     api_key: str = ""
     api_base: Optional[str] = None
     organization: str = ""
@@ -28,7 +28,7 @@ class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
 
 class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "azure-openai"
-    model_name: str = "text-embedding-ada-002"
+    model_name: str = "text-embedding-3-large"
     api_key: str = ""
     api_base: str = ""
     deployment_name: Optional[str] = None
```
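Both configs now default to `text-embedding-3-large`, so code that implicitly depended on the previous default should pin the model explicitly. A hedged sketch (the `dims` field appears in the `doc_chat_agent.py` hunk above):

```python
from langroid.embedding_models.models import OpenAIEmbeddingsConfig

# Pin the embedding model rather than relying on the changed default.
# dims=1536 matches text-embedding-3-small; adjust if you pick another model.
embed_cfg = OpenAIEmbeddingsConfig(
    model_type="openai",
    model_name="text-embedding-3-small",
    dims=1536,
)
```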
langroid/exceptions.py CHANGED

```diff
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional
 
 
 class XMLException(Exception):
@@ -15,7 +15,7 @@ class LangroidImportError(ImportError):
     def __init__(
         self,
         package: Optional[str] = None,
-        extra: Optional[str] = None,
+        extra: Optional[str | List[str]] = None,
         error: str = "",
         *args: object,
     ) -> None:
@@ -33,9 +33,21 @@ class LangroidImportError(ImportError):
             error = f"{package} is not installed by default with Langroid.\n"
 
         if extra:
+            if isinstance(extra, list):
+                help_preamble = f"""
+                If you want to use it, please install langroid with one of these
+                extras: {', '.join(extra)}. The examples below use the first one,
+                i.e. {extra[0]}.
+                """
+                extra = extra[0]
+            else:
+                help_preamble = f"""
+                If you want to use it, please install langroid with the
+                `{extra}` extra.
+                """
+
             install_help = f"""
-                If you want to use it, please install langroid
-                with the `{extra}` extra, for example:
+                {help_preamble}
 
                 If you are using pip:
                 pip install "langroid[{extra}]"
```
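With `extra` widened to accept a list, callers can name every extra that pulls in an optional package; the message recommends all of them and uses the first for the concrete install commands. A usage sketch mirroring how the new parsers in this release raise it:

```python
from langroid.exceptions import LangroidImportError

try:
    import docling  # optional dependency
except ImportError:
    # Any of these extras installs docling; the sample commands in the error
    # message use the first one, e.g. pip install "langroid[docling]"
    raise LangroidImportError("docling", ["docling", "pdf-parsers", "all", "doc-chat"])
```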
langroid/parsing/document_parser.py CHANGED
```diff
@@ -3,9 +3,10 @@ from __future__ import annotations
 import itertools
 import logging
 import re
+import tempfile
 from enum import Enum
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Generator, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
 
 from langroid.exceptions import LangroidImportError
 from langroid.utils.object_registry import ObjectRegistry
@@ -15,18 +16,24 @@ try:
 except ImportError:
     if not TYPE_CHECKING:
         fitz = None
+try:
+    import pymupdf4llm
+except ImportError:
+    if not TYPE_CHECKING:
+        pymupdf4llm = None
 
 try:
-    import pdfplumber
+    import docling
 except ImportError:
     if not TYPE_CHECKING:
-        pdfplumber = None
+        docling = None
 
 try:
-    import
+    import pypdf
 except ImportError:
     if not TYPE_CHECKING:
-
+        pypdf = None
+
 
 import requests
 from bs4 import BeautifulSoup
```
```diff
@@ -41,6 +48,7 @@ logger = logging.getLogger(__name__)
 
 
 class DocumentType(str, Enum):
+    # TODO add `md` (Markdown) and `html`
     PDF = "pdf"
     DOCX = "docx"
     DOC = "doc"
@@ -139,10 +147,12 @@ class DocumentParser(Parser):
         if inferred_doc_type == DocumentType.PDF:
             if config.pdf.library == "fitz":
                 return FitzPDFParser(source, config)
+            elif config.pdf.library == "pymupdf4llm":
+                return PyMuPDF4LLMParser(source, config)
+            elif config.pdf.library == "docling":
+                return DoclingParser(source, config)
             elif config.pdf.library == "pypdf":
                 return PyPDFParser(source, config)
-            elif config.pdf.library == "pdfplumber":
-                return PDFPlumberParser(source, config)
             elif config.pdf.library == "unstructured":
                 return UnstructuredPDFParser(source, config)
             elif config.pdf.library == "pdf2image":
```
```diff
@@ -307,8 +317,11 @@ class DocumentParser(Parser):
         """Yield each page in the PDF."""
         raise NotImplementedError
 
-    def
-        """
+    def get_document_from_page(self, page: Any) -> Document:
+        """
+        Get Langroid Document object (with possible metadata)
+        corresponding to a given page.
+        """
         raise NotImplementedError
 
     def fix_text(self, text: str) -> str:
@@ -335,7 +348,10 @@ class DocumentParser(Parser):
         """
 
         text = "".join(
-            [
+            [
+                self.get_document_from_page(page).content
+                for _, page in self.iterate_pages()
+            ]
         )
         return Document(content=text, metadata=DocMetaData(source=self.source))
 
@@ -359,7 +375,10 @@ class DocumentParser(Parser):
         common_id = ObjectRegistry.new_id()
         n_chunks = 0  # how many chunk so far
         for i, page in self.iterate_pages():
-
+            # not used but could be useful, esp to blend the
+            # metadata from the pages into the chunks
+            page_doc = self.get_document_from_page(page)
+            page_text = page_doc.content
             split += self.tokenizer.encode(page_text)
             pages.append(str(i + 1))
             # split could be so long it needs to be split
```
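The base class now asks each parser for a `Document` per page instead of a raw string, so per-page metadata can ride along. A minimal sketch of a custom parser against this interface (a hypothetical subclass, not part of langroid):

```python
from typing import Any, Generator, Tuple

from langroid.mytypes import DocMetaData, Document
from langroid.parsing.document_parser import DocumentParser


class LineParser(DocumentParser):
    """Hypothetical parser that treats each line of a text file as a 'page'."""

    def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
        text = self.doc_bytes.getvalue().decode("utf-8", errors="ignore")
        for i, line in enumerate(text.splitlines()):
            yield i, line

    def get_document_from_page(self, page: Any) -> Document:
        # Each 'page' round-trips through a Document, so metadata can be
        # attached per page rather than only at whole-document level.
        return Document(
            content=self.fix_text(page),
            metadata=DocMetaData(source=self.source),
        )
```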
```diff
@@ -422,81 +441,152 @@ class FitzPDFParser(DocumentParser):
             yield i, page
         doc.close()
 
-    def
+    def get_document_from_page(self, page: "fitz.Page") -> Document:
         """
-
+        Get Document object from a given `fitz` page.
 
         Args:
             page (fitz.Page): The `fitz` page object.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
-        return
+        return Document(
+            content=self.fix_text(page.get_text()),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
-class
+class PyMuPDF4LLMParser(DocumentParser):
     """
-    Parser for processing PDFs using the `
+    Parser for processing PDFs using the `pymupdf4llm` library.
     """
 
-    def iterate_pages(self) -> Generator[Tuple[int,
+    def iterate_pages(self) -> Generator[Tuple[int, "fitz.Page"], None, None]:
         """
-        Yield each page in the PDF using `
+        Yield each page in the PDF using `fitz`.
 
         Returns:
-            Generator[
+            Generator[fitz.Page]: Generator yielding each page.
         """
-        if
-            raise LangroidImportError(
-
-
+        if fitz is None:
+            raise LangroidImportError(
+                "pymupdf4llm", ["pymupdf4llm", "all", "pdf-parsers", "doc-chat"]
+            )
+        doc: fitz.Document = fitz.open(stream=self.doc_bytes, filetype="pdf")
+        pages: List[Dict[str, Any]] = pymupdf4llm.to_markdown(doc, page_chunks=True)
+        for i, page in enumerate(pages):
             yield i, page
+        doc.close()
 
-    def
+    def get_document_from_page(self, page: Dict[str, Any]) -> Document:
         """
-
+        Get Document object corresponding to a given "page-chunk"
+        dictionary, see:
+        https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/api.html
+
 
         Args:
-            page (
+            page (Dict[str,Any]): The "page-chunk" dictionary.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
-        return
+        return Document(
+            content=self.fix_text(page.get("text", "")),
+            # TODO could possible use other metadata from page, see above link.
+            metadata=DocMetaData(source=self.source),
+        )
 
 
-class
+class DoclingParser(DocumentParser):
     """
-    Parser for processing PDFs using the `
+    Parser for processing PDFs using the `docling` library.
    """
 
-    def iterate_pages(
-
-
+    def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
+        """
+        Yield each page in the PDF using `docling`.
+
+        Returns:
+            Generator[docling.Page]: Generator yielding each page.
         """
-
+        if docling is None:
+            raise LangroidImportError(
+                "docling", ["docling", "pdf-parsers", "all", "doc-chat"]
+            )
+        from docling.datamodel.document import TextItem  # type: ignore
+        from docling.document_converter import (  # type: ignore
+            ConversionResult,
+            DocumentConverter,
+        )
+
+        converter = DocumentConverter()
+        file_path = self.source
+        if file_path == "bytes":
+            with tempfile.NamedTemporaryFile(delete=False) as tmp:
+                tmp.write(self.doc_bytes.getvalue())
+                file_path = tmp.name
+        result: ConversionResult = converter.convert(file_path)
+        doc = result.document
+        n_pages = doc.num_pages()  # type: ignore
+        for i in range(n_pages):
+            texts = [
+                item[0].text
+                for item in doc.iterate_items(page_no=i + 1)
+                if isinstance(item[0], TextItem)
+            ]
+            text = "\n".join(texts)
+            yield i, text
+
+    def get_document_from_page(self, page: str) -> Document:
+        """
+        Get Document object from a given `docling` "page" (actually a chunk).
+
+        Args:
+            page (docling.chunking.DocChunk): The `docling` chunk
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
-
-
-
-
-
+        return Document(
+            content=self.fix_text(page),
+            metadata=DocMetaData(source=self.source),
+        )
+
 
-
+class PyPDFParser(DocumentParser):
+    """
+    Parser for processing PDFs using the `pypdf` library.
+    """
+
+    def iterate_pages(self) -> Generator[Tuple[int, pypdf.PageObject], None, None]:
         """
-
+        Yield each page in the PDF using `pypdf`.
+
+        Returns:
+            Generator[pypdf.pdf.PageObject]: Generator yielding each page.
+        """
+        if pypdf is None:
+            raise LangroidImportError("pypdf", "pdf-parsers")
+        reader = pypdf.PdfReader(self.doc_bytes)
+        for i, page in enumerate(reader.pages):
+            yield i, page
+
+    def get_document_from_page(self, page: pypdf.PageObject) -> Document:
+        """
+        Get Document object from a given `pypdf` page.
 
         Args:
-            page (
+            page (pypdf.pdf.PageObject): The `pypdf` page object.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
-        return
+        return Document(
+            content=self.fix_text(page.extract_text()),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class ImagePdfParser(DocumentParser):
```
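A usage sketch of the new parsers via the `DocumentParser.create` factory shown earlier; the file path is a placeholder, and `get_doc` as the whole-document accessor is assumed unchanged in this release:

```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

cfg = ParsingConfig(pdf=PdfParsingConfig(library="pymupdf4llm"))  # or "docling"
parser = DocumentParser.create("my_paper.pdf", cfg)  # hypothetical local file
doc = parser.get_doc()  # one Document concatenating all page contents
```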
```diff
@@ -516,15 +606,15 @@ class ImagePdfParser(DocumentParser):
         for i, image in enumerate(images):
             yield i, image
 
-    def
+    def get_document_from_page(self, page: "Image") -> Document:  # type: ignore
         """
-
+        Get Document object corresponding to a given `pdf2image` page.
 
         Args:
             page (Image): The PIL Image object.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
         try:
             import pytesseract
@@ -532,7 +622,10 @@ class ImagePdfParser(DocumentParser):
             raise LangroidImportError("pytesseract", "pdf-parsers")
 
         text = pytesseract.image_to_string(page)
-        return
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredPDFParser(DocumentParser):
```
```diff
@@ -564,8 +657,8 @@ class UnstructuredPDFParser(DocumentParser):
                 The `unstructured` library failed to parse the pdf.
                 Please try a different library by setting the `library` field
                 in the `pdf` section of the `parsing` field in the config file.
-
-                fitz,
+                Other supported libraries are:
+                fitz, pymupdf4llm, pypdf
                 """
             )
 
@@ -584,18 +677,21 @@ class UnstructuredPDFParser(DocumentParser):
             if page_elements:
                 yield page_number, page_elements
 
-    def
+    def get_document_from_page(self, page: Any) -> Document:
         """
-
+        Get Document object from a given `unstructured` element.
 
         Args:
             page (unstructured element): The `unstructured` element object.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
         text = " ".join(el.text for el in page)
-        return
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredDocxParser(DocumentParser):
```
```diff
@@ -632,9 +728,9 @@ class UnstructuredDocxParser(DocumentParser):
             if page_elements:
                 yield page_number, page_elements
 
-    def
+    def get_document_from_page(self, page: Any) -> Document:
         """
-
+        Get Document object from a given `unstructured` element.
 
         Note:
             The concept of "pages" doesn't actually exist in the .docx file format in
@@ -647,10 +743,13 @@ class UnstructuredDocxParser(DocumentParser):
             page (unstructured element): The `unstructured` element object.
 
         Returns:
-
+            Document object, with content and possible metadata.
         """
         text = " ".join(el.text for el in page)
-        return
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredDocParser(UnstructuredDocxParser):
@@ -704,15 +803,19 @@ class PythonDocxParser(DocumentParser):
         for i, para in enumerate(doc.paragraphs, start=1):
             yield i, [para]
 
-    def
+    def get_document_from_page(self, page: Any) -> Document:
         """
-
+        Get Document object from a given 'page', which in this case is a single
+        paragraph.
 
         Args:
             page (list): A list containing a single Paragraph object.
 
         Returns:
-
+            Document: Document object, with content and possible metadata.
         """
         paragraph = page[0]
-        return
+        return Document(
+            content=self.fix_text(paragraph.text),
+            metadata=DocMetaData(source=self.source),
+        )
```
langroid/parsing/parser.py CHANGED

```diff
@@ -23,11 +23,12 @@ class Splitter(str, Enum):
 class PdfParsingConfig(BaseSettings):
     library: Literal[
         "fitz",
-        "pdfplumber",
+        "pymupdf4llm",
+        "docling",
         "pypdf",
         "unstructured",
         "pdf2image",
-    ] = "
+    ] = "pymupdf4llm"
 
 
 class DocxParsingConfig(BaseSettings):
```
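Since `"pdfplumber"` is no longer an accepted value of the `library` Literal (its parser is removed in the `document_parser.py` hunks above), configs that pinned it need to move to one of the remaining options:

```python
from langroid.parsing.parser import PdfParsingConfig

# 0.36.x configs with library="pdfplumber" must switch, e.g.:
pdf_cfg = PdfParsingConfig(library="docling")  # or "fitz", "pymupdf4llm", "pypdf"
```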
```diff
@@ -40,6 +41,7 @@ class DocParsingConfig(BaseSettings):
 
 class ParsingConfig(BaseSettings):
     splitter: str = Splitter.TOKENS
+    chunk_by_page: bool = False  # split by page?
     chunk_size: int = 200  # aim for this many tokens per chunk
     overlap: int = 50  # overlap between chunks
     max_chunks: int = 10_000
@@ -49,7 +51,7 @@ class ParsingConfig(BaseSettings):
     n_similar_docs: int = 4
     n_neighbor_ids: int = 5  # window size to store around each chunk
     separators: List[str] = ["\n\n", "\n", " ", ""]
-    token_encoding_model: str = "text-embedding-ada-002"
+    token_encoding_model: str = "text-embedding-3-large"
     pdf: PdfParsingConfig = PdfParsingConfig()
     docx: DocxParsingConfig = DocxParsingConfig()
     doc: DocParsingConfig = DocParsingConfig()
```
```diff
@@ -61,7 +63,7 @@ class Parser:
         try:
             self.tokenizer = tiktoken.encoding_for_model(config.token_encoding_model)
         except Exception:
-            self.tokenizer = tiktoken.encoding_for_model("text-embedding-ada-002")
+            self.tokenizer = tiktoken.encoding_for_model("text-embedding-3-small")
 
     def num_tokens(self, text: str) -> int:
         tokens = self.tokenizer.encode(text)
```
```diff
@@ -267,9 +269,11 @@ class Parser:
             # Truncate the chunk text at the punctuation mark
             chunk_text = chunk_text[: last_punctuation + 1]
 
-            #
-            #
-
+            # Replace redundant (3 or more) newlines with 2 newlines to preserve
+            # paragraph separation!
+            # But do NOT strip leading/trailing whitespace, to preserve formatting
+            # (e.g. code blocks, or in case we want to stitch chunks back together)
+            chunk_text_to_append = re.sub(r"\n{3,}", "\n\n", chunk_text)
 
             if len(chunk_text_to_append) > self.config.discard_chunk_chars:
                 # Append the chunk text to the list of chunks
```
langroid/parsing/utils.py CHANGED

```diff
@@ -310,9 +310,9 @@ def extract_numbered_segments(s: str, specs: str) -> str:
             ]
 
             # If we extracted any segments from this paragraph,
-            # join them and append to results
+            # join them with ellipsis (...) and append to results.
             if extracted_segments:
-                extracted_paragraphs.append("
+                extracted_paragraphs.append("...".join(extracted_segments))
 
     return "\n\n".join(extracted_paragraphs)
 
```
langroid/utils/output/citations.py CHANGED

```diff
@@ -17,25 +17,45 @@ def extract_markdown_references(md_string: str) -> list[int]:
     return sorted(set(int(match) for match in matches))
 
 
-def format_footnote_text(content: str, width: int =
+def format_footnote_text(content: str, width: int = 0) -> str:
     """
-    Formats the content
-
-
-    lines
+    Formats the content so that each original line is individually processed.
+    - If width=0, no wrapping is done (lines remain as is).
+    - If width>0, lines are wrapped to that width.
+    - Blank lines remain blank (with indentation).
+    - Everything is indented by 4 spaces (for markdown footnotes).
 
     Args:
         content (str): The text of the footnote to be formatted.
-        width (int): Maximum width of the text lines.
+        width (int): Maximum width of the text lines. If 0, lines are not wrapped.
 
     Returns:
         str: Properly formatted markdown footnote text.
     """
     import textwrap
 
-
-
-
-
-
-
+    indent = "    "  # 4 spaces for markdown footnotes
+    lines = content.split("\n")  # keep original line structure
+
+    output_lines = []
+    for line in lines:
+        # If the line is empty (or just spaces), keep it blank (but indented)
+        if not line.strip():
+            output_lines.append(indent)
+            continue
+
+        if width > 0:
+            # Wrap each non-empty line to the specified width
+            wrapped = textwrap.wrap(line, width=width)
+            if not wrapped:
+                # If textwrap gives nothing, add a blank (indented) line
+                output_lines.append(indent)
+            else:
+                for subline in wrapped:
+                    output_lines.append(indent + subline)
+        else:
+            # No wrapping: just indent the original line
+            output_lines.append(indent + line)
+
+    # Join them with newline so we preserve the paragraph/blank line structure
+    return "\n".join(output_lines)
```
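With the new default `width=0`, footnote text is indented but never re-wrapped; a positive width restores per-line wrapping. Expected behavior given the code above (import path per the module shown in the RECORD):

```python
from langroid.utils.output.citations import format_footnote_text

text = "First line\n\nSecond paragraph, kept on its own line"
print(format_footnote_text(text))            # width=0: 4-space indent, no wrapping
print(format_footnote_text(text, width=20))  # each line wrapped to 20 chars, then indented
```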
langroid/vector_store/base.py CHANGED

```diff
@@ -264,7 +264,7 @@ class VectorStore(ABC):
             metadata = copy.deepcopy(id2metadata[w[0]])
             metadata.window_ids = w
             document = Document(
-                content="
+                content="".join([d.content for d in self.get_documents_by_ids(w)]),
                 metadata=metadata,
             )
             # make a fresh id since content is in general different
```
langroid/vector_store/chromadb.py CHANGED

```diff
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Any, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
 
 from langroid.embedding_models.base import (
     EmbeddingModelsConfig,
@@ -18,6 +18,10 @@ logger = logging.getLogger(__name__)
 class ChromaDBConfig(VectorStoreConfig):
     collection_name: str = "temp"
     storage_path: str = ".chroma/data"
+    distance: Literal["cosine", "l2", "ip"] = "cosine"
+    construction_ef: int = 100
+    search_ef: int = 100
+    max_neighbors: int = 16
     embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
     host: str = "127.0.0.1"
     port: int = 6333
@@ -109,6 +113,13 @@ class ChromaDB(VectorStore):
             name=self.config.collection_name,
             embedding_function=self.embedding_fn,
             get_or_create=not replace,
+            metadata={
+                "hnsw:space": self.config.distance,
+                "hnsw:construction_ef": self.config.construction_ef,
+                "hnsw:search_ef": self.config.search_ef,
+                # we could expose other configs, see:
+                # https://docs.trychroma.com/docs/collections/configure
+            },
         )
 
     def add_documents(self, documents: Sequence[Document]) -> None:
```
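The new `ChromaDBConfig` fields map onto Chroma's HNSW collection metadata (`hnsw:space`, `hnsw:construction_ef`, `hnsw:search_ef`); note that `max_neighbors` is declared on the config but not forwarded in the metadata shown here. A hedged configuration sketch:

```python
from langroid.vector_store.chromadb import ChromaDB, ChromaDBConfig

cfg = ChromaDBConfig(
    collection_name="docs",
    distance="cosine",    # or "l2", "ip"
    construction_ef=200,  # higher: better index quality, slower builds
    search_ef=200,        # higher: better recall, slower queries
)
vecdb = ChromaDB(cfg)
```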
langroid/vector_store/qdrantdb.py CHANGED

```diff
@@ -78,7 +78,7 @@ class QdrantDB(VectorStore):
         super().__init__(config)
         self.config: QdrantDBConfig = config
         self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
-        self.embedding_dim = self.
+        self.embedding_dim = len(self.embedding_fn(["test"])[0])
         if self.config.use_sparse_embeddings:
             try:
                 from transformers import AutoModelForMaskedLM, AutoTokenizer
```
langroid/vector_store/weaviatedb.py CHANGED

```diff
@@ -43,8 +43,8 @@ class WeaviateDB(VectorStore):
             load_dotenv()
             key = os.getenv("WEAVIATE_API_KEY")
             url = os.getenv("WEAVIATE_API_URL")
-            if None
-
+            if url is None or key is None:
+                raise ValueError(
                 """WEAVIATE_API_KEY, WEAVIATE_API_URL env variable must be set to use
                 WeaviateDB in cloud mode. Please set these values
                 in your .env file.
@@ -130,9 +130,9 @@ class WeaviateDB(VectorStore):
         vector_index_config = Configure.VectorIndex.hnsw(
             distance_metric=VectorDistances.COSINE,
         )
-        if self.config.embedding
+        if isinstance(self.config.embedding, OpenAIEmbeddingsConfig):
             vectorizer_config = Configure.Vectorizer.text2vec_openai(
-                model=self.
+                model=self.config.embedding.model_name,
             )
         else:
             vectorizer_config = None
@@ -212,7 +212,7 @@ class WeaviateDB(VectorStore):
             return_metadata=MetadataQuery(distance=True),
         )
         return [
-            (self.weaviate_obj_to_doc(item), 1 - item.metadata.distance)
+            (self.weaviate_obj_to_doc(item), 1 - (item.metadata.distance or 1))
             for item in response.objects
         ]
 
```
{langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.36.0
+Version: 0.37.0
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -12,6 +12,7 @@ Requires-Dist: async-generator<2.0,>=1.10
 Requires-Dist: bs4<1.0.0,>=0.0.1
 Requires-Dist: cerebras-cloud-sdk<2.0.0,>=1.1.0
 Requires-Dist: colorlog<7.0.0,>=6.7.0
+Requires-Dist: docling<3.0.0,>=2.16.0
 Requires-Dist: docstring-parser<1.0,>=0.16
 Requires-Dist: duckduckgo-search<7.0.0,>=6.0.0
 Requires-Dist: faker<19.0.0,>=18.9.0
@@ -32,9 +33,10 @@ Requires-Dist: onnxruntime<2.0.0,>=1.16.1
 Requires-Dist: openai<2.0.0,>=1.45.0
 Requires-Dist: pandas<3.0.0,>=2.0.3
 Requires-Dist: prettytable<4.0.0,>=3.8.0
-Requires-Dist: pydantic<
+Requires-Dist: pydantic<3.0.0,>=1
 Requires-Dist: pygithub<2.0.0,>=1.58.1
 Requires-Dist: pygments<3.0.0,>=2.15.1
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17
 Requires-Dist: pyparsing<4.0.0,>=3.0.9
 Requires-Dist: pytest-rerunfailures<16.0,>=15.0
 Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -55,14 +57,15 @@ Provides-Extra: all
 Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'all'
 Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == 'all'
 Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'all'
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'all'
 Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'all'
-Requires-Dist: huggingface-hub<0.
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'all'
 Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'all'
 Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == 'all'
 Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'all'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'all'
-Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'all'
 Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'all'
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'all'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'all'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'all'
 Requires-Dist: pypdf>=5.1.0; extra == 'all'
@@ -74,7 +77,7 @@ Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'all'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
-Requires-Dist: unstructured[docx,pdf,pptx]<0.
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'all'
 Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
 Provides-Extra: arango
 Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
@@ -89,13 +92,16 @@ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'db'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'db'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'db'
 Provides-Extra: doc-chat
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'doc-chat'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'doc-chat'
-Requires-Dist:
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'doc-chat'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'doc-chat'
 Requires-Dist: pypdf>=5.1.0; extra == 'doc-chat'
 Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'doc-chat'
 Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
-Requires-Dist: unstructured[docx,pdf,pptx]<0.
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'doc-chat'
+Provides-Extra: docling
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
 Provides-Extra: docx
 Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
 Provides-Extra: fastembed
@@ -104,7 +110,7 @@ Provides-Extra: hf-embeddings
 Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-embeddings'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-embeddings'
 Provides-Extra: hf-transformers
-Requires-Dist: huggingface-hub<0.
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'hf-transformers'
 Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-transformers'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-transformers'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'hf-transformers'
@@ -125,13 +131,16 @@ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'mysql'
 Provides-Extra: neo4j
 Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'neo4j'
 Provides-Extra: pdf-parsers
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'pdf-parsers'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'pdf-parsers'
-Requires-Dist:
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
 Requires-Dist: pypdf>=5.1.0; extra == 'pdf-parsers'
 Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'pdf-parsers'
 Provides-Extra: postgres
 Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'postgres'
+Provides-Extra: pymupdf4llm
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pymupdf4llm'
 Provides-Extra: scrapy
 Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == 'scrapy'
 Provides-Extra: sql
@@ -139,11 +148,11 @@ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'sql'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'sql'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'sql'
 Provides-Extra: transformers
-Requires-Dist: huggingface-hub<0.
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'transformers'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'transformers'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'transformers'
 Provides-Extra: unstructured
-Requires-Dist: unstructured[docx,pdf,pptx]<0.
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'unstructured'
 Provides-Extra: vecdbs
 Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
 Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
@@ -292,20 +301,28 @@ teacher_task.run()
 <summary> <b>Click to expand</b></summary>
 
 - **Jan 2025:**
-  - [0.
+  - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
+  - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
+    Reasoning LLMs (e.g. DeepSeek, OpenAI o1) in addition to final answer.
+  - [0.34.0](https://github.com/langroid/langroid/releases/tag/0.34.0): DocChatAgent
+    chunk enrichment to improve retrieval. (collaboration with @dfm88).
+  - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv! (thanks @abab-dev).
   - [0.32.0](https://github.com/langroid/langroid/releases/tag/0.32.0) DeepSeek v3 support.
 - **Dec 2024:**
   - [0.31.0](https://github.com/langroid/langroid/releases/tag/0.31.0) Azure OpenAI Embeddings
-  - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
-  - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
+  - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings (thanks @Kwigg).
+  - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client (thanks
+    @johannestang).
   - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
-    default handler method name in `request` field.
+    default handler method name in `request` field (thanks @alexagr).
   - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
   - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
-  - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and
+  - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and
+    user-response (thanks @alexagr).
 - **Nov 2024:**
   - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
     Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
+    (thanks @nilspalumbo).
   - **[0.23.0](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat)**:
     support for LLMs (e.g. `Qwen2.5-Coder-32b-Instruct`) hosted on glhf.chat
   - **[0.22.0](https://langroid.github.io/langroid/notes/large-tool-results/)**:
```
{langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/RECORD CHANGED

```diff
@@ -1,5 +1,5 @@
 langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
-langroid/exceptions.py,sha256=
+langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=h1eMq1ZwTLVezObPfCseWNWbEOzP7mAKu2XoS63W1cM,2647
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
@@ -14,7 +14,7 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/callbacks/chainlit.py,sha256=RH8qUXaZE5o2WQz3WJQ1SdFtASGlxWCA6_HYz_3meDQ,20822
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=
+langroid/agent/special/doc_chat_agent.py,sha256=qoXp6PKI7oAQs8rgj934NzZaEEKsPICcgYl_iQY0bac,64818
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -57,7 +57,7 @@ langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEc
 langroid/cachedb/redis_cachedb.py,sha256=7kgnbf4b5CKsCrlL97mHWKvdvlLt8zgn7lc528jEpiE,5141
 langroid/embedding_models/__init__.py,sha256=XhVIMQJbQRpImcnhA9sJR7h6r7QgPo1SKDCvwEUD9j4,851
 langroid/embedding_models/base.py,sha256=DUhvzALoW2UMbtmLxP4eJTfPii99WjUNX7bwFpj_K-0,2395
-langroid/embedding_models/models.py,sha256=
+langroid/embedding_models/models.py,sha256=YppD52U1lbeygt8_SuPNi6piOV_FgBltZWH5e3l7iso,16776
 langroid/embedding_models/remote_embeds.py,sha256=6_kjXByVbqhY9cGwl9R83ZcYC2km-nGieNNAo1McHaY,5151
 langroid/embedding_models/protoc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/embedding_models/protoc/embeddings.proto,sha256=_O-SgFpTaylQeOTgSpxhEJ7CUw7PeCQQJLaPqpPYKJg,321
@@ -77,11 +77,11 @@ langroid/language_models/prompt_formatter/hf_formatter.py,sha256=PVJppmjRvD-2DF-
 langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeuMENVIVvVqSYuEpvYSTndUe_jd6hVTko4,2899
 langroid/parsing/__init__.py,sha256=ZgSAfgTC6VsTLFlRSWT-TwYco7SQeRMeZG-49MnKYGY,936
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
-langroid/parsing/code_parser.py,sha256=
-langroid/parsing/document_parser.py,sha256=
+langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
+langroid/parsing/document_parser.py,sha256=1DjkoiieuPxlPtX-3FGzr3frDSKOjfKM4PhaKbVNQ1c,28570
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=
+langroid/parsing/parser.py,sha256=WDv4QnNtAcLSiPe6cPhHOa-aMhrt3OV-kKnVXdgwtmI,12276
 langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
 langroid/parsing/search.py,sha256=0i_r0ESb5HEQfagA2g7_uMQyxYPADWVbdcN9ixZhS4E,8992
@@ -89,7 +89,7 @@ langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,32
 langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
 langroid/parsing/url_loader.py,sha256=JK48KktLRDBfjrt4nsUfy92M6yGdEeicAqOum2MdULM,4656
 langroid/parsing/urls.py,sha256=XjpaV5onG7gKQ5iQeFTzHSw5P08Aqw0g-rMUu61lR6s,7988
-langroid/parsing/utils.py,sha256=
+langroid/parsing/utils.py,sha256=YrV2GNL4EOBGknA4AClPGdJ4S5B31radrt-Ou8OAKoU,12749
 langroid/parsing/web_search.py,sha256=8rW8EI3tyHITaB2l9MT_6yLMeQfo8y-Ih-8N2v2uMpk,4931
 langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
 langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
@@ -111,18 +111,18 @@ langroid/utils/types.py,sha256=4GrOnU3HLWh-UwaUPp7LlB3V413q3K5OSzc0ggDoQ6A,2510
 langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
 langroid/utils/algorithms/graph.py,sha256=JbdpPnUOhw4-D6O7ou101JLA3xPCD0Lr3qaPoFCaRfo,2866
 langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87sbBPc,340
-langroid/utils/output/citations.py,sha256=
+langroid/utils/output/citations.py,sha256=mQhRXVN-uhmKd2z32UZQBE0adZGEaQJ7cVXLfkrcZJI,2221
 langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
 langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
 langroid/vector_store/__init__.py,sha256=BcoOm1tG3y0EqjkIGmMOHkY9iTUhDHgyruknWDKgqIg,1214
-langroid/vector_store/base.py,sha256=
-langroid/vector_store/chromadb.py,sha256=
+langroid/vector_store/base.py,sha256=suBanIt0iKEgnMnGdQOyWS58guG20Jyy-GK4DMMuYL0,14208
+langroid/vector_store/chromadb.py,sha256=XkpW7pnSf6Lk7Nf1BEIw-zjYGYchoWHgrhnJX7YmxD8,8725
 langroid/vector_store/lancedb.py,sha256=b3_vWkTjG8mweZ7ZNlUD-NjmQP_rLBZfyKWcxt2vosA,14855
 langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
 langroid/vector_store/momento.py,sha256=UNHGT6jXuQtqY9f6MdqGU14bVnS0zHgIJUa30ULpUJo,10474
-langroid/vector_store/qdrantdb.py,sha256=
-langroid/vector_store/weaviatedb.py,sha256=
-langroid-0.
-langroid-0.
-langroid-0.
-langroid-0.
+langroid/vector_store/qdrantdb.py,sha256=Cen6f-y6witiR53UQ-5a605Reo0gTj3ygXpE_ehYoZo,18116
+langroid/vector_store/weaviatedb.py,sha256=C6jd1Twl5_jux3JYyrcTfQb63Lk9HuiUzVF4NahXuGo,10642
+langroid-0.37.0.dist-info/METADATA,sha256=hlweiAhkVzVb_sVOPF-adwqwDPpAUUsgE1wJFRYNnKg,60524
+langroid-0.37.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.37.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.37.0.dist-info/RECORD,,
```
{langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/WHEEL: file without changes
{langroid-0.36.0.dist-info → langroid-0.37.0.dist-info}/licenses/LICENSE: file without changes