langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +95 -0
- langroid/agent/__init__.py +40 -0
- langroid/agent/base.py +222 -91
- langroid/agent/batch.py +264 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +247 -101
- langroid/agent/chat_document.py +41 -4
- langroid/agent/openai_assistant.py +842 -0
- langroid/agent/special/__init__.py +50 -0
- langroid/agent/special/doc_chat_agent.py +837 -141
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +127 -0
- langroid/agent/special/retriever_agent.py +32 -198
- langroid/agent/special/sql/__init__.py +11 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +22 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +475 -122
- langroid/agent/tool_message.py +75 -13
- langroid/agent/tools/__init__.py +13 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +16 -29
- langroid/agent/tools/run_python_code.py +60 -0
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/agent/tools/segment_extract_tool.py +36 -0
- langroid/cachedb/__init__.py +9 -0
- langroid/cachedb/base.py +22 -2
- langroid/cachedb/momento_cachedb.py +26 -2
- langroid/cachedb/redis_cachedb.py +78 -11
- langroid/embedding_models/__init__.py +34 -0
- langroid/embedding_models/base.py +21 -2
- langroid/embedding_models/models.py +120 -18
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +45 -0
- langroid/language_models/azure_openai.py +80 -27
- langroid/language_models/base.py +117 -12
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_assistants.py +3 -0
- langroid/language_models/openai_gpt.py +558 -174
- langroid/language_models/prompt_formatter/__init__.py +15 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +18 -21
- langroid/mytypes.py +25 -8
- langroid/parsing/__init__.py +46 -0
- langroid/parsing/document_parser.py +260 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +122 -59
- langroid/parsing/repo_loader.py +114 -52
- langroid/parsing/search.py +68 -63
- langroid/parsing/spider.py +3 -2
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -11
- langroid/parsing/urls.py +85 -37
- langroid/parsing/utils.py +298 -4
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +11 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +17 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/algorithms/graph.py +103 -0
- langroid/utils/configuration.py +36 -5
- langroid/utils/constants.py +4 -0
- langroid/utils/globals.py +2 -2
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +21 -0
- langroid/utils/output/printing.py +47 -1
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +616 -2
- langroid/utils/system.py +98 -0
- langroid/vector_store/__init__.py +40 -0
- langroid/vector_store/base.py +203 -6
- langroid/vector_store/chromadb.py +59 -32
- langroid/vector_store/lancedb.py +463 -0
- langroid/vector_store/meilisearch.py +10 -7
- langroid/vector_store/momento.py +262 -0
- langroid/vector_store/qdrantdb.py +104 -22
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
- langroid-0.1.219.dist-info/RECORD +127 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.85.dist-info/RECORD +0 -94
- /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
@@ -12,20 +12,30 @@ langroid with the [hf-embeddings] extra, e.g.:
 pip install "langroid[hf-embeddings]"

 """
+
 import logging
 from contextlib import ExitStack
-from
+from functools import cache
+from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check

-
-
+import nest_asyncio
+import numpy as np
+import pandas as pd
+from rich.prompt import Prompt

-from langroid.agent.
+from langroid.agent.batch import run_batch_tasks
 from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
 from langroid.agent.chat_document import ChatDocMetaData, ChatDocument
+from langroid.agent.special.relevance_extractor_agent import (
+    RelevanceExtractorAgent,
+    RelevanceExtractorAgentConfig,
+)
+from langroid.agent.task import Task
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
 from langroid.language_models.base import StreamingIfAllowed
 from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
 from langroid.mytypes import DocMetaData, Document, Entity
+from langroid.parsing.document_parser import DocumentType
 from langroid.parsing.parser import Parser, ParsingConfig, PdfParsingConfig, Splitter
 from langroid.parsing.repo_loader import RepoLoader
 from langroid.parsing.search import (
@@ -33,20 +43,26 @@ from langroid.parsing.search import (
     find_fuzzy_matches_in_docs,
     preprocess_text,
 )
+from langroid.parsing.table_loader import describe_dataframe
 from langroid.parsing.url_loader import URLLoader
-from langroid.parsing.urls import
+from langroid.parsing.urls import get_list_from_user, get_urls_paths_bytes_indices
 from langroid.parsing.utils import batched
 from langroid.prompts.prompts_config import PromptsConfig
 from langroid.prompts.templates import SUMMARY_ANSWER_PROMPT_GPT4
 from langroid.utils.configuration import settings
 from langroid.utils.constants import NO_ANSWER
-from langroid.utils.output
-from langroid.
-from langroid.vector_store.
+from langroid.utils.output import show_if_debug, status
+from langroid.utils.pydantic_utils import dataframe_to_documents, extract_fields
+from langroid.vector_store.base import VectorStore, VectorStoreConfig
+from langroid.vector_store.lancedb import LanceDBConfig

-logger = logging.getLogger(__name__)

-
+@cache
+def apply_nest_asyncio() -> None:
+    nest_asyncio.apply()
+
+
+logger = logging.getLogger(__name__)

 DEFAULT_DOC_CHAT_INSTRUCTIONS = """
 Your task is to answer questions about various documents.
@@ -58,25 +74,29 @@ DEFAULT_DOC_CHAT_SYSTEM_MESSAGE = """
 You are a helpful assistant, helping me understand a collection of documents.
 """

+has_sentence_transformers = False
+try:
+    from sentence_transformer import SentenceTransformer  # noqa: F401

-
-
-
-    max_context_tokens (int): threshold to use for various steps, e.g.
-        if we are able to fit the current stage of doc processing into
-        this many tokens, we skip additional compression steps, and
-        use the current docs as-is in the context
-    conversation_mode (bool): if True, we will accumulate message history,
-        and pass entire history to LLM at each round.
-        If False, each request to LLM will consist only of the
-        initial task messages plus the current query.
-    """
+    has_sentence_transformers = True
+except ImportError:
+    pass

+
+class DocChatAgentConfig(ChatAgentConfig):
     system_message: str = DEFAULT_DOC_CHAT_SYSTEM_MESSAGE
     user_message: str = DEFAULT_DOC_CHAT_INSTRUCTIONS
     summarize_prompt: str = SUMMARY_ANSWER_PROMPT_GPT4
-
-
+    # extra fields to include in content as key=value pairs
+    # (helps retrieval for table-like data)
+    add_fields_to_content: List[str] = []
+    filter_fields: List[str] = []  # fields usable in filter
+    retrieve_only: bool = False  # only retr relevant extracts, don't gen summary answer
+    extraction_granularity: int = 1  # granularity (in sentences) for relev extraction
+    filter: str | None = (
+        None  # filter condition for various lexical/semantic search fns
+    )
+    conversation_mode: bool = True  # accumulate message history?
     # In assistant mode, DocChatAgent receives questions from another Agent,
     # and those will already be in stand-alone form, so in this mode
     # there is no need to convert them to stand-alone form.
@@ -88,14 +108,26 @@ class DocChatAgentConfig(ChatAgentConfig):
     # It is False by default; its benefits depends on the context.
     hypothetical_answer: bool = False
     n_query_rephrases: int = 0
+    n_neighbor_chunks: int = 0  # how many neighbors on either side of match to retrieve
+    n_fuzzy_neighbor_words: int = 100  # num neighbor words to retrieve for fuzzy match
     use_fuzzy_match: bool = True
     use_bm25_search: bool = True
-    cross_encoder_reranking_model: str =
+    cross_encoder_reranking_model: str = (
+        "cross-encoder/ms-marco-MiniLM-L-6-v2" if has_sentence_transformers else ""
+    )
+    rerank_diversity: bool = True  # rerank to maximize diversity?
+    rerank_periphery: bool = True  # rerank to avoid Lost In the Middle effect?
     embed_batch_size: int = 500  # get embedding of at most this many at a time
     cache: bool = True  # cache results
     debug: bool = False
     stream: bool = True  # allow streaming where needed
-
+    split: bool = True  # use chunking
+    relevance_extractor_config: None | RelevanceExtractorAgentConfig = (
+        RelevanceExtractorAgentConfig(
+            llm=None  # use the parent's llm unless explicitly set here
+        )
+    )
+    doc_paths: List[str | bytes] = []
     default_paths: List[str] = [
         "https://news.ycombinator.com/item?id=35629033",
         "https://www.newyorker.com/tech/annals-of-technology/chatgpt-is-a-blurry-jpeg-of-the-web",
@@ -115,11 +147,12 @@ class DocChatAgentConfig(ChatAgentConfig):
         min_chunk_chars=200,
         discard_chunk_chars=5,  # discard chunks with fewer than this many chars
         n_similar_docs=3,
+        n_neighbor_ids=0,  # num chunk IDs to store on either side of each chunk
         pdf=PdfParsingConfig(
             # NOTE: PDF parsing is extremely challenging, and each library
             # has its own strengths and weaknesses.
             # Try one that works for your use case.
-            # or "
+            # or "unstructured", "pdfplumber", "fitz", "pypdf"
             library="pdfplumber",
         ),
     )
@@ -136,10 +169,11 @@ class DocChatAgentConfig(ChatAgentConfig):
         dims=1536,
     )

-    vecdb: VectorStoreConfig =
-        collection_name=
-
-
+    vecdb: VectorStoreConfig = LanceDBConfig(
+        collection_name="doc-chat-lancedb",
+        replace_collection=True,
+        storage_path=".lancedb/data/",
+        embedding=hf_embed_config if has_sentence_transformers else oai_embed_config,
     )
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
@@ -163,14 +197,40 @@ class DocChatAgent(ChatAgent):
     ):
         super().__init__(config)
         self.config: DocChatAgentConfig = config
-        self.original_docs:
+        self.original_docs: List[Document] = []
         self.original_docs_length = 0
-        self.
-        self.
+        self.from_dataframe = False
+        self.df_description = ""
+        self.chunked_docs: List[Document] = []
+        self.chunked_docs_clean: List[Document] = []
         self.response: None | Document = None
         if len(config.doc_paths) > 0:
             self.ingest()

+    def clear(self) -> None:
+        """Clear the document collection and the specific collection in vecdb"""
+        if self.vecdb is None:
+            raise ValueError("VecDB not set")
+        self.original_docs = []
+        self.original_docs_length = 0
+        self.chunked_docs = []
+        self.chunked_docs_clean = []
+        collection_name = self.vecdb.config.collection_name
+        if collection_name is None:
+            return
+        try:
+            # Note we may have used a vecdb with a config.collection_name
+            # different from the agent's config.vecdb.collection_name!!
+            self.vecdb.delete_collection(collection_name)
+            self.vecdb = VectorStore.create(self.vecdb.config)
+        except Exception as e:
+            logger.warning(
+                f"""
+                Error while deleting collection {collection_name}:
+                {e}
+                """
+            )
+
     def ingest(self) -> None:
         """
         Chunk + embed + store docs specified by self.config.doc_paths
@@ -187,59 +247,316 @@ class DocChatAgent(ChatAgent):
         # do keyword and other non-vector searches
         if self.vecdb is None:
             raise ValueError("VecDB not set")
-            self.
-            self.chunked_docs_clean = [
-                Document(content=preprocess_text(d.content), metadata=d.metadata)
-                for d in self.chunked_docs
-            ]
+            self.setup_documents(filter=self.config.filter)
             return
-
+        self.ingest_doc_paths(self.config.doc_paths)  # type: ignore
+
+    def ingest_doc_paths(
+        self,
+        paths: str | bytes | List[str | bytes],
+        metadata: (
+            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
+        ) = [],
+        doc_type: str | DocumentType | None = None,
+    ) -> List[Document]:
+        """Split, ingest docs from specified paths,
+        do not add these to config.doc_paths.
+
+        Args:
+            paths: document paths, urls or byte-content of docs.
+                The bytes option is intended to support cases where a document
+                has already been read in as bytes (e.g. from an API or a database),
+                and we want to avoid having to write it to a temporary file
+                just to read it back in.
+            metadata: List of metadata dicts, one for each path.
+                If a single dict is passed in, it is used for all paths.
+            doc_type: DocumentType to use for parsing, if known.
+                MUST apply to all docs if specified.
+                This is especially useful when the `paths` are of bytes type,
+                to help with document type detection.
+        Returns:
+            List of Document objects
+        """
+        if isinstance(paths, str) or isinstance(paths, bytes):
+            paths = [paths]
+        all_paths = paths
+        paths_meta: Dict[int, Any] = {}
+        urls_meta: Dict[int, Any] = {}
+        idxs = range(len(all_paths))
+        url_idxs, path_idxs, bytes_idxs = get_urls_paths_bytes_indices(all_paths)
+        urls = [all_paths[i] for i in url_idxs]
+        paths = [all_paths[i] for i in path_idxs]
+        bytes_list = [all_paths[i] for i in bytes_idxs]
+        path_idxs.extend(bytes_idxs)
+        paths.extend(bytes_list)
+        if (isinstance(metadata, list) and len(metadata) > 0) or not isinstance(
+            metadata, list
+        ):
+            if isinstance(metadata, list):
+                idx2meta = {
+                    p: (
+                        m
+                        if isinstance(m, dict)
+                        else (isinstance(m, DocMetaData) and m.dict())
+                    )  # appease mypy
+                    for p, m in zip(idxs, metadata)
+                }
+            elif isinstance(metadata, dict):
+                idx2meta = {p: metadata for p in idxs}
+            else:
+                idx2meta = {p: metadata.dict() for p in idxs}
+            urls_meta = {u: idx2meta[u] for u in url_idxs}
+            paths_meta = {p: idx2meta[p] for p in path_idxs}
         docs: List[Document] = []
         parser = Parser(self.config.parsing)
         if len(urls) > 0:
-
-
-
-
-
+            for ui in url_idxs:
+                meta = urls_meta.get(ui, {})
+                loader = URLLoader(urls=[all_paths[ui]], parser=parser)  # type: ignore
+                url_docs = loader.load()
+                # update metadata of each doc with meta
+                for d in url_docs:
+                    d.metadata = d.metadata.copy(update=meta)
+                docs.extend(url_docs)
+        if len(paths) > 0:  # paths OR bytes are handled similarly
+            for pi in path_idxs:
+                meta = paths_meta.get(pi, {})
+                p = all_paths[pi]
+                path_docs = RepoLoader.get_documents(
+                    p,
+                    parser=parser,
+                    doc_type=doc_type,
+                )
+                # update metadata of each doc with meta
+                for d in path_docs:
+                    d.metadata = d.metadata.copy(update=meta)
                 docs.extend(path_docs)
         n_docs = len(docs)
-        n_splits = self.ingest_docs(docs)
+        n_splits = self.ingest_docs(docs, split=self.config.split)
         if n_docs == 0:
-            return
+            return []
         n_urls = len(urls)
         n_paths = len(paths)
         print(
             f"""
         [green]I have processed the following {n_urls} URLs
-        and {n_paths}
+        and {n_paths} docs into {n_splits} parts:
         """.strip()
         )
-
-        print("\n".join(
+        path_reps = [p if isinstance(p, str) else "bytes" for p in paths]
+        print("\n".join([u for u in urls if isinstance(u, str)]))  # appease mypy
+        print("\n".join(path_reps))
+        return docs

-    def ingest_docs(
+    def ingest_docs(
+        self,
+        docs: List[Document],
+        split: bool = True,
+        metadata: (
+            List[Dict[str, Any]] | Dict[str, Any] | DocMetaData | List[DocMetaData]
+        ) = [],
+    ) -> int:
         """
         Chunk docs into pieces, map each chunk to vec-embedding, store in vec-db
+
+        Args:
+            docs: List of Document objects
+            split: Whether to split docs into chunks. Default is True.
+                If False, docs are treated as "chunks" and are not split.
+            metadata: List of metadata dicts, one for each doc, to augment
+                whatever metadata is already in the doc.
+                [ASSUME no conflicting keys between the two metadata dicts.]
+                If a single dict is passed in, it is used for all docs.
         """
-
+        if isinstance(metadata, list) and len(metadata) > 0:
+            for d, m in zip(docs, metadata):
+                d.metadata = d.metadata.copy(
+                    update=m if isinstance(m, dict) else m.dict()  # type: ignore
+                )
+        elif isinstance(metadata, dict):
+            for d in docs:
+                d.metadata = d.metadata.copy(update=metadata)
+        elif isinstance(metadata, DocMetaData):
+            for d in docs:
+                d.metadata = d.metadata.copy(update=metadata.dict())
+
+        self.original_docs.extend(docs)
         if self.parser is None:
             raise ValueError("Parser not set")
-
-
-
-
-
-
+        for d in docs:
+            if d.metadata.id in [None, ""]:
+                d.metadata.id = d._unique_hash_id()
+        if split:
+            docs = self.parser.split(docs)
+        else:
+            # treat each doc as a chunk
+            for d in docs:
+                d.metadata.is_chunk = True
         if self.vecdb is None:
             raise ValueError("VecDB not set")
+
+        # If any additional fields need to be added to content,
+        # add them as key=value pairs for all docs, before batching.
+        # This helps retrieval for table-like data.
+        # Note we need to do this at stage so that the embeddings
+        # are computed on the full content with these additional fields.
+        if len(self.config.add_fields_to_content) > 0:
+            fields = [
+                f for f in extract_fields(docs[0], self.config.add_fields_to_content)
+            ]
+            if len(fields) > 0:
+                for d in docs:
+                    key_vals = extract_fields(d, fields)
+                    d.content = (
+                        ",".join(f"{k}={v}" for k, v in key_vals.items())
+                        + ",content="
+                        + d.content
+                    )
+        docs = docs[: self.config.parsing.max_chunks]
         # add embeddings in batches, to stay under limit of embeddings API
         batches = list(batched(docs, self.config.embed_batch_size))
         for batch in batches:
             self.vecdb.add_documents(batch)
         self.original_docs_length = self.doc_length(docs)
+        self.setup_documents(docs, filter=self.config.filter)
         return len(docs)

+    @staticmethod
+    def document_compatible_dataframe(
+        df: pd.DataFrame,
+        content: str = "content",
+        metadata: List[str] = [],
+    ) -> Tuple[pd.DataFrame, List[str]]:
+        """
+        Convert dataframe so it is compatible with Document class:
+        - has "content" column
+        - has an "id" column to be used as Document.metadata.id
+
+        Args:
+            df: dataframe to convert
+            content: name of content column
+            metadata: list of metadata column names
+
+        Returns:
+            Tuple[pd.DataFrame, List[str]]: dataframe, metadata
+                - dataframe: dataframe with "content" column and "id" column
+                - metadata: list of metadata column names, including "id"
+        """
+        if content not in df.columns:
+            raise ValueError(
+                f"""
+                Content column {content} not in dataframe,
+                so we cannot ingest into the DocChatAgent.
+                Please specify the `content` parameter as a suitable
+                text-based column in the dataframe.
+                """
+            )
+        if content != "content":
+            # rename content column to "content", leave existing column intact
+            df = df.rename(columns={content: "content"}, inplace=False)
+
+        actual_metadata = metadata.copy()
+        if "id" not in df.columns:
+            docs = dataframe_to_documents(df, content="content", metadata=metadata)
+            ids = [str(d.id()) for d in docs]
+            df["id"] = ids
+
+        if "id" not in actual_metadata:
+            actual_metadata += ["id"]
+
+        return df, actual_metadata
+
+    def ingest_dataframe(
+        self,
+        df: pd.DataFrame,
+        content: str = "content",
+        metadata: List[str] = [],
+    ) -> int:
+        """
+        Ingest a dataframe into vecdb.
+        """
+        self.from_dataframe = True
+        self.df_description = describe_dataframe(
+            df, filter_fields=self.config.filter_fields, n_vals=5
+        )
+        df, metadata = DocChatAgent.document_compatible_dataframe(df, content, metadata)
+        docs = dataframe_to_documents(df, content="content", metadata=metadata)
+        # When ingesting a dataframe we will no longer do any chunking,
+        # so we mark each doc as a chunk.
+        # TODO - revisit this since we may still want to chunk large text columns
+        for d in docs:
+            d.metadata.is_chunk = True
+        return self.ingest_docs(docs)
+
+    def set_filter(self, filter: str) -> None:
+        self.config.filter = filter
+        self.setup_documents(filter=filter)
+
+    def setup_documents(
+        self,
+        docs: List[Document] = [],
+        filter: str | None = None,
+    ) -> None:
+        """
+        Setup `self.chunked_docs` and `self.chunked_docs_clean`
+        based on possible filter.
+        These will be used in various non-vector-based search functions,
+        e.g. self.get_similar_chunks_bm25(), self.get_fuzzy_matches(), etc.
+
+        Args:
+            docs: List of Document objects. This is empty when we are calling this
+                method after initial doc ingestion.
+            filter: Filter condition for various lexical/semantic search fns.
+        """
+        if filter is None and len(docs) > 0:
+            # no filter, so just use the docs passed in
+            self.chunked_docs.extend(docs)
+        else:
+            if self.vecdb is None:
+                raise ValueError("VecDB not set")
+            self.chunked_docs = self.vecdb.get_all_documents(where=filter or "")
+
+        self.chunked_docs_clean = [
+            Document(content=preprocess_text(d.content), metadata=d.metadata)
+            for d in self.chunked_docs
+        ]
+
+    def get_field_values(self, fields: list[str]) -> Dict[str, str]:
+        """Get string-listing of possible values of each filterable field,
+        e.g.
+        {
+            "genre": "crime, drama, mystery, ... (10 more)",
+            "certificate": "R, PG-13, PG, R",
+        }
+        """
+        field_values: Dict[str, Set[str]] = {}
+        # make empty set for each field
+        for f in fields:
+            field_values[f] = set()
+        if self.vecdb is None:
+            raise ValueError("VecDB not set")
+        # get all documents and accumulate possible values of each field until 10
+        docs = self.vecdb.get_all_documents()  # only works for vecdbs that support this
+        for d in docs:
+            # extract fields from d
+            doc_field_vals = extract_fields(d, fields)
+            for field, val in doc_field_vals.items():
+                field_values[field].add(val)
+        # For each field make a string showing list of possible values,
+        # truncate to 20 values, and if there are more, indicate how many
+        # more there are, e.g. Genre: crime, drama, mystery, ... (20 more)
+        field_values_list = {}
+        for f in fields:
+            vals = list(field_values[f])
+            n = len(vals)
+            remaining = n - 20
+            vals = vals[:20]
+            if n > 20:
+                vals.append(f"(...{remaining} more)")
+            # make a string of the values, ensure they are strings
+            field_values_list[f] = ", ".join(str(v) for v in vals)
+        return field_values_list
+
     def doc_length(self, docs: List[Document]) -> int:
         """
         Calc token-length of a list of docs
@@ -252,7 +569,78 @@ class DocChatAgent(ChatAgent):
             raise ValueError("Parser not set")
         return self.parser.num_tokens(self.doc_string(docs))

-
+    def user_docs_ingest_dialog(self) -> None:
+        """
+        Ask user to select doc-collection, enter filenames/urls, and ingest into vecdb.
+        """
+        if self.vecdb is None:
+            raise ValueError("VecDB not set")
+        n_deletes = self.vecdb.clear_empty_collections()
+        collections = self.vecdb.list_collections()
+        collection_name = "NEW"
+        is_new_collection = False
+        replace_collection = False
+        if len(collections) > 0:
+            n = len(collections)
+            delete_str = (
+                f"(deleted {n_deletes} empty collections)" if n_deletes > 0 else ""
+            )
+            print(f"Found {n} collections: {delete_str}")
+            for i, option in enumerate(collections, start=1):
+                print(f"{i}. {option}")
+            while True:
+                choice = Prompt.ask(
+                    f"Enter 1-{n} to select a collection, "
+                    "or hit ENTER to create a NEW collection, "
+                    "or -1 to DELETE ALL COLLECTIONS",
+                    default="0",
+                )
+                try:
+                    if -1 <= int(choice) <= n:
+                        break
+                except Exception:
+                    pass
+
+            if choice == "-1":
+                confirm = Prompt.ask(
+                    "Are you sure you want to delete all collections?",
+                    choices=["y", "n"],
+                    default="n",
+                )
+                if confirm == "y":
+                    self.vecdb.clear_all_collections(really=True)
+                    collection_name = "NEW"
+
+            if int(choice) > 0:
+                collection_name = collections[int(choice) - 1]
+                print(f"Using collection {collection_name}")
+                choice = Prompt.ask(
+                    "Would you like to replace this collection?",
+                    choices=["y", "n"],
+                    default="n",
+                )
+                replace_collection = choice == "y"
+
+        if collection_name == "NEW":
+            is_new_collection = True
+            collection_name = Prompt.ask(
+                "What would you like to name the NEW collection?",
+                default="doc-chat",
+            )
+
+        self.vecdb.set_collection(collection_name, replace=replace_collection)
+
+        default_urls_str = (
+            " (or leave empty for default URLs)" if is_new_collection else ""
+        )
+        print(f"[blue]Enter some URLs or file/dir paths below {default_urls_str}")
+        inputs = get_list_from_user()
+        if len(inputs) == 0:
+            if is_new_collection:
+                inputs = self.config.default_paths
+        self.config.doc_paths = inputs  # type: ignore
+        self.ingest()
+
     def llm_response(
         self,
         query: None | str | ChatDocument = None,
@@ -269,10 +657,55 @@ class DocChatAgent(ChatAgent):
             query_str = query_str[1:] if query_str is not None else None
             if self.llm is None:
                 raise ValueError("LLM not set")
-            with StreamingIfAllowed(self.llm):
+            with StreamingIfAllowed(self.llm, self.llm.get_stream()):
                 response = super().llm_response(query_str)
             if query_str is not None:
-                self.update_dialog(
+                self.update_dialog(
+                    query_str, "" if response is None else response.content
+                )
+            return response
+        if query_str == "":
+            return None
+        elif query_str == "?" and self.response is not None:
+            return self.justify_response()
+        elif (query_str.startswith(("summar", "?")) and self.response is None) or (
+            query_str == "??"
+        ):
+            return self.summarize_docs()
+        else:
+            self.callbacks.show_start_response(entity="llm")
+            response = self.answer_from_docs(query_str)
+            return ChatDocument(
+                content=response.content,
+                metadata=ChatDocMetaData(
+                    source=response.metadata.source,
+                    sender=Entity.LLM,
+                ),
+            )
+
+    async def llm_response_async(
+        self,
+        query: None | str | ChatDocument = None,
+    ) -> Optional[ChatDocument]:
+        apply_nest_asyncio()
+        if not self.llm_can_respond(query):
+            return None
+        query_str: str | None
+        if isinstance(query, ChatDocument):
+            query_str = query.content
+        else:
+            query_str = query
+        if query_str is None or query_str.startswith("!"):
+            # direct query to LLM
+            query_str = query_str[1:] if query_str is not None else None
+            if self.llm is None:
+                raise ValueError("LLM not set")
+            with StreamingIfAllowed(self.llm, self.llm.get_stream()):
+                response = await super().llm_response_async(query_str)
+            if query_str is not None:
+                self.update_dialog(
+                    query_str, "" if response is None else response.content
+                )
             return response
         if query_str == "":
             return None
@@ -283,6 +716,7 @@ class DocChatAgent(ChatAgent):
         ):
             return self.summarize_docs()
         else:
+            self.callbacks.show_start_response(entity="llm")
             response = self.answer_from_docs(query_str)
             return ChatDocument(
                 content=response.content,
@@ -314,7 +748,9 @@ class DocChatAgent(ChatAgent):
             ]
         )

-    def get_summary_answer(
+    def get_summary_answer(
+        self, question: str, passages: List[Document]
+    ) -> ChatDocument:
         """
         Given a question and a list of (possibly) doc snippets,
         generate an answer if possible
@@ -342,9 +778,6 @@ class DocChatAgent(ChatAgent):
         # 2 new LLMMessage objects:
         # one for `final_prompt`, and one for the LLM response

-        # TODO need to "forget" last two messages in message_history
-        # if we are not in conversation mode
-
         if self.config.conversation_mode:
             # respond with temporary context
             answer_doc = super()._llm_response_temp_context(question, final_prompt)
@@ -353,16 +786,23 @@ class DocChatAgent(ChatAgent):

         final_answer = answer_doc.content.strip()
         show_if_debug(final_answer, "SUMMARIZE_RESPONSE= ")
-
-        if
-
-
-        else:
+
+        if final_answer.startswith("SOURCE"):
+            # sometimes SOURCE may be shown first,
+            # in this case just use final_answer as-is for both content and source
             content = final_answer
-            sources =
-
+            sources = final_answer
+        else:
+            parts = final_answer.split("SOURCE:", maxsplit=1)
+            if len(parts) > 1:
+                content = parts[0].strip()
+                sources = parts[1].strip()
+            else:
+                content = final_answer
+                sources = ""
+        return ChatDocument(
             content=content,
-            metadata=
+            metadata=ChatDocMetaData(
                 source="SOURCE: " + sources,
                 sender=Entity.LLM,
                 cached=getattr(answer_doc.metadata, "cached", False),
@@ -372,7 +812,7 @@ class DocChatAgent(ChatAgent):
     def llm_hypothetical_answer(self, query: str) -> str:
         if self.llm is None:
             raise ValueError("LLM not set")
-        with
+        with status("[cyan]LLM generating hypothetical answer..."):
             with StreamingIfAllowed(self.llm, False):
                 # TODO: provide an easy way to
                 # Adjust this prompt depending on context.
@@ -392,7 +832,7 @@ class DocChatAgent(ChatAgent):
    def llm_rephrase_query(self, query: str) -> List[str]:
         if self.llm is None:
             raise ValueError("LLM not set")
-        with
+        with status("[cyan]LLM generating rephrases of query..."):
             with StreamingIfAllowed(self.llm, False):
                 rephrases = self.llm_response_forget(
                     f"""
@@ -408,11 +848,13 @@ class DocChatAgent(ChatAgent):
     ) -> List[Tuple[Document, float]]:
         # find similar docs using bm25 similarity:
         # these may sometimes be more likely to contain a relevant verbatim extract
-        with
-            if self.chunked_docs is None:
-
-
-
+        with status("[cyan]Searching for similar chunks using bm25..."):
+            if self.chunked_docs is None or len(self.chunked_docs) == 0:
+                logger.warning("No chunked docs; cannot use bm25-similarity")
+                return []
+            if self.chunked_docs_clean is None or len(self.chunked_docs_clean) == 0:
+                logger.warning("No cleaned chunked docs; cannot use bm25-similarity")
+                return []
             docs_scores = find_closest_matches_with_bm25(
                 self.chunked_docs,
                 self.chunked_docs_clean,  # already pre-processed!
@@ -424,24 +866,27 @@ class DocChatAgent(ChatAgent):
     def get_fuzzy_matches(self, query: str, multiple: int) -> List[Document]:
         # find similar docs using fuzzy matching:
         # these may sometimes be more likely to contain a relevant verbatim extract
-        with
+        with status("[cyan]Finding fuzzy matches in chunks..."):
             if self.chunked_docs is None:
-
+                logger.warning("No chunked docs; cannot use fuzzy matching")
+                return []
+            if self.chunked_docs_clean is None:
+                logger.warning("No cleaned chunked docs; cannot use fuzzy-search")
+                return []
             fuzzy_match_docs = find_fuzzy_matches_in_docs(
                 query,
                 self.chunked_docs,
+                self.chunked_docs_clean,
                 k=self.config.parsing.n_similar_docs * multiple,
-                words_before=
-                words_after=
+                words_before=self.config.n_fuzzy_neighbor_words,
+                words_after=self.config.n_fuzzy_neighbor_words,
             )
             return fuzzy_match_docs

     def rerank_with_cross_encoder(
         self, query: str, passages: List[Document]
     ) -> List[Document]:
-        with
-            if self.chunked_docs is None:
-                raise ValueError("No chunked docs")
+        with status("[cyan]Re-ranking retrieved chunks using cross-encoder..."):
             try:
                 from sentence_transformers import CrossEncoder
             except ImportError:
@@ -455,6 +900,8 @@ class DocChatAgent(ChatAgent):

             model = CrossEncoder(self.config.cross_encoder_reranking_model)
             scores = model.predict([(query, p.content) for p in passages])
+            # Convert to [0,1] so we might could use a cutoff later.
+            scores = 1.0 / (1 + np.exp(-np.array(scores)))
             # get top k scoring passages
             sorted_pairs = sorted(
                 zip(scores, passages),
@@ -466,66 +913,187 @@ class DocChatAgent(ChatAgent):
             ]
         return passages

-
-    def get_relevant_extracts(self, query: str) -> Tuple[str, List[Document]]:
+    def rerank_with_diversity(self, passages: List[Document]) -> List[Document]:
         """
-
-
-        - use LLM to convert query to stand-alone query
-        - optionally rephrase query to use below
-        - optionally generate hypothetical answer (HyDE) to use below.
-        - get relevant doc-chunks, via:
-        - vector-embedding distance, from vecdb
-        - bm25-ranking (keyword similarity)
-        - fuzzy matching (keyword similarity)
-        - re-ranking of doc-chunks using cross-encoder, pick top k
-        - use LLM to get relevant extracts from doc-chunks
+        Rerank a list of items in such a way that each successive item is least similar
+        (on average) to the earlier items.

         Args:
-
+            query (str): The query for which the passages are relevant.
+            passages (List[Document]): A list of Documents to be reranked.

         Returns:
-
-
+            List[Documents]: A reranked list of Documents.
+        """

+        if self.vecdb is None:
+            logger.warning("No vecdb; cannot use rerank_with_diversity")
+            return passages
+        emb_model = self.vecdb.embedding_model
+        emb_fn = emb_model.embedding_fn()
+        embs = emb_fn([p.content for p in passages])
+        embs_arr = [np.array(e) for e in embs]
+        indices = list(range(len(passages)))
+
+        # Helper function to compute average similarity to
+        # items in the current result list.
+        def avg_similarity_to_result(i: int, result: List[int]) -> float:
+            return sum(  # type: ignore
+                (embs_arr[i] @ embs_arr[j])
+                / (np.linalg.norm(embs_arr[i]) * np.linalg.norm(embs_arr[j]))
+                for j in result
+            ) / len(result)
+
+        # copy passages to items
+        result = [indices.pop(0)]  # Start with the first item.
+
+        while indices:
+            # Find the item that has the least average similarity
+            # to items in the result list.
+            least_similar_item = min(
+                indices, key=lambda i: avg_similarity_to_result(i, result)
+            )
+            result.append(least_similar_item)
+            indices.remove(least_similar_item)
+
+        # return passages in order of result list
+        return [passages[i] for i in result]
+
+    def rerank_to_periphery(self, passages: List[Document]) -> List[Document]:
         """
-
-
-
-
-
-            with StreamingIfAllowed(self.llm, False):
-                query = self.llm.followup_to_standalone(self.dialog, query)
-            print(f"[orange2]New query: {query}")
+        Rerank to avoid Lost In the Middle (LIM) problem,
+        where LLMs pay more attention to items at the ends of a list,
+        rather than the middle. So we re-rank to make the best passages
+        appear at the periphery of the list.
+        https://arxiv.org/abs/2307.03172

+        Example reranking:
+        1 2 3 4 5 6 7 8 9 ==> 1 3 5 7 9 8 6 4 2
+
+        Args:
+            passages (List[Document]): A list of Documents to be reranked.
+
+        Returns:
+            List[Documents]: A reranked list of Documents.
+
+        """
+        # Splitting items into odds and evens based on index, not value
+        odds = passages[::2]
+        evens = passages[1::2][::-1]
+
+        # Merging them back together
+        return odds + evens
+
+    def add_context_window(
+        self,
+        docs_scores: List[Tuple[Document, float]],
+    ) -> List[Tuple[Document, float]]:
+        """
+        In each doc's metadata, there may be a window_ids field indicating
+        the ids of the chunks around the current chunk. We use these stored
+        window_ids to retrieve the desired number
+        (self.config.n_neighbor_chunks) of neighbors
+        on either side of the current chunk.
+
+        Args:
+            docs_scores (List[Tuple[Document, float]]): List of pairs of documents
+                to add context windows to together with their match scores.
+
+        Returns:
+            List[Tuple[Document, float]]: List of (Document, score) tuples.
+        """
+        if self.vecdb is None or self.config.n_neighbor_chunks == 0:
+            return docs_scores
+        if len(docs_scores) == 0:
+            return []
+        if set(docs_scores[0][0].__fields__) != {"content", "metadata"}:
+            # Do not add context window when there are other fields besides just
+            # content and metadata, since we do not know how to set those other fields
+            # for newly created docs with combined content.
+            return docs_scores
+        return self.vecdb.add_context_window(docs_scores, self.config.n_neighbor_chunks)
+
+    def get_semantic_search_results(
+        self,
+        query: str,
+        k: int = 10,
+    ) -> List[Tuple[Document, float]]:
+        """
+        Get semantic search results from vecdb.
+        Args:
+            query (str): query to search for
+            k (int): number of results to return
+        Returns:
+            List[Tuple[Document, float]]: List of (Document, score) tuples.
+        """
+        if self.vecdb is None:
+            raise ValueError("VecDB not set")
+        # Note: for dynamic filtering based on a query, users can
+        # use the `temp_update` context-manager to pass in a `filter` to self.config,
+        # e.g.:
+        # with temp_update(self.config, {"filter": "metadata.source=='source1'"}):
+        #     docs_scores = self.get_semantic_search_results(query, k=k)
+        # This avoids having pass the `filter` argument to every function call
+        # upstream of this one.
+        # The `temp_update` context manager is defined in
+        # `langroid/utils/pydantic_utils.py`
+        return self.vecdb.similar_texts_with_scores(
+            query,
+            k=k,
+            where=self.config.filter,
+        )
+
+    def get_relevant_chunks(
+        self, query: str, query_proxies: List[str] = []
+    ) -> List[Document]:
+        """
+        The retrieval stage in RAG: get doc-chunks that are most "relevant"
+        to the query (and possibly any proxy queries), from the document-store,
+        which currently is the vector store,
+        but in theory could be any document store, or even web-search.
+        This stage does NOT involve an LLM, and the retrieved chunks
+        could either be pre-chunked text (from the initial pre-processing stage
+        where chunks were stored in the vector store), or they could be
+        dynamically retrieved based on a window around a lexical match.
+
+        These are the steps (some optional based on config):
+        - semantic search based on vector-embedding distance, from vecdb
+        - lexical search using bm25-ranking (keyword similarity)
+        - fuzzy matching (keyword similarity)
+        - re-ranking of doc-chunks by relevance to query, using cross-encoder,
+            and pick top k
+
+        Args:
+            query: original query (assumed to be in stand-alone form)
+            query_proxies: possible rephrases, or hypothetical answer to query
+                (e.g. for HyDE-type retrieval)
+
+        Returns:
+
+        """
         # if we are using cross-encoder reranking, we can retrieve more docs
         # during retrieval, and leave it to the cross-encoder re-ranking
         # to whittle down to self.config.parsing.n_similar_docs
         retrieval_multiple = 1 if self.config.cross_encoder_reranking_model == "" else 3
-        queries = [query]
-        if self.config.hypothetical_answer:
-            answer = self.llm_hypothetical_answer(query)
-            queries = [query, answer]

-        if self.
-
-            queries += rephrases
+        if self.vecdb is None:
+            raise ValueError("VecDB not set")

-        with
-            docs_and_scores = []
-            for q in
-                docs_and_scores += self.
+        with status("[cyan]Searching VecDB for relevant doc passages..."):
+            docs_and_scores: List[Tuple[Document, float]] = []
+            for q in [query] + query_proxies:
+                docs_and_scores += self.get_semantic_search_results(
                     q,
                     k=self.config.parsing.n_similar_docs * retrieval_multiple,
                 )
         # keep only docs with unique d.id()
         id2doc_score = {d.id(): (d, s) for d, s in docs_and_scores}
         docs_and_scores = list(id2doc_score.values())
-
-        passages = [
-
-
-        ]
+        passages = [d for (d, _) in docs_and_scores]
+        # passages = [
+        #     Document(content=d.content, metadata=d.metadata)
+        #     for (d, _) in docs_and_scores
+        # ]

         if self.config.use_bm25_search:
             docs_scores = self.get_similar_chunks_bm25(query, retrieval_multiple)
@@ -539,25 +1107,136 @@ class DocChatAgent(ChatAgent):
         id2passage = {p.id(): p for p in passages}
         passages = list(id2passage.values())

+        if len(passages) == 0:
+            return []
+
+        passages_scores = [(p, 0.0) for p in passages]
+        passages_scores = self.add_context_window(passages_scores)
+        passages = [p for p, _ in passages_scores]
         # now passages can potentially have a lot of doc chunks,
-        # so we re-rank them using a cross-encoder scoring model
+        # so we re-rank them using a cross-encoder scoring model,
+        # and pick top k where k = config.parsing.n_similar_docs
         # https://www.sbert.net/examples/applications/retrieve_rerank
         if self.config.cross_encoder_reranking_model != "":
             passages = self.rerank_with_cross_encoder(query, passages)

+        if self.config.rerank_diversity:
+            # reorder to increase diversity among top docs
+            passages = self.rerank_with_diversity(passages)
+
+        if self.config.rerank_periphery:
+            # reorder so most important docs are at periphery
+            # (see Lost In the Middle issue).
+            passages = self.rerank_to_periphery(passages)
+
+        return passages
+
+    @no_type_check
+    def get_relevant_extracts(self, query: str) -> Tuple[str, List[Document]]:
+        """
+        Get list of (verbatim) extracts from doc-chunks relevant to answering a query.
+
+        These are the stages (some optional based on config):
+        - use LLM to convert query to stand-alone query
+        - optionally use LLM to rephrase query to use below
+        - optionally use LLM to generate hypothetical answer (HyDE) to use below.
+        - get_relevant_chunks(): get doc-chunks relevant to query and proxies
+        - use LLM to get relevant extracts from doc-chunks
+
+        Args:
+            query (str): query to search for
+
+        Returns:
+            query (str): stand-alone version of input query
+            List[Document]: list of relevant extracts
+
+        """
+        if len(self.dialog) > 0 and not self.config.assistant_mode:
+            # Regardless of whether we are in conversation mode or not,
+            # for relevant doc/chunk extraction, we must convert the query
+            # to a standalone query to get more relevant results.
+            with status("[cyan]Converting to stand-alone query...[/cyan]"):
+                with StreamingIfAllowed(self.llm, False):
+                    query = self.llm.followup_to_standalone(self.dialog, query)
+            print(f"[orange2]New query: {query}")
+
+        proxies = []
+        if self.config.hypothetical_answer:
+            answer = self.llm_hypothetical_answer(query)
+            proxies = [answer]
+
+        if self.config.n_query_rephrases > 0:
+            rephrases = self.llm_rephrase_query(query)
+            proxies += rephrases
+
+        passages = self.get_relevant_chunks(query, proxies)  # no LLM involved
+
         if len(passages) == 0:
             return query, []

-        with
+        with status("[cyan]LLM Extracting verbatim passages..."):
             with StreamingIfAllowed(self.llm, False):
                 # these are async calls, one per passage; turn off streaming
-                extracts = self.
+                extracts = self.get_verbatim_extracts(query, passages)
                 extracts = [e for e in extracts if e.content != NO_ANSWER]

         return query, extracts

-
-
+    def get_verbatim_extracts(
+        self,
+        query: str,
+        passages: List[Document],
+    ) -> List[Document]:
+        """
+        Run RelevanceExtractorAgent in async/concurrent mode on passages,
+        to extract portions relevant to answering query, from each passage.
+        Args:
+            query (str): query to answer
+            passages (List[Documents]): list of passages to extract from
+
+        Returns:
+            List[Document]: list of Documents containing extracts and metadata.
+        """
+        agent_cfg = self.config.relevance_extractor_config
+        if agent_cfg is None:
+            # no relevance extraction: simply return passages
+            return passages
+        if agent_cfg.llm is None:
+            # Use main DocChatAgent's LLM if not provided explicitly:
+            # this reduces setup burden on the user
+            agent_cfg.llm = self.config.llm
+        agent_cfg.query = query
+        agent_cfg.segment_length = self.config.extraction_granularity
+        agent_cfg.llm.stream = False  # disable streaming for concurrent calls
+
+        agent = RelevanceExtractorAgent(agent_cfg)
+        task = Task(
+            agent,
+            name="Relevance-Extractor",
+            interactive=False,
+        )
+
+        extracts = run_batch_tasks(
+            task,
+            passages,
+            input_map=lambda msg: msg.content,
+            output_map=lambda ans: ans.content if ans is not None else NO_ANSWER,
+        )
+
+        # Caution: Retain ALL other fields in the Documents (which could be
+        # other than just `content` and `metadata`), while simply replacing
+        # `content` with the extracted portions
+        passage_extracts = []
+        for p, e in zip(passages, extracts):
+            if e == NO_ANSWER or len(e) == 0:
+                continue
+            p_copy = p.copy()
+            p_copy.content = e
+            passage_extracts.append(p_copy)
+
+        return passage_extracts
+
+    def answer_from_docs(self, query: str) -> ChatDocument:
         """
         Answer query based on relevant docs from the VecDB

@@ -567,24 +1246,38 @@ class DocChatAgent(ChatAgent):
         Returns:
             Document: answer
         """
-        response =
+        response = ChatDocument(
             content=NO_ANSWER,
-            metadata=
+            metadata=ChatDocMetaData(
                 source="None",
+                sender=Entity.LLM,
             ),
         )
         # query may be updated to a stand-alone version
         query, extracts = self.get_relevant_extracts(query)
         if len(extracts) == 0:
             return response
+        if self.llm is None:
+            raise ValueError("LLM not set")
+        if self.config.retrieve_only:
+            # only return extracts, skip LLM-based summary answer
+            meta = dict(
+                sender=Entity.LLM,
+            )
+            # copy metadata from first doc, unclear what to do here.
+            meta.update(extracts[0].metadata)
+            return ChatDocument(
+                content="\n\n".join([e.content for e in extracts]),
+                metadata=ChatDocMetaData(**meta),
+            )
         with ExitStack() as stack:
             # conditionally use Streaming or rich console context
             cm = (
                 StreamingIfAllowed(self.llm)
                 if settings.stream
-                else (
+                else (status("LLM Generating final answer..."))
             )
-            stack.enter_context(cm)
+            stack.enter_context(cm)  # type: ignore
             response = self.get_summary_answer(query, extracts)

         self.update_dialog(query, response.content)
@@ -598,7 +1291,7 @@ class DocChatAgent(ChatAgent):
         """Summarize all docs"""
         if self.llm is None:
             raise ValueError("LLM not set")
-        if self.original_docs
+        if len(self.original_docs) == 0:
             logger.warning(
                 """
                 No docs to summarize! Perhaps you are re-using a previously
@@ -627,19 +1320,22 @@ class DocChatAgent(ChatAgent):
         )
         prompt = f"""
         {instruction}
+
+        FULL TEXT:
         {full_text}
         """.strip()
         with StreamingIfAllowed(self.llm):
-            summary =
-            return summary
+            summary = ChatAgent.llm_response(self, prompt)
+            return summary

-    def justify_response(self) -> None:
+    def justify_response(self) -> ChatDocument | None:
         """Show evidence for last response"""
         if self.response is None:
             print("[magenta]No response yet")
-            return
+            return None
         source = self.response.metadata.source
         if len(source) > 0:
             print("[magenta]" + source)
         else:
             print("[magenta]No source found")
+            return None