langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- langroid/__init__.py +95 -0
- langroid/agent/__init__.py +40 -0
- langroid/agent/base.py +222 -91
- langroid/agent/batch.py +264 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +247 -101
- langroid/agent/chat_document.py +41 -4
- langroid/agent/openai_assistant.py +842 -0
- langroid/agent/special/__init__.py +50 -0
- langroid/agent/special/doc_chat_agent.py +837 -141
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +127 -0
- langroid/agent/special/retriever_agent.py +32 -198
- langroid/agent/special/sql/__init__.py +11 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +22 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +475 -122
- langroid/agent/tool_message.py +75 -13
- langroid/agent/tools/__init__.py +13 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +16 -29
- langroid/agent/tools/run_python_code.py +60 -0
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/agent/tools/segment_extract_tool.py +36 -0
- langroid/cachedb/__init__.py +9 -0
- langroid/cachedb/base.py +22 -2
- langroid/cachedb/momento_cachedb.py +26 -2
- langroid/cachedb/redis_cachedb.py +78 -11
- langroid/embedding_models/__init__.py +34 -0
- langroid/embedding_models/base.py +21 -2
- langroid/embedding_models/models.py +120 -18
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +45 -0
- langroid/language_models/azure_openai.py +80 -27
- langroid/language_models/base.py +117 -12
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_assistants.py +3 -0
- langroid/language_models/openai_gpt.py +558 -174
- langroid/language_models/prompt_formatter/__init__.py +15 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +18 -21
- langroid/mytypes.py +25 -8
- langroid/parsing/__init__.py +46 -0
- langroid/parsing/document_parser.py +260 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +122 -59
- langroid/parsing/repo_loader.py +114 -52
- langroid/parsing/search.py +68 -63
- langroid/parsing/spider.py +3 -2
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -11
- langroid/parsing/urls.py +85 -37
- langroid/parsing/utils.py +298 -4
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +11 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +17 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/algorithms/graph.py +103 -0
- langroid/utils/configuration.py +36 -5
- langroid/utils/constants.py +4 -0
- langroid/utils/globals.py +2 -2
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +21 -0
- langroid/utils/output/printing.py +47 -1
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +616 -2
- langroid/utils/system.py +98 -0
- langroid/vector_store/__init__.py +40 -0
- langroid/vector_store/base.py +203 -6
- langroid/vector_store/chromadb.py +59 -32
- langroid/vector_store/lancedb.py +463 -0
- langroid/vector_store/meilisearch.py +10 -7
- langroid/vector_store/momento.py +262 -0
- langroid/vector_store/qdrantdb.py +104 -22
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
- langroid-0.1.219.dist-info/RECORD +127 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.85.dist-info/RECORD +0 -94
- /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
- {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
langroid/utils/system.py
CHANGED
@@ -1,15 +1,55 @@
+import getpass
+import hashlib
+import importlib
 import inspect
 import logging
 import shutil
+import socket
+import traceback
+from typing import Any
 
 logger = logging.getLogger(__name__)
 
 DELETION_ALLOWED_PATHS = [
     ".qdrant",
     ".chroma",
+    ".lancedb",
 ]
 
 
+class LazyLoad:
+    """Lazy loading of modules or classes."""
+
+    def __init__(self, import_path: str) -> None:
+        self.import_path = import_path
+        self._target = None
+        self._is_target_loaded = False
+
+    def _load_target(self) -> None:
+        if not self._is_target_loaded:
+            try:
+                # Attempt to import as a module
+                self._target = importlib.import_module(self.import_path)  # type: ignore
+            except ImportError:
+                # If module import fails, attempt to import as a
+                # class or function from a module
+                module_path, attr_name = self.import_path.rsplit(".", 1)
+                module = importlib.import_module(module_path)
+                self._target = getattr(module, attr_name)
+            self._is_target_loaded = True
+
+    def __getattr__(self, name: str) -> Any:
+        self._load_target()
+        return getattr(self._target, name)
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        self._load_target()
+        if callable(self._target):
+            return self._target(*args, **kwargs)
+        else:
+            raise TypeError(f"{self.import_path!r} object is not callable")
+
+
 def rmdir(path: str) -> bool:
     """
     Remove a directory recursively.
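For orientation, a minimal sketch of how `LazyLoad` behaves; the `numpy` target is just an illustration, any importable module or dotted attribute path works:

```python
from langroid.utils.system import LazyLoad

np = LazyLoad("numpy")         # nothing is imported yet
print(np.sqrt(16.0))           # first attribute access triggers the import -> 4.0

sqrt = LazyLoad("numpy.sqrt")  # not a module, so it falls back to attribute import
print(sqrt(25.0))              # __call__ loads the target, then delegates -> 5.0
```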
@@ -55,3 +95,61 @@ def caller_name() -> str:
         return ""
 
     return caller_frame.f_code.co_name
+
+
+def friendly_error(e: Exception, msg: str = "An error occurred.") -> str:
+    tb = traceback.format_exc()
+    original_error_message: str = str(e)
+    full_error_message: str = (
+        f"{msg}\nOriginal error: {original_error_message}\nTraceback:\n{tb}"
+    )
+    return full_error_message
+
+
+def generate_user_id(org: str = "") -> str:
+    """
+    Generate a unique user ID based on the username and machine name.
+    Returns:
+    """
+    # Get the username
+    username = getpass.getuser()
+
+    # Get the machine's name
+    machine_name = socket.gethostname()
+
+    org_pfx = f"{org}_" if org else ""
+
+    # Create a consistent unique ID based on the username and machine name
+    unique_string = f"{org_pfx}{username}@{machine_name}"
+
+    # Generate a SHA-256 hash of the unique string
+    user_id = hashlib.sha256(unique_string.encode()).hexdigest()
+
+    return user_id
+
+
+def update_hash(hash: str | None = None, s: str = "") -> str:
+    """
+    Takes a SHA256 hash string and a new string, updates the hash with the new string,
+    and returns the updated hash string.
+
+    Args:
+        hash (str): A SHA256 hash string.
+        s (str): A new string to update the hash with.
+
+    Returns:
+        The updated hash in hexadecimal format.
+    """
+    # Create a new hash object if no hash is provided
+    if hash is None:
+        hash_obj = hashlib.sha256()
+    else:
+        # Convert the hexadecimal hash string to a byte object
+        hash_bytes = bytes.fromhex(hash)
+        hash_obj = hashlib.sha256(hash_bytes)
+
+    # Update the hash with the new string
+    hash_obj.update(s.encode("utf-8"))
+
+    # Return the updated hash in hexadecimal format
+    return hash_obj.hexdigest()
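A hedged sketch of what chaining `update_hash` actually computes: each call folds the previous digest back into a fresh SHA-256, so the result is a hash chain rather than a hash of the concatenated strings:

```python
import hashlib

from langroid.utils.system import update_hash

h1 = update_hash(None, "hello")  # sha256("hello")
h2 = update_hash(h1, "world")    # sha256(digest(h1) + "world"), NOT sha256("helloworld")
assert h2 == hashlib.sha256(bytes.fromhex(h1) + b"world").hexdigest()
```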
langroid/vector_store/__init__.py
ADDED
@@ -0,0 +1,40 @@
+from . import base
+
+from . import qdrantdb
+from . import meilisearch
+from . import lancedb
+
+from .base import VectorStoreConfig, VectorStore
+from .qdrantdb import QdrantDBConfig, QdrantDB
+from .meilisearch import MeiliSearch, MeiliSearchConfig
+from .lancedb import LanceDB, LanceDBConfig
+
+has_chromadb = False
+try:
+    from . import chromadb
+    from .chromadb import ChromaDBConfig, ChromaDB
+
+    chromadb  # silence linters
+    ChromaDB
+    ChromaDBConfig
+    has_chromadb = True
+except ImportError:
+    pass
+
+__all__ = [
+    "base",
+    "VectorStore",
+    "VectorStoreConfig",
+    "qdrantdb",
+    "meilisearch",
+    "lancedb",
+    "QdrantDBConfig",
+    "QdrantDB",
+    "MeiliSearch",
+    "MeiliSearchConfig",
+    "LanceDB",
+    "LanceDBConfig",
+]
+
+if has_chromadb:
+    __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
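Because chromadb is an optional extra, downstream code can branch on the module-level flag; a minimal sketch (the collection name is arbitrary, and qdrant's local default storage is assumed available):

```python
import langroid.vector_store as vs

if vs.has_chromadb:
    store = vs.ChromaDB(vs.ChromaDBConfig(collection_name="demo"))
else:
    # qdrantdb, meilisearch and lancedb are unconditionally importable here
    store = vs.QdrantDB(vs.QdrantDBConfig(collection_name="demo"))
```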
langroid/vector_store/base.py
CHANGED
@@ -1,21 +1,26 @@
+import copy
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Optional, Sequence, Tuple
+from typing import Dict, List, Optional, Sequence, Tuple
 
+import numpy as np
+import pandas as pd
 from pydantic import BaseSettings
 
-from langroid.embedding_models.base import EmbeddingModelsConfig
+from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
 from langroid.mytypes import Document
+from langroid.utils.algorithms.graph import components, topological_sort
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
+from langroid.utils.pandas_utils import stringify
 
 logger = logging.getLogger(__name__)
 
 
 class VectorStoreConfig(BaseSettings):
-    type: str = "qdrant"
-    collection_name: str | None = None
+    type: str = ""  # deprecated, keeping it for backward compatibility
+    collection_name: str | None = "temp"
     replace_collection: bool = False  # replace collection if it already exists
     storage_path: str = ".qdrant/data"
     cloud: bool = False
@@ -36,16 +41,27 @@ class VectorStore(ABC):
 
     def __init__(self, config: VectorStoreConfig):
         self.config = config
+        self.embedding_model = EmbeddingModel.create(config.embedding)
 
     @staticmethod
     def create(config: VectorStoreConfig) -> Optional["VectorStore"]:
         from langroid.vector_store.chromadb import ChromaDB, ChromaDBConfig
+        from langroid.vector_store.lancedb import LanceDB, LanceDBConfig
+        from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
+        from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
         from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
 
         if isinstance(config, QdrantDBConfig):
             return QdrantDB(config)
         elif isinstance(config, ChromaDBConfig):
             return ChromaDB(config)
+        elif isinstance(config, MomentoVIConfig):
+            return MomentoVI(config)
+        elif isinstance(config, LanceDBConfig):
+            return LanceDB(config)
+        elif isinstance(config, MeiliSearchConfig):
+            return MeiliSearch(config)
+
         else:
             logger.warning(
                 f"""
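For context, a hedged sketch of the dispatch above (collection name arbitrary): the concrete store is selected purely by the config's class, and an unrecognized config type falls into the warning branch, which is why the return type is `Optional["VectorStore"]`:

```python
from langroid.vector_store.base import VectorStore
from langroid.vector_store.lancedb import LanceDBConfig

# isinstance() checks on the config pick the backend; here a LanceDB instance
store = VectorStore.create(LanceDBConfig(collection_name="demo"))
```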
@@ -113,6 +129,42 @@
     def add_documents(self, documents: Sequence[Document]) -> None:
         pass
 
+    def compute_from_docs(self, docs: List[Document], calc: str) -> str:
+        """Compute a result on a set of documents,
+        using a dataframe calc string like `df.groupby('state')['income'].mean()`.
+        """
+        dicts = [doc.dict() for doc in docs]
+        df = pd.DataFrame(dicts)
+
+        try:
+            result = pd.eval(  # safer than eval but limited to single expression
+                calc,
+                engine="python",
+                parser="pandas",
+                local_dict={"df": df},
+            )
+        except Exception as e:
+            # return error message so LLM can fix the calc string if needed
+            err = f"""
+            Error encountered in pandas eval: {str(e)}
+            """
+            if isinstance(e, KeyError) and "not in index" in str(e):
+                # Pd.eval sometimes fails on a perfectly valid exprn like
+                # df.loc[..., 'column'] with a KeyError.
+                err += """
+                Maybe try a different way, e.g.
+                instead of df.loc[..., 'column'], try df.loc[...]['column']
+                """
+            return err
+        return stringify(result)
+
+    def maybe_add_ids(self, documents: Sequence[Document]) -> None:
+        """Add ids to metadata if absent, since some
+        vecdbs don't like having blank ids."""
+        for d in documents:
+            if d.metadata.id in [None, ""]:
+                d.metadata.id = d._unique_hash_id()
+
     @abstractmethod
     def similar_texts_with_scores(
         self,
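To see the `pd.eval` pattern from `compute_from_docs` in isolation, a standalone sketch with a hypothetical dataframe; the calc string is the docstring's own example:

```python
import pandas as pd

df = pd.DataFrame({"state": ["NY", "CA", "NY"], "income": [100, 50, 200]})

# Single-expression evaluation against an explicit local_dict, mirroring
# compute_from_docs: nothing outside `df` is visible to the expression.
result = pd.eval(
    "df.groupby('state')['income'].mean()",
    engine="python",
    parser="pandas",
    local_dict={"df": df},
)
print(result)  # CA -> 50.0, NY -> 150.0
```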
@@ -120,12 +172,157 @@
         k: int = 1,
         where: Optional[str] = None,
     ) -> List[Tuple[Document, float]]:
+        """
+        Find k most similar texts to the given text, in terms of vector distance metric
+        (e.g., cosine similarity).
+
+        Args:
+            text (str): The text to find similar texts for.
+            k (int, optional): Number of similar texts to retrieve. Defaults to 1.
+            where (Optional[str], optional): Where clause to filter the search.
+
+        Returns:
+            List[Tuple[Document,float]]: List of (Document, score) tuples.
+
+        """
         pass
 
+    def add_context_window(
+        self, docs_scores: List[Tuple[Document, float]], neighbors: int = 0
+    ) -> List[Tuple[Document, float]]:
+        """
+        In each doc's metadata, there may be a window_ids field indicating
+        the ids of the chunks around the current chunk.
+        These window_ids may overlap, so we
+        - coalesce each overlapping groups into a single window (maintaining ordering),
+        - create a new document for each part, preserving metadata,
+
+        We may have stored a longer set of window_ids than we need during chunking.
+        Now, we just want `neighbors` on each side of the center of the window_ids list.
+
+        Args:
+            docs_scores (List[Tuple[Document, float]]): List of pairs of documents
+                to add context windows to together with their match scores.
+            neighbors (int, optional): Number of neighbors on "each side" of match to
+                retrieve. Defaults to 0.
+                "Each side" here means before and after the match,
+                in the original text.
+
+        Returns:
+            List[Tuple[Document, float]]: List of (Document, score) tuples.
+        """
+        # We return a larger context around each match, i.e.
+        # a window of `neighbors` on each side of the match.
+        docs = [d for d, s in docs_scores]
+        scores = [s for d, s in docs_scores]
+        if neighbors == 0:
+            return docs_scores
+        doc_chunks = [d for d in docs if d.metadata.is_chunk]
+        if len(doc_chunks) == 0:
+            return docs_scores
+        window_ids_list = []
+        id2metadata = {}
+        # id -> highest score of a doc it appears in
+        id2max_score: Dict[int | str, float] = {}
+        for i, d in enumerate(docs):
+            window_ids = d.metadata.window_ids
+            if len(window_ids) == 0:
+                window_ids = [d.id()]
+            id2metadata.update({id: d.metadata for id in window_ids})
+
+            id2max_score.update(
+                {id: max(id2max_score.get(id, 0), scores[i]) for id in window_ids}
+            )
+            n = len(window_ids)
+            chunk_idx = window_ids.index(d.id())
+            neighbor_ids = window_ids[
+                max(0, chunk_idx - neighbors) : min(n, chunk_idx + neighbors + 1)
+            ]
+            window_ids_list += [neighbor_ids]
+
+        # window_ids could be from different docs,
+        # and they may overlap, so we coalesce overlapping groups into
+        # separate windows.
+        window_ids_list = self.remove_overlaps(window_ids_list)
+        final_docs = []
+        final_scores = []
+        for w in window_ids_list:
+            metadata = copy.deepcopy(id2metadata[w[0]])
+            metadata.window_ids = w
+            document = Document(
+                content=" ".join([d.content for d in self.get_documents_by_ids(w)]),
+                metadata=metadata,
+            )
+            # make a fresh id since content is in general different
+            document.metadata.id = document.hash_id(document.content)
+            final_docs += [document]
+            final_scores += [max(id2max_score[id] for id in w)]
+        return list(zip(final_docs, final_scores))
+
+    @staticmethod
+    def remove_overlaps(windows: List[List[str]]) -> List[List[str]]:
+        """
+        Given a collection of windows, where each window is a sequence of ids,
+        identify groups of overlapping windows, and for each overlapping group,
+        order the chunk-ids using topological sort so they appear in the original
+        order in the text.
+
+        Args:
+            windows (List[int|str]): List of windows, where each window is a
+                sequence of ids.
+
+        Returns:
+            List[int|str]: List of windows, where each window is a sequence of ids,
+                and no two windows overlap.
+        """
+        ids = set(id for w in windows for id in w)
+        # id -> {win -> # pos}
+        id2win2pos: Dict[str, Dict[int, int]] = {id: {} for id in ids}
+
+        for i, w in enumerate(windows):
+            for j, id in enumerate(w):
+                id2win2pos[id][i] = j
+
+        n = len(windows)
+        # relation between windows:
+        order = np.zeros((n, n), dtype=np.int8)
+        for i, w in enumerate(windows):
+            for j, x in enumerate(windows):
+                if i == j:
+                    continue
+                if len(set(w).intersection(x)) == 0:
+                    continue
+                id = list(set(w).intersection(x))[0]  # any common id
+                if id2win2pos[id][i] > id2win2pos[id][j]:
+                    order[i, j] = -1  # win i is before win j
+                else:
+                    order[i, j] = 1  # win i is after win j
+
+        # find groups of windows that overlap, like connected components in a graph
+        groups = components(np.abs(order))
+
+        # order the chunk-ids in each group using topological sort
+        new_windows = []
+        for g in groups:
+            # find total ordering among windows in group based on order matrix
+            # (this is a topological sort)
+            _g = np.array(g)
+            order_matrix = order[_g][:, _g]
+            ordered_window_indices = topological_sort(order_matrix)
+            ordered_window_ids = [windows[i] for i in _g[ordered_window_indices]]
+            flattened = [id for w in ordered_window_ids for id in w]
+            flattened_deduped = list(dict.fromkeys(flattened))
+            # Note we are not going to split these, and instead we'll return
+            # larger windows from concatenating the connected groups.
+            # This ensures context is retained for LLM q/a
+            new_windows += [flattened_deduped]
+
+        return new_windows
+
     @abstractmethod
-    def get_all_documents(self) -> List[Document]:
+    def get_all_documents(self, where: str = "") -> List[Document]:
         """
-        Get all documents in the current collection
+        Get all documents in the current collection, possibly filtered by `where`.
         """
         pass
 
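An illustrative sketch of `remove_overlaps` above (chunk ids hypothetical): two windows sharing "c3" form one connected group, and the topological sort restores original text order before flattening and de-duplicating:

```python
from langroid.vector_store.base import VectorStore

windows = [["c3", "c4", "c5"], ["c1", "c2", "c3"]]
print(VectorStore.remove_overlaps(windows))
# expected: [['c1', 'c2', 'c3', 'c4', 'c5']]
```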
langroid/vector_store/chromadb.py
CHANGED
@@ -1,8 +1,7 @@
+import json
 import logging
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
-import chromadb
-
 from langroid.embedding_models.base import (
     EmbeddingModel,
     EmbeddingModelsConfig,
@@ -17,7 +16,7 @@ logger = logging.getLogger(__name__)
 
 
 class ChromaDBConfig(VectorStoreConfig):
-    collection_name: str = "
+    collection_name: str = "temp"
     storage_path: str = ".chroma/data"
     embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
     host: str = "127.0.0.1"
@@ -25,8 +24,19 @@ class ChromaDBConfig(VectorStoreConfig):
 
 
 class ChromaDB(VectorStore):
-    def __init__(self, config: ChromaDBConfig):
+    def __init__(self, config: ChromaDBConfig = ChromaDBConfig()):
         super().__init__(config)
+        try:
+            import chromadb
+        except ImportError:
+            raise ImportError(
+                """
+                ChromaDB is not installed by default with Langroid.
+                If you want to use it, please install it with the `chromadb` extra, e.g.
+                pip install "langroid[chromadb]"
+                or an equivalent command.
+                """
+            )
         self.config = config
         emb_model = EmbeddingModel.create(config.embedding)
         self.embedding_fn = emb_model.embedding_fn()
@@ -99,53 +109,78 @@ class ChromaDB(VectorStore):
 
         """
         self.config.collection_name = collection_name
+        if collection_name in self.list_collections(empty=True) and replace:
+            logger.warning(f"Replacing existing collection {collection_name}")
+            self.client.delete_collection(collection_name)
         self.collection = self.client.create_collection(
             name=self.config.collection_name,
             embedding_function=self.embedding_fn,
             get_or_create=not replace,
         )
 
-    def add_documents(self, documents: Optional[List[Document]] = None) -> None:
+    def add_documents(self, documents: Sequence[Document]) -> None:
+        super().maybe_add_ids(documents)
         if documents is None:
             return
         contents: List[str] = [document.content for document in documents]
-        metadatas = [
-            d.metadata.dict() for d in documents
+        # convert metadatas to dicts so chroma can handle them
+        metadata_dicts: List[dict[str, Any]] = [
+            d.metadata.dict_bool_int() for d in documents
         ]
+        for m in metadata_dicts:
+            # chroma does not handle non-atomic types in metadata
+            m["window_ids"] = ",".join(m["window_ids"])
+
         ids = [str(d.id()) for d in documents]
         self.collection.add(
             # embedding_models=embedding_models,
             documents=contents,
-            metadatas=metadatas,
+            metadatas=metadata_dicts,
             ids=ids,
         )
 
-    def get_all_documents(self) -> List[Document]:
-        results = self.collection.get(include=["documents", "metadatas"])
+    def get_all_documents(self, where: str = "") -> List[Document]:
+        filter = json.loads(where) if where else None
+        results = self.collection.get(
+            include=["documents", "metadatas"],
+            where=filter,
+        )
         results["documents"] = [results["documents"]]
         results["metadatas"] = [results["metadatas"]]
         return self._docs_from_results(results)
 
     def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
-        results = self.collection.get(ids=ids, include=["documents", "metadatas"])
-        results["documents"] = [results["documents"]]
-        results["metadatas"] = [results["metadatas"]]
-        return self._docs_from_results(results)
+        # get them one by one since chroma mangles the order of the results
+        # when fetched from a list of ids.
+        results = [
+            self.collection.get(ids=[id], include=["documents", "metadatas"])
+            for id in ids
+        ]
+        final_results = {}
+        final_results["documents"] = [[r["documents"][0] for r in results]]
+        final_results["metadatas"] = [[r["metadatas"][0] for r in results]]
+        return self._docs_from_results(final_results)
 
     def delete_collection(self, collection_name: str) -> None:
-        self.client.delete_collection(name=collection_name)
+        try:
+            self.client.delete_collection(name=collection_name)
+        except Exception:
+            pass
 
     def similar_texts_with_scores(
         self, text: str, k: int = 1, where: Optional[str] = None
     ) -> List[Tuple[Document, float]]:
+        n = self.collection.count()
+        filter = json.loads(where) if where else None
         results = self.collection.query(
             query_texts=[text],
-            n_results=k,
-            where=where,
+            n_results=min(n, k),
+            where=filter,
             include=["documents", "distances", "metadatas"],
         )
         docs = self._docs_from_results(results)
-        scores = results["distances"][0]
+        # chroma distances are 1 - cosine.
+        scores = [1 - s for s in results["distances"][0]]
         return list(zip(docs, scores))
 
     def _docs_from_results(self, results: Dict[str, Any]) -> List[Document]:
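Since both `get_all_documents` and `similar_texts_with_scores` now run the `where` string through `json.loads`, callers pass a JSON-encoded Chroma metadata filter; a hedged sketch, where `store` is assumed to be a `ChromaDB` instance and `source` a hypothetical metadata field:

```python
docs_scores = store.similar_texts_with_scores(
    "what is the capital of France?",
    k=3,
    where='{"source": "wiki"}',  # decoded to {"source": "wiki"} for chroma
)
```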
@@ -164,22 +199,14 @@ class ChromaDB(VectorStore):
         for i, c in enumerate(contents):
             print_long_text("red", "italic red", f"MATCH-{i}", c)
         metadatas = results["metadatas"][0]
+        for m in metadatas:
+            # restore the stringified list of window_ids into the original List[str]
+            if m["window_ids"].strip() == "":
+                m["window_ids"] = []
+            else:
+                m["window_ids"] = m["window_ids"].split(",")
         docs = [
             Document(content=d, metadata=DocMetaData(**m))
             for d, m in zip(contents, metadatas)
         ]
         return docs
-
-
-# Example usage and testing
-# chroma_db = ChromaDB.from_documents(
-#     collection_name="all-my-documents",
-#     documents=["doc1000101", "doc288822"],
-#     metadatas=[{"style": "style1"}, {"style": "style2"}],
-#     ids=["uri9", "uri10"]
-# )
-# results = chroma_db.query(
-#     query_texts=["This is a query document"],
-#     n_results=2
-# )
-# print(results)
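The split in `_docs_from_results` is the inverse of the join in `add_documents`; the round trip in isolation (ids hypothetical):

```python
window_ids = ["id-1", "id-2", "id-3"]
stored = ",".join(window_ids)          # what add_documents stores in chroma metadata
restored = stored.split(",") if stored.strip() else []
assert restored == window_ids
```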