langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- langroid/__init__.py +70 -0
- langroid/agent/__init__.py +22 -0
- langroid/agent/base.py +120 -33
- langroid/agent/batch.py +134 -35
- langroid/agent/callbacks/__init__.py +0 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +164 -100
- langroid/agent/chat_document.py +19 -2
- langroid/agent/openai_assistant.py +20 -10
- langroid/agent/special/__init__.py +33 -10
- langroid/agent/special/doc_chat_agent.py +521 -108
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +23 -7
- langroid/agent/special/retriever_agent.py +29 -174
- langroid/agent/special/sql/__init__.py +7 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +11 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +423 -114
- langroid/agent/tool_message.py +67 -10
- langroid/agent/tools/__init__.py +8 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +6 -24
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/cachedb/__init__.py +6 -0
- langroid/embedding_models/__init__.py +24 -0
- langroid/embedding_models/base.py +9 -1
- langroid/embedding_models/models.py +117 -17
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +22 -0
- langroid/language_models/azure_openai.py +47 -4
- langroid/language_models/base.py +26 -10
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_gpt.py +407 -121
- langroid/language_models/prompt_formatter/__init__.py +9 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +10 -9
- langroid/mytypes.py +10 -4
- langroid/parsing/__init__.py +33 -1
- langroid/parsing/document_parser.py +259 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +20 -7
- langroid/parsing/repo_loader.py +108 -46
- langroid/parsing/search.py +8 -0
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -13
- langroid/parsing/urls.py +18 -9
- langroid/parsing/utils.py +130 -9
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +7 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +10 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/configuration.py +0 -1
- langroid/utils/constants.py +4 -0
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +15 -2
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +446 -4
- langroid/utils/system.py +36 -1
- langroid/vector_store/__init__.py +34 -2
- langroid/vector_store/base.py +33 -2
- langroid/vector_store/chromadb.py +42 -13
- langroid/vector_store/lancedb.py +226 -60
- langroid/vector_store/meilisearch.py +7 -6
- langroid/vector_store/momento.py +3 -2
- langroid/vector_store/qdrantdb.py +82 -11
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
- langroid-0.1.219.dist-info/RECORD +127 -0
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.139.dist-info/RECORD +0 -103
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
langroid/vector_store/chromadb.py
CHANGED
@@ -1,8 +1,7 @@
+import json
 import logging
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
-import chromadb
-
 from langroid.embedding_models.base import (
     EmbeddingModel,
     EmbeddingModelsConfig,
@@ -25,8 +24,19 @@ class ChromaDBConfig(VectorStoreConfig):
 
 
 class ChromaDB(VectorStore):
-    def __init__(self, config: ChromaDBConfig):
+    def __init__(self, config: ChromaDBConfig = ChromaDBConfig()):
         super().__init__(config)
+        try:
+            import chromadb
+        except ImportError:
+            raise ImportError(
+                """
+                ChromaDB is not installed by default with Langroid.
+                If you want to use it, please install it with the `chromadb` extra, e.g.
+                pip install "langroid[chromadb]"
+                or an equivalent command.
+                """
+            )
         self.config = config
         emb_model = EmbeddingModel.create(config.embedding)
         self.embedding_fn = emb_model.embedding_fn()
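
Taken together, the default `ChromaDBConfig()` argument and the lazy `import chromadb` make chromadb an optional dependency, needed only when a `ChromaDB` store is actually instantiated. A minimal usage sketch (the collection name is a hypothetical example, and `collection_name` is assumed to be inherited from the base `VectorStoreConfig`):

```python
# Requires the optional extra:  pip install "langroid[chromadb]"
from langroid.vector_store.chromadb import ChromaDB, ChromaDBConfig

# Default-config construction now works; chromadb is imported only inside
# __init__, so importing langroid itself no longer requires the package.
vecdb = ChromaDB()  # equivalent to ChromaDB(ChromaDBConfig())

# Or configure explicitly:
vecdb = ChromaDB(ChromaDBConfig(collection_name="my-docs"))
```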
@@ -114,7 +124,9 @@ class ChromaDB(VectorStore):
             return
         contents: List[str] = [document.content for document in documents]
         # convert metadatas to dicts so chroma can handle them
-        metadata_dicts: List[dict[str, Any]] = [
+        metadata_dicts: List[dict[str, Any]] = [
+            d.metadata.dict_bool_int() for d in documents
+        ]
         for m in metadata_dicts:
             # chroma does not handle non-atomic types in metadata
             m["window_ids"] = ",".join(m["window_ids"])
@@ -127,29 +139,43 @@ class ChromaDB(VectorStore):
             ids=ids,
         )
 
-    def get_all_documents(self) -> List[Document]:
-
+    def get_all_documents(self, where: str = "") -> List[Document]:
+        filter = json.loads(where) if where else None
+        results = self.collection.get(
+            include=["documents", "metadatas"],
+            where=filter,
+        )
         results["documents"] = [results["documents"]]
         results["metadatas"] = [results["metadatas"]]
         return self._docs_from_results(results)
 
     def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
-
-
-        results
-
+        # get them one by one since chroma mangles the order of the results
+        # when fetched from a list of ids.
+        results = [
+            self.collection.get(ids=[id], include=["documents", "metadatas"])
+            for id in ids
+        ]
+        final_results = {}
+        final_results["documents"] = [[r["documents"][0] for r in results]]
+        final_results["metadatas"] = [[r["metadatas"][0] for r in results]]
+        return self._docs_from_results(final_results)
 
     def delete_collection(self, collection_name: str) -> None:
-
+        try:
+            self.client.delete_collection(name=collection_name)
+        except Exception:
+            pass
 
     def similar_texts_with_scores(
         self, text: str, k: int = 1, where: Optional[str] = None
     ) -> List[Tuple[Document, float]]:
         n = self.collection.count()
+        filter = json.loads(where) if where else None
         results = self.collection.query(
             query_texts=[text],
             n_results=min(n, k),
-            where=
+            where=filter,
             include=["documents", "distances", "metadatas"],
         )
         docs = self._docs_from_results(results)
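
`get_all_documents` and `similar_texts_with_scores` now take a `where` filter as a JSON string, decoded with `json.loads` before being handed to Chroma. A sketch of the intended call pattern (the `category` metadata field is hypothetical):

```python
# `where` is a JSON-encoded Chroma metadata filter; it is parsed with
# json.loads() inside the methods shown above.
docs = vecdb.get_all_documents(where='{"category": "news"}')

# The same filter narrows similarity search:
hits = vecdb.similar_texts_with_scores(
    "climate policy",
    k=5,
    where='{"category": "news"}',
)
```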
@@ -175,7 +201,10 @@ class ChromaDB(VectorStore):
         metadatas = results["metadatas"][0]
         for m in metadatas:
             # restore the stringified list of window_ids into the original List[str]
-
+            if m["window_ids"].strip() == "":
+                m["window_ids"] = []
+            else:
+                m["window_ids"] = m["window_ids"].split(",")
         docs = [
             Document(content=d, metadata=DocMetaData(**m))
             for d, m in zip(contents, metadatas)
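
The new empty-string check is needed because the join/split round trip used to stringify `window_ids` is not symmetric for empty lists:

```python
",".join([])      # -> ""          an empty list stringifies to ""
"".split(",")     # -> [""]        a naive split would invent a bogus entry
"a,b".split(",")  # -> ["a", "b"]  the non-empty case round-trips fine
```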
langroid/vector_store/lancedb.py
CHANGED
@@ -2,9 +2,11 @@ import logging
 from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Type
 
 import lancedb
+import pandas as pd
 from dotenv import load_dotenv
 from lancedb.pydantic import LanceModel, Vector
-from
+from lancedb.query import LanceVectorQueryBuilder
+from pydantic import BaseModel, ValidationError, create_model
 
 from langroid.embedding_models.base import (
     EmbeddingModel,
@@ -14,6 +16,10 @@ from langroid.embedding_models.models import OpenAIEmbeddingsConfig
 from langroid.mytypes import Document, EmbeddingFunction
 from langroid.utils.configuration import settings
 from langroid.utils.pydantic_utils import (
+    dataframe_to_document_model,
+    dataframe_to_documents,
+    extend_document_class,
+    extra_metadata,
     flatten_pydantic_instance,
     flatten_pydantic_model,
     nested_dict_from_flat,
@@ -29,11 +35,14 @@ class LanceDBConfig(VectorStoreConfig):
     storage_path: str = ".lancedb/data"
     embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
     distance: str = "cosine"
+    # document_class is used to store in lancedb with right schema,
+    # and also to retrieve the right type of Documents when searching.
     document_class: Type[Document] = Document
+    flatten: bool = False  # flatten Document class into LanceSchema ?
 
 
 class LanceDB(VectorStore):
-    def __init__(self, config: LanceDBConfig):
+    def __init__(self, config: LanceDBConfig = LanceDBConfig()):
         super().__init__(config)
         self.config: LanceDBConfig = config
         emb_model = EmbeddingModel.create(config.embedding)
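
The commented `document_class` field and the new `flatten` flag let a LanceDB store be configured around a custom `Document` subclass, which drives the table schema. A sketch, assuming `Document`/`DocMetaData` from `langroid.mytypes` and a hypothetical `year` field:

```python
from langroid.mytypes import DocMetaData, Document
from langroid.vector_store.lancedb import LanceDB, LanceDBConfig

# Explicitly declared extra metadata, as the warning added later in this
# diff (_maybe_set_doc_class_schema) recommends.
class MovieMetaData(DocMetaData):
    year: int = 0

class MovieDoc(Document):
    metadata: MovieMetaData

vecdb = LanceDB(
    LanceDBConfig(
        collection_name="movies",  # hypothetical collection
        document_class=MovieDoc,   # determines the LanceDB table schema
        flatten=False,             # keep the nested Document structure
    )
)
```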
@@ -41,8 +50,10 @@ class LanceDB(VectorStore):
         self.embedding_dim = emb_model.embedding_dims
         self.host = config.host
         self.port = config.port
-        self.
-        self.
+        self.is_from_dataframe = False  # were docs ingested from a dataframe?
+        self.df_metadata_columns: List[str] = []  # metadata columns from dataframe
+        self._setup_schemas(config.document_class)
+
         load_dotenv()
         if self.config.cloud:
             logger.warning(
@@ -75,6 +86,15 @@ class LanceDB(VectorStore):
             config.collection_name, replace=config.replace_collection
         )
 
+    def _setup_schemas(self, doc_cls: Type[Document] | None) -> None:
+        doc_cls = doc_cls or self.config.document_class
+        self.unflattened_schema = self._create_lance_schema(doc_cls)
+        self.schema = (
+            self._create_flat_lance_schema(doc_cls)
+            if self.config.flatten
+            else self.unflattened_schema
+        )
+
     def clear_empty_collections(self) -> int:
         coll_names = self.list_collections()
         n_deletes = 0
@@ -119,7 +139,7 @@ class LanceDB(VectorStore):
         Args:
             empty (bool, optional): Whether to include empty collections.
         """
-        colls = self.client.table_names()
+        colls = self.client.table_names(limit=None)
         if len(colls) == 0:
             return []
         if empty:  # include empty tbls
@@ -134,7 +154,7 @@ class LanceDB(VectorStore):
         - Vector field that has dims equal to
         the embedding dimension of the embedding model, and a data field of type
         DocClass.
-        -
+        - other fields from doc_cls
 
         Args:
             doc_cls (Type[Document]): A Pydantic model which should be a subclass of
|
|
152
172
|
|
153
173
|
n = self.embedding_dim
|
154
174
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
payload=(doc_cls, ...),
|
175
|
+
# Prepare fields for the new model
|
176
|
+
fields = {"id": (str, ...), "vector": (Vector(n), ...)}
|
177
|
+
|
178
|
+
sorted_fields = dict(
|
179
|
+
sorted(doc_cls.__fields__.items(), key=lambda item: item[0])
|
161
180
|
)
|
181
|
+
# Add both statically and dynamically defined fields from doc_cls
|
182
|
+
for field_name, field in sorted_fields.items():
|
183
|
+
fields[field_name] = (field.outer_type_, field.default)
|
184
|
+
|
185
|
+
# Create the new model with dynamic fields
|
186
|
+
NewModel = create_model(
|
187
|
+
"NewModel", __base__=LanceModel, **fields
|
188
|
+
) # type: ignore
|
162
189
|
return NewModel # type: ignore
|
163
190
|
|
164
191
|
def _create_flat_lance_schema(self, doc_cls: Type[Document]) -> Type[BaseModel]:
|
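
The schema builder above constructs a `LanceModel` subclass at runtime with Pydantic's `create_model`, merging fixed `id`/`vector` fields with the fields of the document class. A self-contained illustration of the same technique, assuming Pydantic v1 (`outer_type_` is a v1 `ModelField` attribute) and using plain `BaseModel` in place of `LanceModel`:

```python
from pydantic import BaseModel, create_model

class Base(BaseModel):
    content: str = ""

# Start from fixed fields, then merge in the fields of an existing model
# class as name -> (type, default) pairs, sorted by field name.
fields = {"id": (str, ...)}
for name, field in sorted(Base.__fields__.items(), key=lambda kv: kv[0]):
    fields[name] = (field.outer_type_, field.default)

Dynamic = create_model("Dynamic", __base__=BaseModel, **fields)
print(Dynamic(id="1").dict())  # {'id': '1', 'content': ''}
```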
@@ -190,76 +217,218 @@ class LanceDB(VectorStore):
             return
         else:
             logger.warning("Recreating fresh collection")
-
-            collection_name, schema=self.flat_schema, mode="overwrite"
-        )
+        self.client.create_table(collection_name, schema=self.schema, mode="overwrite")
         if settings.debug:
             level = logger.getEffectiveLevel()
             logger.setLevel(logging.INFO)
-            logger.info(tbl.schema)
             logger.setLevel(level)
 
+    def _maybe_set_doc_class_schema(self, doc: Document) -> None:
+        """
+        Set the config.document_class and self.schema based on doc if needed
+        Args:
+            doc: an instance of Document, to be added to a collection
+        """
+        extra_metadata_fields = extra_metadata(doc, self.config.document_class)
+        if len(extra_metadata_fields) > 0:
+            logger.warning(
+                f"""
+                Added documents contain extra metadata fields:
+                {extra_metadata_fields}
+                which were not present in the original config.document_class.
+                Trying to change document_class and corresponding schemas.
+                Overriding LanceDBConfig.document_class with an auto-generated
+                Pydantic class that includes these extra fields.
+                If this fails, or you see odd results, it is recommended that you
+                define a subclass of Document, with metadata of class derived from
+                DocMetaData, with extra fields defined via
+                `Field(..., description="...")` declarations,
+                and set this document class as the value of the
+                LanceDBConfig.document_class attribute.
+                """
+            )
+
+            doc_cls = extend_document_class(doc)
+            self.config.document_class = doc_cls
+            self._setup_schemas(doc_cls)
+
     def add_documents(self, documents: Sequence[Document]) -> None:
         super().maybe_add_ids(documents)
         colls = self.list_collections(empty=True)
         if len(documents) == 0:
             return
         embedding_vecs = self.embedding_fn([doc.content for doc in documents])
-
+        coll_name = self.config.collection_name
+        if coll_name is None:
             raise ValueError("No collection name set, cannot ingest docs")
-
-
+        self._maybe_set_doc_class_schema(documents[0])
+        if (
+            coll_name not in colls
+            or self.client.open_table(coll_name).head(1).shape[0] == 0
+        ):
+            # collection either doesn't exist or is empty, so replace it,
+            self.create_collection(coll_name, replace=True)
+
         ids = [str(d.id()) for d in documents]
         # don't insert all at once, batch in chunks of b,
         # else we get an API error
         b = self.config.batch_size
 
-        def make_batches() -> Generator[List[
+        def make_batches() -> Generator[List[BaseModel], None, None]:
             for i in range(0, len(ids), b):
-
-
-
-
-
-                    payload=doc,
-                )
+                batch = [
+                    self.unflattened_schema(
+                        id=ids[i + j],
+                        vector=embedding_vecs[i + j],
+                        **doc.dict(),
                     )
-                for
+                    for j, doc in enumerate(documents[i : i + b])
                 ]
+                if self.config.flatten:
+                    batch = [
+                        flatten_pydantic_instance(instance)  # type: ignore
+                        for instance in batch
+                    ]
+                yield batch
 
         tbl = self.client.open_table(self.config.collection_name)
-
+        try:
+            tbl.add(make_batches())
+        except Exception as e:
+            logger.error(
+                f"""
+                Error adding documents to LanceDB: {e}
+                POSSIBLE REMEDY: Delete the LancdDB storage directory
+                {self.config.storage_path} and try again.
+                """
+            )
+
+    def add_dataframe(
+        self,
+        df: pd.DataFrame,
+        content: str = "content",
+        metadata: List[str] = [],
+    ) -> None:
+        """
+        Add a dataframe to the collection.
+        Args:
+            df (pd.DataFrame): A dataframe
+            content (str): The name of the column in the dataframe that contains the
+                text content to be embedded using the embedding model.
+            metadata (List[str]): A list of column names in the dataframe that contain
+                metadata to be stored in the database. Defaults to [].
+        """
+        self.is_from_dataframe = True
+        actual_metadata = metadata.copy()
+        self.df_metadata_columns = actual_metadata  # could be updated below
+        # get content column
+        content_values = df[content].values.tolist()
+        embedding_vecs = self.embedding_fn(content_values)
+
+        # add vector column
+        df["vector"] = embedding_vecs
+        if content != "content":
+            # rename content column to "content", leave existing column intact
+            df = df.rename(columns={content: "content"}, inplace=False)
+
+        if "id" not in df.columns:
+            docs = dataframe_to_documents(df, content="content", metadata=metadata)
+            ids = [str(d.id()) for d in docs]
+            df["id"] = ids
+
+        if "id" not in actual_metadata:
+            actual_metadata += ["id"]
+
+        colls = self.list_collections(empty=True)
+        coll_name = self.config.collection_name
+        if (
+            coll_name not in colls
+            or self.client.open_table(coll_name).head(1).shape[0] == 0
+        ):
+            # collection either doesn't exist or is empty, so replace it
+            # and set new schema from df
+            self.client.create_table(
+                self.config.collection_name,
+                data=df,
+                mode="overwrite",
+            )
+            doc_cls = dataframe_to_document_model(
+                df,
+                content=content,
+                metadata=actual_metadata,
+                exclude=["vector"],
+            )
+            self.config.document_class = doc_cls  # type: ignore
+            self._setup_schemas(doc_cls)  # type: ignore
+        else:
+            # collection exists and is not empty, so append to it
+            tbl = self.client.open_table(self.config.collection_name)
+            tbl.add(df)
 
     def delete_collection(self, collection_name: str) -> None:
-        self.client.drop_table(collection_name)
+        self.client.drop_table(collection_name, ignore_missing=True)
+
+    def _lance_result_to_docs(self, result: LanceVectorQueryBuilder) -> List[Document]:
+        if self.is_from_dataframe:
+            df = result.to_pandas()
+            return dataframe_to_documents(
+                df,
+                content="content",
+                metadata=self.df_metadata_columns,
+                doc_cls=self.config.document_class,
+            )
+        else:
+            records = result.to_arrow().to_pylist()
+            return self._records_to_docs(records)
 
-    def
+    def _records_to_docs(self, records: List[Dict[str, Any]]) -> List[Document]:
+        if self.config.flatten:
+            docs = [
+                self.unflattened_schema(**nested_dict_from_flat(rec)) for rec in records
+            ]
+        else:
+            try:
+                docs = [self.schema(**rec) for rec in records]
+            except ValidationError as e:
+                raise ValueError(
+                    f"""
+                    Error validating LanceDB result: {e}
+                    HINT: This could happen when you're re-using an
+                    existing LanceDB store with a different schema.
+                    Try deleting your local lancedb storage at `{self.config.storage_path}`
+                    re-ingesting your documents and/or replacing the collections.
+                    """
+                )
+
+        doc_cls = self.config.document_class
+        doc_cls_field_names = doc_cls.__fields__.keys()
+        return [
+            doc_cls(
+                **{
+                    field_name: getattr(doc, field_name)
+                    for field_name in doc_cls_field_names
+                }
+            )
+            for doc in docs
+        ]
+
+    def get_all_documents(self, where: str = "") -> List[Document]:
         if self.config.collection_name is None:
             raise ValueError("No collection name set, cannot retrieve docs")
         tbl = self.client.open_table(self.config.collection_name)
-
-
-            self.config.document_class(
-                **(nested_dict_from_flat(rec, sub_dict="payload"))
-            )
-            for rec in records
-        ]
-        return docs
+        pre_result = tbl.search(None).where(where or None).limit(None)
+        return self._lance_result_to_docs(pre_result)
 
     def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
         if self.config.collection_name is None:
             raise ValueError("No collection name set, cannot retrieve docs")
         _ids = [str(id) for id in ids]
         tbl = self.client.open_table(self.config.collection_name)
-
-
-
-
-
-        docs = [
-            doc_cls(**(nested_dict_from_flat(rec, sub_dict="payload")))
-            for rec in records
-        ]
+        docs = []
+        for _id in _ids:
+            results = self._lance_result_to_docs(tbl.search().where(f"id == '{_id}'"))
+            if len(results) > 0:
+                docs.append(results[0])
         return docs
 
     def similar_texts_with_scores(
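
The new `add_dataframe` method ingests a pandas DataFrame directly: it embeds the `content` column, adds `vector` and (if missing) `id` columns, and derives a document model from the frame when creating a fresh table. A hedged usage sketch with hypothetical column names, given a `LanceDB` instance `vecdb`:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "abstract": ["A paper about embeddings.", "A paper about RAG."],
        "year": [2021, 2023],
    }
)

# `content` names the column to embed; `metadata` lists columns kept as
# metadata. An "id" column is generated automatically if absent.
vecdb.add_dataframe(df, content="abstract", metadata=["year"])
```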
@@ -270,23 +439,20 @@ class LanceDB(VectorStore):
     ) -> List[Tuple[Document, float]]:
         embedding = self.embedding_fn([text])[0]
         tbl = self.client.open_table(self.config.collection_name)
-
+        result = (
             tbl.search(embedding)
             .metric(self.config.distance)
-            .where(where)
+            .where(where, prefilter=True)
             .limit(k)
-            .to_arrow()
-            .to_pylist()
         )
-
+        docs = self._lance_result_to_docs(result)
         # note _distance is 1 - cosine
-
-
-
-
-
-            for rec in
-        ]
+        if self.is_from_dataframe:
+            scores = [
+                1 - rec["_distance"] for rec in result.to_pandas().to_dict("records")
+            ]
+        else:
+            scores = [1 - rec["_distance"] for rec in result.to_arrow().to_pylist()]
         if len(docs) == 0:
             logger.warning(f"No matches found for {text}")
             return []
langroid/vector_store/meilisearch.py
CHANGED
@@ -32,7 +32,7 @@ class MeiliSearchConfig(VectorStoreConfig):
 
 
 class MeiliSearch(VectorStore):
-    def __init__(self, config: MeiliSearchConfig):
+    def __init__(self, config: MeiliSearchConfig = MeiliSearchConfig()):
         super().__init__(config)
         self.config: MeiliSearchConfig = config
         self.host = config.host
@@ -165,7 +165,7 @@ class MeiliSearch(VectorStore):
         async with self.client() as client:
             index = client.index(collection_name)
             await index.add_documents_in_batches(
-                documents=documents,
+                documents=documents,
                 batch_size=self.config.batch_size,
                 primary_key=self.config.primary_key,
             )
@@ -198,18 +198,19 @@ class MeiliSearch(VectorStore):
         except ValueError:
             return id
 
-    async def _async_get_documents(self) -> DocumentsInfo:
+    async def _async_get_documents(self, where: str = "") -> DocumentsInfo:
         if self.config.collection_name is None:
             raise ValueError("No collection name set, cannot retrieve docs")
+        filter = [] if where is None else where
         async with self.client() as client:
             index = client.index(self.config.collection_name)
-            documents = await index.get_documents(limit=10_000)
+            documents = await index.get_documents(limit=10_000, filter=filter)
         return documents
 
-    def get_all_documents(self) -> List[Document]:
+    def get_all_documents(self, where: str = "") -> List[Document]:
         if self.config.collection_name is None:
             raise ValueError("No collection name set, cannot retrieve docs")
-        docs = asyncio.run(self._async_get_documents())
+        docs = asyncio.run(self._async_get_documents(where))
         if docs is None:
             return []
         doc_results = docs.results
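
MeiliSearch's `get_all_documents` gains the same optional `where` parameter, forwarded to `index.get_documents(..., filter=...)`. A minimal sketch; the filter expression is hypothetical and uses MeiliSearch's own filter syntax:

```python
# No filter: behaves exactly as before.
all_docs = vecdb.get_all_documents()

# Filter string forwarded to the MeiliSearch index (the attribute must be
# registered as filterable on the index for this to apply).
news_docs = vecdb.get_all_documents(where="category = news")
```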
langroid/vector_store/momento.py
CHANGED
@@ -2,6 +2,7 @@
 Momento Vector Index.
 https://docs.momentohq.com/vector-index/develop/api-reference
 """
+
 import logging
 import os
 from typing import List, Optional, Sequence, Tuple, no_type_check
@@ -44,7 +45,7 @@ class MomentoVIConfig(VectorStoreConfig):
 
 
 class MomentoVI(VectorStore):
-    def __init__(self, config: MomentoVIConfig):
+    def __init__(self, config: MomentoVIConfig = MomentoVIConfig()):
         super().__init__(config)
         self.config: MomentoVIConfig = config
         emb_model = EmbeddingModel.create(config.embedding)
@@ -201,7 +202,7 @@ class MomentoVI(VectorStore):
         except ValueError:
             return id
 
-    def get_all_documents(self) -> List[Document]:
+    def get_all_documents(self, where: str = "") -> List[Document]:
         raise NotImplementedError(
             """
             MomentoVI does not support get_all_documents().