langchain-postgres 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_postgres/vectorstores.py +896 -76
- {langchain_postgres-0.0.6.dist-info → langchain_postgres-0.0.7.dist-info}/METADATA +8 -1
- {langchain_postgres-0.0.6.dist-info → langchain_postgres-0.0.7.dist-info}/RECORD +5 -5
- {langchain_postgres-0.0.6.dist-info → langchain_postgres-0.0.7.dist-info}/LICENSE +0 -0
- {langchain_postgres-0.0.6.dist-info → langchain_postgres-0.0.7.dist-info}/WHEEL +0 -0
--- langchain_postgres/vectorstores.py (0.0.6)
+++ langchain_postgres/vectorstores.py (0.0.7)
@@ -1,36 +1,50 @@
+# pylint: disable=too-many-lines
 from __future__ import annotations
 
+import contextlib
 import enum
 import logging
 import uuid
 from typing import (
     Any,
+    AsyncGenerator,
     Callable,
     Dict,
+    Generator,
     Iterable,
     List,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
 )
+from typing import (
+    cast as typing_cast,
+)
 
 import numpy as np
 import sqlalchemy
-from sqlalchemy import SQLColumnExpression, cast, delete, func
-from sqlalchemy.dialects.postgresql import JSON, JSONB, JSONPATH, UUID, insert
-from sqlalchemy.orm import Session, relationship, sessionmaker
-
-try:
-    from sqlalchemy.orm import declarative_base
-except ImportError:
-    from sqlalchemy.ext.declarative import declarative_base
-
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
-from langchain_core.runnables.config import run_in_executor
 from langchain_core.utils import get_from_dict_or_env
 from langchain_core.vectorstores import VectorStore
+from sqlalchemy import SQLColumnExpression, cast, create_engine, delete, func, select
+from sqlalchemy.dialects.postgresql import JSON, JSONB, JSONPATH, UUID, insert
+from sqlalchemy.engine import Connection, Engine
+from sqlalchemy.ext.asyncio import (
+    AsyncEngine,
+    AsyncSession,
+    async_sessionmaker,
+    create_async_engine,
+)
+from sqlalchemy.orm import (
+    Session,
+    declarative_base,
+    relationship,
+    scoped_session,
+    sessionmaker,
+)
 
 from langchain_postgres._utils import maximal_marginal_relevance
 
@@ -74,7 +88,7 @@ TEXT_OPERATORS = {
     "$ilike",
 }
 
-LOGICAL_OPERATORS = {"$and", "$or"}
+LOGICAL_OPERATORS = {"$and", "$or", "$not"}
 
 SUPPORTED_OPERATORS = (
     set(COMPARISONS_TO_NATIVE)
@@ -112,7 +126,27 @@ def _get_embedding_collection_store(vector_dimension: Optional[int] = None) -> A
         def get_by_name(
             cls, session: Session, name: str
         ) -> Optional["CollectionStore"]:
-            return
+            return (
+                session.query(cls)
+                .filter(typing_cast(sqlalchemy.Column, cls.name) == name)
+                .first()
+            )
+
+        @classmethod
+        async def aget_by_name(
+            cls, session: AsyncSession, name: str
+        ) -> Optional["CollectionStore"]:
+            return (
+                (
+                    await session.execute(
+                        select(CollectionStore).where(
+                            typing_cast(sqlalchemy.Column, cls.name) == name
+                        )
+                    )
+                )
+                .scalars()
+                .first()
+            )
 
         @classmethod
         def get_or_create(
@@ -136,6 +170,28 @@ def _get_embedding_collection_store(vector_dimension: Optional[int] = None) -> A
                 created = True
             return collection, created
 
+        @classmethod
+        async def aget_or_create(
+            cls,
+            session: AsyncSession,
+            name: str,
+            cmetadata: Optional[dict] = None,
+        ) -> Tuple["CollectionStore", bool]:
+            """
+            Get or create a collection.
+            Returns [Collection, bool] where the bool is True if the collection was created.
+            """  # noqa: E501
+            created = False
+            collection = await cls.aget_by_name(session, name)
+            if collection:
+                return collection, created
+
+            collection = cls(name=name, cmetadata=cmetadata)
+            session.add(collection)
+            await session.commit()
+            created = True
+            return collection, created
+
     class EmbeddingStore(Base):
         """Embedding store."""
 
@@ -177,7 +233,16 @@ def _results_to_docs(docs_and_scores: Any) -> List[Document]:
     return [doc for doc, _ in docs_and_scores]
 
 
-Connection
+def _create_vector_extension(conn: Connection) -> None:
+    statement = sqlalchemy.text(
+        "SELECT pg_advisory_xact_lock(1573678846307946496);"
+        "CREATE EXTENSION IF NOT EXISTS vector;"
+    )
+    conn.execute(statement)
+    conn.commit()
+
+
+DBConnection = Union[sqlalchemy.engine.Engine, str]
 
 
 class PGVector(VectorStore):
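The new module-level helper above serializes `CREATE EXTENSION` behind a Postgres advisory transaction lock, so concurrent workers initializing the store don't race each other. A minimal standalone sketch of the same pattern — the DSN below is a placeholder, and a Postgres server with pgvector available is assumed:

```python
import sqlalchemy

# Placeholder DSN; any SQLAlchemy Postgres URL works here.
engine = sqlalchemy.create_engine("postgresql+psycopg://user:pass@localhost:5432/db")

with engine.connect() as conn:
    # The advisory xact lock serializes concurrent CREATE EXTENSION attempts,
    # which otherwise fail when several processes initialize at once.
    conn.execute(
        sqlalchemy.text(
            "SELECT pg_advisory_xact_lock(1573678846307946496);"
            "CREATE EXTENSION IF NOT EXISTS vector;"
        )
    )
    conn.commit()
```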
@@ -215,6 +280,7 @@ class PGVector(VectorStore):
                 connection=connection_string,
                 collection_name=collection_name,
                 use_jsonb=True,
+                async_mode=False,
             )
 
 
@@ -232,13 +298,52 @@ class PGVector(VectorStore):
       You will need to recreate the tables if you are using an existing database.
     * A Connection object has to be provided explicitly. Connections will not be
       picked up automatically based on env variables.
+    * langchain_postgres now accept async connections. If you want to use the async
+      version, you need to set `async_mode=True` when initializing the store or
+      use an async engine.
+
+    Supported filter operators:
+
+    * $eq: Equality operator
+    * $ne: Not equal operator
+    * $lt: Less than operator
+    * $lte: Less than or equal operator
+    * $gt: Greater than operator
+    * $gte: Greater than or equal operator
+    * $in: In operator
+    * $nin: Not in operator
+    * $between: Between operator
+    * $exists: Exists operator
+    * $like: Like operator
+    * $ilike: Case insensitive like operator
+    * $and: Logical AND operator
+    * $or: Logical OR operator
+    * $not: Logical NOT operator
+
+    Example:
+
+    .. code-block:: python
+
+        vectorstore.similarity_search('kitty', k=10, filter={
+            'id': {'$in': [1, 5, 2, 9]}
+        })
+        #%% md
+
+        If you provide a dict with multiple fields, but no operators,
+        the top level will be interpreted as a logical **AND** filter
+
+        vectorstore.similarity_search('ducks', k=10, filter={
+            'id': {'$in': [1, 5, 2, 9]},
+            'location': {'$in': ["pond", "market"]}
+        })
+
     """
 
     def __init__(
         self,
         embeddings: Embeddings,
         *,
-        connection:
+        connection: Union[None, DBConnection, Engine, AsyncEngine, str] = None,
         embedding_length: Optional[int] = None,
         collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
         collection_metadata: Optional[dict] = None,
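The operator table added to the docstring maps directly onto nested filter dicts. A hedged sketch combining the new `$not` with `$and`, assuming a `vectorstore` built as in the docstring example:

```python
# Ducks from the pond or market, excluding a known id list ($not is new in 0.0.7).
results = vectorstore.similarity_search(
    "ducks",
    k=10,
    filter={
        "$and": [
            {"location": {"$in": ["pond", "market"]}},
            {"$not": {"id": {"$in": [1, 5, 2, 9]}}},
        ]
    },
)
```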
@@ -249,11 +354,13 @@ class PGVector(VectorStore):
         engine_args: Optional[dict[str, Any]] = None,
         use_jsonb: bool = True,
         create_extension: bool = True,
+        async_mode: bool = False,
     ) -> None:
         """Initialize the PGVector store.
+        For an async version, use `PGVector.acreate()` instead.
 
         Args:
-            connection: Postgres connection string.
+            connection: Postgres connection string or (async)engine.
             embeddings: Any embedding function implementing
                 `langchain.embeddings.base.Embeddings` interface.
             embedding_length: The length of the embedding vector. (default: None)
@@ -277,6 +384,7 @@ class PGVector(VectorStore):
                 doesn't exist. disabling creation is useful when using ReadOnly
                 Databases.
         """
+        self.async_mode = async_mode
         self.embedding_function = embeddings
         self._embedding_length = embedding_length
         self.collection_name = collection_name
@@ -285,20 +393,33 @@ class PGVector(VectorStore):
         self.pre_delete_collection = pre_delete_collection
         self.logger = logger or logging.getLogger(__name__)
         self.override_relevance_score_fn = relevance_score_fn
+        self._engine: Optional[Engine] = None
+        self._async_engine: Optional[AsyncEngine] = None
+        self._async_init = False
 
         if isinstance(connection, str):
-
-
-
-
+            if async_mode:
+                self._async_engine = create_async_engine(
+                    connection, **(engine_args or {})
+                )
+            else:
+                self._engine = create_engine(url=connection, **(engine_args or {}))
+        elif isinstance(connection, Engine):
+            self.async_mode = False
             self._engine = connection
+        elif isinstance(connection, AsyncEngine):
+            self.async_mode = True
+            self._async_engine = connection
         else:
             raise ValueError(
                 "connection should be a connection string or an instance of "
-                "sqlalchemy.engine.Engine"
+                "sqlalchemy.engine.Engine or sqlalchemy.ext.asyncio.engine.AsyncEngine"
             )
-
-
+        self.session_maker: Union[scoped_session, async_sessionmaker]
+        if self.async_mode:
+            self.session_maker = async_sessionmaker(bind=self._async_engine)
+        else:
+            self.session_maker = scoped_session(sessionmaker(bind=self._engine))
 
         self.use_jsonb = use_jsonb
         self.create_extension = create_extension
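A sketch of the three connection forms the dispatch above accepts, with a placeholder DSN and langchain-core's built-in `FakeEmbeddings` standing in for a real embedding function:

```python
from langchain_core.embeddings import FakeEmbeddings
from sqlalchemy.ext.asyncio import create_async_engine

from langchain_postgres.vectorstores import PGVector

DSN = "postgresql+psycopg://user:pass@localhost:5432/db"  # placeholder

# 1. A string DSN: async_mode decides which engine gets created (sync here).
sync_store = PGVector(embeddings=FakeEmbeddings(size=1536), connection=DSN)

# 2. A string DSN with async_mode=True: an AsyncEngine is created internally.
async_store = PGVector(
    embeddings=FakeEmbeddings(size=1536), connection=DSN, async_mode=True
)

# 3. An AsyncEngine instance: the engine type wins and forces async_mode=True.
engine_store = PGVector(
    embeddings=FakeEmbeddings(size=1536), connection=create_async_engine(DSN)
)
```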
@@ -306,7 +427,8 @@ class PGVector(VectorStore):
         if not use_jsonb:
             # Replace with a deprecation warning.
             raise NotImplementedError("use_jsonb=False is no longer supported.")
-        self.
+        if not self.async_mode:
+            self.__post_init__()
 
     def __post_init__(
         self,
@@ -323,52 +445,99 @@ class PGVector(VectorStore):
         self.create_tables_if_not_exists()
         self.create_collection()
 
-    def
-
-
+    async def __apost_init__(
+        self,
+    ) -> None:
+        """Async initialize the store (use lazy approach)."""
+        if self._async_init:  # Warning: possible race condition
+            return
+        self._async_init = True
+
+        EmbeddingStore, CollectionStore = _get_embedding_collection_store(
+            self._embedding_length
+        )
+        self.CollectionStore = CollectionStore
+        self.EmbeddingStore = EmbeddingStore
+        if self.create_extension:
+            await self.acreate_vector_extension()
+
+        await self.acreate_tables_if_not_exists()
+        await self.acreate_collection()
 
     @property
     def embeddings(self) -> Embeddings:
         return self.embedding_function
 
     def create_vector_extension(self) -> None:
+        assert self._engine, "engine not found"
         try:
-            with self.
-
-                # creation of the vector extension.
-                # https://github.com/langchain-ai/langchain/issues/12933
-                # For more information see:
-                # https://www.postgresql.org/docs/16/explicit-locking.html#ADVISORY-LOCKS
-                statement = sqlalchemy.text(
-                    "BEGIN;"
-                    "SELECT pg_advisory_xact_lock(1573678846307946496);"
-                    "CREATE EXTENSION IF NOT EXISTS vector;"
-                    "COMMIT;"
-                )
-                session.execute(statement)
-                session.commit()
+            with self._engine.connect() as conn:
+                _create_vector_extension(conn)
         except Exception as e:
             raise Exception(f"Failed to create vector extension: {e}") from e
 
+    async def acreate_vector_extension(self) -> None:
+        assert self._async_engine, "_async_engine not found"
+
+        async with self._async_engine.begin() as conn:
+            await conn.run_sync(_create_vector_extension)
+
     def create_tables_if_not_exists(self) -> None:
-        with self.
+        with self._make_sync_session() as session:
             Base.metadata.create_all(session.get_bind())
+            session.commit()
+
+    async def acreate_tables_if_not_exists(self) -> None:
+        assert self._async_engine, "This method must be called with async_mode"
+        async with self._async_engine.begin() as conn:
+            await conn.run_sync(Base.metadata.create_all)
 
     def drop_tables(self) -> None:
-        with self.
+        with self._make_sync_session() as session:
             Base.metadata.drop_all(session.get_bind())
+            session.commit()
+
+    async def adrop_tables(self) -> None:
+        assert self._async_engine, "This method must be called with async_mode"
+        await self.__apost_init__()  # Lazy async init
+        async with self._async_engine.begin() as conn:
+            await conn.run_sync(Base.metadata.drop_all)
 
     def create_collection(self) -> None:
        if self.pre_delete_collection:
            self.delete_collection()
-        with self.
+        with self._make_sync_session() as session:
            self.CollectionStore.get_or_create(
                session, self.collection_name, cmetadata=self.collection_metadata
            )
+            session.commit()
+
+    async def acreate_collection(self) -> None:
+        await self.__apost_init__()  # Lazy async init
+        async with self._make_async_session() as session:
+            if self.pre_delete_collection:
+                await self._adelete_collection(session)
+            await self.CollectionStore.aget_or_create(
+                session, self.collection_name, cmetadata=self.collection_metadata
+            )
+            await session.commit()
+
+    def _delete_collection(self, session: Session) -> None:
+        collection = self.get_collection(session)
+        if not collection:
+            self.logger.warning("Collection not found")
+            return
+        session.delete(collection)
+
+    async def _adelete_collection(self, session: AsyncSession) -> None:
+        collection = await self.aget_collection(session)
+        if not collection:
+            self.logger.warning("Collection not found")
+            return
+        await session.delete(collection)
 
     def delete_collection(self) -> None:
-        self.
-        with self._session_maker() as session:  # type: ignore[arg-type]
+        with self._make_sync_session() as session:
             collection = self.get_collection(session)
             if not collection:
                 self.logger.warning("Collection not found")
@@ -376,6 +545,16 @@ class PGVector(VectorStore):
             session.delete(collection)
             session.commit()
 
+    async def adelete_collection(self) -> None:
+        await self.__apost_init__()  # Lazy async init
+        async with self._make_async_session() as session:
+            collection = await self.aget_collection(session)
+            if not collection:
+                self.logger.warning("Collection not found")
+                return
+            await session.delete(collection)
+            await session.commit()
+
     def delete(
         self,
         ids: Optional[List[str]] = None,
@@ -388,7 +567,7 @@ class PGVector(VectorStore):
             ids: List of ids to delete.
             collection_only: Only delete ids in the collection.
         """
-        with self.
+        with self._make_sync_session() as session:
             if ids is not None:
                 self.logger.debug(
                     "Trying to delete vectors by ids (represented by the model "
@@ -411,9 +590,51 @@ class PGVector(VectorStore):
                 session.execute(stmt)
             session.commit()
 
+    async def adelete(
+        self,
+        ids: Optional[List[str]] = None,
+        collection_only: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Async delete vectors by ids or uuids.
+
+        Args:
+            ids: List of ids to delete.
+            collection_only: Only delete ids in the collection.
+        """
+        await self.__apost_init__()  # Lazy async init
+        async with self._make_async_session() as session:
+            if ids is not None:
+                self.logger.debug(
+                    "Trying to delete vectors by ids (represented by the model "
+                    "using the custom ids field)"
+                )
+
+                stmt = delete(self.EmbeddingStore)
+
+                if collection_only:
+                    collection = await self.aget_collection(session)
+                    if not collection:
+                        self.logger.warning("Collection not found")
+                        return
+
+                    stmt = stmt.where(
+                        self.EmbeddingStore.collection_id == collection.uuid
+                    )
+
+                stmt = stmt.where(self.EmbeddingStore.id.in_(ids))
+                await session.execute(stmt)
+            await session.commit()
+
     def get_collection(self, session: Session) -> Any:
+        assert not self._async_engine, "This method must be called without async_mode"
         return self.CollectionStore.get_by_name(session, self.collection_name)
 
+    async def aget_collection(self, session: AsyncSession) -> Any:
+        assert self._async_engine, "This method must be called with async_mode"
+        await self.__apost_init__()  # Lazy async init
+        return await self.CollectionStore.aget_by_name(session, self.collection_name)
+
     @classmethod
     def __from(
         cls,
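Usage sketch for the new async delete, assuming the `async_store` from the construction example earlier and hypothetical document ids:

```python
import asyncio


async def cleanup(store) -> None:
    # collection_only scopes the DELETE to this store's collection,
    # mirroring the branch in adelete above.
    await store.adelete(ids=["doc-1", "doc-2"], collection_only=True)


asyncio.run(cleanup(async_store))
```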
@@ -452,6 +673,45 @@ class PGVector(VectorStore):
 
         return store
 
+    @classmethod
+    async def __afrom(
+        cls,
+        texts: List[str],
+        embeddings: List[List[float]],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
+        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+        connection: Optional[str] = None,
+        pre_delete_collection: bool = False,
+        *,
+        use_jsonb: bool = True,
+        **kwargs: Any,
+    ) -> PGVector:
+        if ids is None:
+            ids = [str(uuid.uuid1()) for _ in texts]
+
+        if not metadatas:
+            metadatas = [{} for _ in texts]
+
+        store = cls(
+            connection=connection,
+            collection_name=collection_name,
+            embeddings=embedding,
+            distance_strategy=distance_strategy,
+            pre_delete_collection=pre_delete_collection,
+            use_jsonb=use_jsonb,
+            async_mode=True,
+            **kwargs,
+        )
+
+        await store.aadd_embeddings(
+            texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+        )
+
+        return store
+
     def add_embeddings(
         self,
         texts: Iterable[str],
@@ -466,15 +726,18 @@ class PGVector(VectorStore):
             texts: Iterable of strings to add to the vectorstore.
             embeddings: List of list of embedding vectors.
             metadatas: List of metadatas associated with the texts.
+            ids: Optional list of ids for the documents.
+                If not provided, will generate a new id for each document.
             kwargs: vectorstore specific parameters
         """
+        assert not self._async_engine, "This method must be called with sync_mode"
         if ids is None:
             ids = [str(uuid.uuid4()) for _ in texts]
 
         if not metadatas:
             metadatas = [{} for _ in texts]
 
-        with self.
+        with self._make_sync_session() as session:  # type: ignore[arg-type]
             collection = self.get_collection(session)
             if not collection:
                 raise ValueError("Collection not found")
@@ -505,6 +768,62 @@ class PGVector(VectorStore):
 
         return ids
 
+    async def aadd_embeddings(
+        self,
+        texts: Iterable[str],
+        embeddings: List[List[float]],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Async add embeddings to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            embeddings: List of list of embedding vectors.
+            metadatas: List of metadatas associated with the texts.
+            ids: Optional list of ids for the texts.
+                If not provided, will generate a new id for each text.
+            kwargs: vectorstore specific parameters
+        """
+        await self.__apost_init__()  # Lazy async init
+        if ids is None:
+            ids = [str(uuid.uuid1()) for _ in texts]
+
+        if not metadatas:
+            metadatas = [{} for _ in texts]
+
+        async with self._make_async_session() as session:  # type: ignore[arg-type]
+            collection = await self.aget_collection(session)
+            if not collection:
+                raise ValueError("Collection not found")
+            data = [
+                {
+                    "id": id,
+                    "collection_id": collection.uuid,
+                    "embedding": embedding,
+                    "document": text,
+                    "cmetadata": metadata or {},
+                }
+                for text, metadata, embedding, id in zip(
+                    texts, metadatas, embeddings, ids
+                )
+            ]
+            stmt = insert(self.EmbeddingStore).values(data)
+            on_conflict_stmt = stmt.on_conflict_do_update(
+                index_elements=["id"],
+                # Conflict detection based on these columns
+                set_={
+                    "embedding": stmt.excluded.embedding,
+                    "document": stmt.excluded.document,
+                    "cmetadata": stmt.excluded.cmetadata,
+                },
+            )
+            await session.execute(on_conflict_stmt)
+            await session.commit()
+
+        return ids
+
     def add_texts(
         self,
         texts: Iterable[str],
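`aadd_embeddings` writes through a Postgres upsert: `INSERT ... ON CONFLICT (id) DO UPDATE`, so re-adding an existing id overwrites the row instead of failing. The same shape over a toy table, to make the idempotency visible (table and columns below are hypothetical):

```python
import sqlalchemy
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import insert

metadata = sqlalchemy.MetaData()
docs = sqlalchemy.Table(
    "docs_demo",  # hypothetical table
    metadata,
    sqlalchemy.Column("id", sqlalchemy.String, primary_key=True),
    sqlalchemy.Column("document", sqlalchemy.String),
)

stmt = insert(docs).values([{"id": "doc-1", "document": "v2"}])
# Conflicting on the id column updates in place, keeping add_* calls
# idempotent per id, as in aadd_embeddings above.
stmt = stmt.on_conflict_do_update(
    index_elements=["id"],
    set_={"document": stmt.excluded.document},
)
print(stmt.compile(dialect=postgresql.dialect()))
```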
@@ -517,16 +836,44 @@ class PGVector(VectorStore):
         Args:
             texts: Iterable of strings to add to the vectorstore.
             metadatas: Optional list of metadatas associated with the texts.
+            ids: Optional list of ids for the texts.
+                If not provided, will generate a new id for each text.
             kwargs: vectorstore specific parameters
 
         Returns:
             List of ids from adding the texts into the vectorstore.
         """
+        assert not self._async_engine, "This method must be called without async_mode"
         embeddings = self.embedding_function.embed_documents(list(texts))
         return self.add_embeddings(
             texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
         )
 
+    async def aadd_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+            ids: Optional list of ids for the texts.
+                If not provided, will generate a new id for each text.
+            kwargs: vectorstore specific parameters
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        await self.__apost_init__()  # Lazy async init
+        embeddings = await self.embedding_function.aembed_documents(list(texts))
+        return await self.aadd_embeddings(
+            texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+        )
+
     def similarity_search(
         self,
         query: str,
@@ -544,6 +891,7 @@ class PGVector(VectorStore):
         Returns:
             List of Documents most similar to the query.
         """
+        assert not self._async_engine, "This method must be called without async_mode"
         embedding = self.embedding_function.embed_query(text=query)
         return self.similarity_search_by_vector(
             embedding=embedding,
@@ -551,6 +899,31 @@ class PGVector(VectorStore):
             filter=filter,
         )
 
+    async def asimilarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[dict] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Run similarity search with PGVector with distance.
+
+        Args:
+            query (str): Query text to search for.
+            k (int): Number of results to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
+        await self.__apost_init__()  # Lazy async init
+        embedding = self.embedding_function.embed_query(text=query)
+        return await self.asimilarity_search_by_vector(
+            embedding=embedding,
+            k=k,
+            filter=filter,
+        )
+
     def similarity_search_with_score(
         self,
         query: str,
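Usage sketch, again assuming the `async_store` from earlier. Note that `asimilarity_search` embeds the query synchronously (`embed_query`); only the database roundtrip is awaited:

```python
import asyncio


async def search(store):
    return await store.asimilarity_search(
        "kitty", k=4, filter={"id": {"$in": [1, 5]}}
    )


docs = asyncio.run(search(async_store))
```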
@@ -567,12 +940,36 @@ class PGVector(VectorStore):
         Returns:
             List of Documents most similar to the query and score for each.
         """
+        assert not self._async_engine, "This method must be called without async_mode"
         embedding = self.embedding_function.embed_query(query)
         docs = self.similarity_search_with_score_by_vector(
             embedding=embedding, k=k, filter=filter
         )
         return docs
 
+    async def asimilarity_search_with_score(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[dict] = None,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List of Documents most similar to the query and score for each.
+        """
+        await self.__apost_init__()  # Lazy async init
+        embedding = self.embedding_function.embed_query(query)
+        docs = await self.asimilarity_search_with_score_by_vector(
+            embedding=embedding, k=k, filter=filter
+        )
+        return docs
+
     @property
     def distance_strategy(self) -> Any:
         if self._distance_strategy == DistanceStrategy.EUCLIDEAN:
@@ -593,10 +990,25 @@ class PGVector(VectorStore):
         k: int = 4,
         filter: Optional[dict] = None,
     ) -> List[Tuple[Document, float]]:
+        assert not self._async_engine, "This method must be called without async_mode"
         results = self.__query_collection(embedding=embedding, k=k, filter=filter)
 
         return self._results_to_docs_and_scores(results)
 
+    async def asimilarity_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[dict] = None,
+    ) -> List[Tuple[Document, float]]:
+        await self.__apost_init__()  # Lazy async init
+        async with self._make_async_session() as session:  # type: ignore[arg-type]
+            results = await self.__aquery_collection(
+                session=session, embedding=embedding, k=k, filter=filter
+            )
+
+            return self._results_to_docs_and_scores(results)
+
     def _results_to_docs_and_scores(self, results: Any) -> List[Tuple[Document, float]]:
         """Return docs and scores from results."""
         docs = [
@@ -698,6 +1110,11 @@ class PGVector(VectorStore):
                     f"Unsupported type: {type(val)} for value: {val}"
                 )
 
+        if isinstance(val, bool):  # b/c bool is an instance of int
+            raise NotImplementedError(
+                f"Unsupported type: {type(val)} for value: {val}"
+            )
+
         queried_field = self.EmbeddingStore.cmetadata[field].astext
 
         if operator in {"$in"}:
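The extra guard exists because `bool` subclasses `int` in Python, so `True`/`False` would otherwise slip through the numeric value handling:

```python
# bool passes isinstance checks against int, hence the explicit rejection above.
assert isinstance(True, int)
assert True + True == 2
```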
@@ -832,26 +1249,25 @@ class PGVector(VectorStore):
         """
         if isinstance(filters, dict):
             if len(filters) == 1:
-                # The only operators allowed at the top level are $AND and $
+                # The only operators allowed at the top level are $AND, $OR, and $NOT
                 # First check if an operator or a field
                 key, value = list(filters.items())[0]
                 if key.startswith("$"):
                     # Then it's an operator
-                    if key.lower() not in ["$and", "$or"]:
+                    if key.lower() not in ["$and", "$or", "$not"]:
                         raise ValueError(
-                            f"Invalid filter condition. Expected $and or
+                            f"Invalid filter condition. Expected $and, $or or $not "
                             f"but got: {key}"
                         )
                 else:
                     # Then it's a field
                     return self._handle_field_filter(key, filters[key])
 
-                # Here we handle the $and and $or operators
-                if not isinstance(value, list):
-                    raise ValueError(
-                        f"Expected a list, but got {type(value)} for value: {value}"
-                    )
                 if key.lower() == "$and":
+                    if not isinstance(value, list):
+                        raise ValueError(
+                            f"Expected a list, but got {type(value)} for value: {value}"
+                        )
                     and_ = [self._create_filter_clause(el) for el in value]
                     if len(and_) > 1:
                         return sqlalchemy.and_(*and_)
@@ -863,6 +1279,10 @@ class PGVector(VectorStore):
                         "but got an empty dictionary"
                     )
                 elif key.lower() == "$or":
+                    if not isinstance(value, list):
+                        raise ValueError(
+                            f"Expected a list, but got {type(value)} for value: {value}"
+                        )
                     or_ = [self._create_filter_clause(el) for el in value]
                     if len(or_) > 1:
                         return sqlalchemy.or_(*or_)
@@ -873,9 +1293,29 @@ class PGVector(VectorStore):
                         "Invalid filter condition. Expected a dictionary "
                         "but got an empty dictionary"
                     )
+                elif key.lower() == "$not":
+                    if isinstance(value, list):
+                        not_conditions = [
+                            self._create_filter_clause(item) for item in value
+                        ]
+                        not_ = sqlalchemy.and_(
+                            *[
+                                sqlalchemy.not_(condition)
+                                for condition in not_conditions
+                            ]
+                        )
+                        return not_
+                    elif isinstance(value, dict):
+                        not_ = self._create_filter_clause(value)
+                        return sqlalchemy.not_(not_)
+                    else:
+                        raise ValueError(
+                            f"Invalid filter condition. Expected a dictionary "
+                            f"or a list but got: {type(value)}"
+                        )
                 else:
                     raise ValueError(
-                        f"Invalid filter condition. Expected $and or
+                        f"Invalid filter condition. Expected $and, $or or $not "
                         f"but got: {key}"
                     )
         elif len(filters) > 1:
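Per the branch above, `$not` accepts either a single dict (plain negation) or a list (each element negated, results ANDed). Both filter shapes, sketched:

```python
# Compiles to: NOT(location = 'pond')
single = {"$not": {"location": {"$eq": "pond"}}}

# Compiles to: NOT(location = 'pond') AND NOT(id > 100)
listed = {"$not": [{"location": {"$eq": "pond"}}, {"id": {"$gt": 100}}]}
```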
@@ -908,9 +1348,9 @@ class PGVector(VectorStore):
         embedding: List[float],
         k: int = 4,
         filter: Optional[Dict[str, str]] = None,
-    ) ->
+    ) -> Sequence[Any]:
         """Query the collection."""
-        with self.
+        with self._make_sync_session() as session:  # type: ignore[arg-type]
             collection = self.get_collection(session)
             if not collection:
                 raise ValueError("Collection not found")
@@ -931,7 +1371,7 @@ class PGVector(VectorStore):
             results: List[Any] = (
                 session.query(
                     self.EmbeddingStore,
-                    self.distance_strategy(embedding).label("distance"),
+                    self.distance_strategy(embedding).label("distance"),
                 )
                 .filter(*filter_by)
                 .order_by(sqlalchemy.asc("distance"))
@@ -945,6 +1385,50 @@ class PGVector(VectorStore):
 
         return results
 
+    async def __aquery_collection(
+        self,
+        session: AsyncSession,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
+    ) -> Sequence[Any]:
+        """Query the collection."""
+        async with self._make_async_session() as session:  # type: ignore[arg-type]
+            collection = await self.aget_collection(session)
+            if not collection:
+                raise ValueError("Collection not found")
+
+            filter_by = [self.EmbeddingStore.collection_id == collection.uuid]
+            if filter:
+                if self.use_jsonb:
+                    filter_clauses = self._create_filter_clause(filter)
+                    if filter_clauses is not None:
+                        filter_by.append(filter_clauses)
+                else:
+                    # Old way of doing things
+                    filter_clauses = self._create_filter_clause_json_deprecated(filter)
+                    filter_by.extend(filter_clauses)
+
+            _type = self.EmbeddingStore
+
+            stmt = (
+                select(
+                    self.EmbeddingStore,
+                    self.distance_strategy(embedding).label("distance"),
+                )
+                .filter(*filter_by)
+                .order_by(sqlalchemy.asc("distance"))
+                .join(
+                    self.CollectionStore,
+                    self.EmbeddingStore.collection_id == self.CollectionStore.uuid,
+                )
+                .limit(k)
+            )
+
+            results: Sequence[Any] = (await session.execute(stmt)).all()
+
+            return results
+
     def similarity_search_by_vector(
         self,
         embedding: List[float],
@@ -962,11 +1446,36 @@ class PGVector(VectorStore):
         Returns:
             List of Documents most similar to the query vector.
         """
+        assert not self._async_engine, "This method must be called without async_mode"
         docs_and_scores = self.similarity_search_with_score_by_vector(
             embedding=embedding, k=k, filter=filter
         )
         return _results_to_docs(docs_and_scores)
 
+    async def asimilarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[dict] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
+        assert self._async_engine, "This method must be called with async_mode"
+        await self.__apost_init__()  # Lazy async init
+        docs_and_scores = await self.asimilarity_search_with_score_by_vector(
+            embedding=embedding, k=k, filter=filter
+        )
+        return _results_to_docs(docs_and_scores)
+
     @classmethod
     def from_texts(
         cls: Type[PGVector],
@@ -997,6 +1506,35 @@ class PGVector(VectorStore):
             **kwargs,
         )
 
+    @classmethod
+    async def afrom_texts(
+        cls: Type[PGVector],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
+        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+        ids: Optional[List[str]] = None,
+        pre_delete_collection: bool = False,
+        *,
+        use_jsonb: bool = True,
+        **kwargs: Any,
+    ) -> PGVector:
+        """Return VectorStore initialized from documents and embeddings."""
+        embeddings = embedding.embed_documents(list(texts))
+        return await cls.__afrom(
+            texts,
+            embeddings,
+            embedding,
+            metadatas=metadatas,
+            ids=ids,
+            collection_name=collection_name,
+            distance_strategy=distance_strategy,
+            pre_delete_collection=pre_delete_collection,
+            use_jsonb=use_jsonb,
+            **kwargs,
+        )
+
     @classmethod
     def from_embeddings(
         cls,
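Construction sketch for `afrom_texts` (placeholder DSN, stand-in embeddings). Note that the classmethod embeds the texts synchronously via `embed_documents`; the insert is what happens asynchronously:

```python
import asyncio

from langchain_core.embeddings import FakeEmbeddings
from langchain_postgres.vectorstores import PGVector


async def build() -> PGVector:
    return await PGVector.afrom_texts(
        texts=["hello", "world"],
        embedding=FakeEmbeddings(size=1536),
        collection_name="demo",
        connection="postgresql+psycopg://user:pass@localhost:5432/db",  # placeholder
    )


store = asyncio.run(build())
```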
@@ -1019,6 +1557,7 @@ class PGVector(VectorStore):
             collection_name: Name of the collection.
             distance_strategy: Distance strategy to use.
             ids: Optional list of ids for the documents.
+                If not provided, will generate a new id for each document.
             pre_delete_collection: If True, will delete the collection if it exists.
                 **Attention**: This will delete all the documents in the existing
                 collection.
@@ -1053,6 +1592,51 @@ class PGVector(VectorStore):
             **kwargs,
         )
 
+    @classmethod
+    async def afrom_embeddings(
+        cls,
+        text_embeddings: List[Tuple[str, List[float]]],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
+        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+        ids: Optional[List[str]] = None,
+        pre_delete_collection: bool = False,
+        **kwargs: Any,
+    ) -> PGVector:
+        """Construct PGVector wrapper from raw documents and pre-
+        generated embeddings.
+
+        Return VectorStore initialized from documents and embeddings.
+        Postgres connection string is required
+        "Either pass it as a parameter
+        or set the PGVECTOR_CONNECTION_STRING environment variable.
+
+        Example:
+            .. code-block:: python
+
+                from langchain_community.vectorstores import PGVector
+                from langchain_community.embeddings import OpenAIEmbeddings
+                embeddings = OpenAIEmbeddings()
+                text_embeddings = embeddings.embed_documents(texts)
+                text_embedding_pairs = list(zip(texts, text_embeddings))
+                faiss = PGVector.from_embeddings(text_embedding_pairs, embeddings)
+        """
+        texts = [t[0] for t in text_embeddings]
+        embeddings = [t[1] for t in text_embeddings]
+
+        return await cls.__afrom(
+            texts,
+            embeddings,
+            embedding,
+            metadatas=metadatas,
+            ids=ids,
+            collection_name=collection_name,
+            distance_strategy=distance_strategy,
+            pre_delete_collection=pre_delete_collection,
+            **kwargs,
+        )
+
     @classmethod
     def from_existing_index(
         cls: Type[PGVector],
@@ -1061,7 +1645,7 @@ class PGVector(VectorStore):
         collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
         distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
         pre_delete_collection: bool = False,
-        connection: Optional[
+        connection: Optional[DBConnection] = None,
         **kwargs: Any,
     ) -> PGVector:
         """
@@ -1080,11 +1664,39 @@ class PGVector(VectorStore):
 
         return store
 
+    @classmethod
+    async def afrom_existing_index(
+        cls: Type[PGVector],
+        embedding: Embeddings,
+        *,
+        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
+        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+        pre_delete_collection: bool = False,
+        connection: Optional[DBConnection] = None,
+        **kwargs: Any,
+    ) -> PGVector:
+        """
+        Get instance of an existing PGVector store.This method will
+        return the instance of the store without inserting any new
+        embeddings
+        """
+        store = PGVector(
+            connection=connection,
+            collection_name=collection_name,
+            embeddings=embedding,
+            distance_strategy=distance_strategy,
+            pre_delete_collection=pre_delete_collection,
+            async_mode=True,
+            **kwargs,
+        )
+
+        return store
+
     @classmethod
     def get_connection_string(cls, kwargs: Dict[str, Any]) -> str:
         connection_string: str = get_from_dict_or_env(
             data=kwargs,
-            key="
+            key="connection",
             env_key="PGVECTOR_CONNECTION_STRING",
         )
 
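For an already-populated collection, `afrom_existing_index` attaches without embedding or inserting anything, which suits read-only paths. Sketch under the same placeholder assumptions as above:

```python
import asyncio

from langchain_core.embeddings import FakeEmbeddings
from langchain_postgres.vectorstores import PGVector


async def attach() -> PGVector:
    return await PGVector.afrom_existing_index(
        embedding=FakeEmbeddings(size=1536),
        collection_name="demo",
        connection="postgresql+psycopg://user:pass@localhost:5432/db",  # placeholder
    )


store = asyncio.run(attach())
```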
@@ -1103,7 +1715,7 @@ class PGVector(VectorStore):
         documents: List[Document],
         embedding: Embeddings,
         *,
-        connection: Optional[
+        connection: Optional[DBConnection] = None,
         collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
         distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
         ids: Optional[List[str]] = None,
@@ -1129,6 +1741,44 @@ class PGVector(VectorStore):
             **kwargs,
         )
 
+    @classmethod
+    async def afrom_documents(
+        cls: Type[PGVector],
+        documents: List[Document],
+        embedding: Embeddings,
+        collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME,
+        distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
+        ids: Optional[List[str]] = None,
+        pre_delete_collection: bool = False,
+        *,
+        use_jsonb: bool = True,
+        **kwargs: Any,
+    ) -> PGVector:
+        """
+        Return VectorStore initialized from documents and embeddings.
+        Postgres connection string is required
+        "Either pass it as a parameter
+        or set the PGVECTOR_CONNECTION_STRING environment variable.
+        """
+
+        texts = [d.page_content for d in documents]
+        metadatas = [d.metadata for d in documents]
+        connection_string = cls.get_connection_string(kwargs)
+
+        kwargs["connection"] = connection_string
+
+        return await cls.afrom_texts(
+            texts=texts,
+            pre_delete_collection=pre_delete_collection,
+            embedding=embedding,
+            distance_strategy=distance_strategy,
+            metadatas=metadatas,
+            ids=ids,
+            collection_name=collection_name,
+            use_jsonb=use_jsonb,
+            **kwargs,
+        )
+
     @classmethod
     def connection_string_from_db_params(
         cls,
@@ -1201,6 +1851,7 @@ class PGVector(VectorStore):
             List[Tuple[Document, float]]: List of Documents selected by maximal marginal
                 relevance to the query and score for each.
         """
+        assert not self._async_engine, "This method must be called without async_mode"
         results = self.__query_collection(embedding=embedding, k=fetch_k, filter=filter)
 
         embedding_list = [result.EmbeddingStore.embedding for result in results]
@@ -1216,6 +1867,55 @@ class PGVector(VectorStore):
 
         return [r for i, r in enumerate(candidates) if i in mmr_selected]
 
+    async def amax_marginal_relevance_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs selected using the maximal marginal relevance with score
+            to embedding vector.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
+                Defaults to 20.
+            lambda_mult (float): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: List of Documents selected by maximal marginal
+                relevance to the query and score for each.
+        """
+        await self.__apost_init__()  # Lazy async init
+        async with self._make_async_session() as session:
+            results = await self.__aquery_collection(
+                session=session, embedding=embedding, k=fetch_k, filter=filter
+            )
+
+            embedding_list = [result.EmbeddingStore.embedding for result in results]
+
+            mmr_selected = maximal_marginal_relevance(
+                np.array(embedding, dtype=np.float32),
+                embedding_list,
+                k=k,
+                lambda_mult=lambda_mult,
+            )
+
+            candidates = self._results_to_docs_and_scores(results)
+
+            return [r for i, r in enumerate(candidates) if i in mmr_selected]
+
     def max_marginal_relevance_search(
         self,
         query: str,
@@ -1254,6 +1954,45 @@ class PGVector(VectorStore):
             **kwargs,
         )
 
+    async def amax_marginal_relevance_search(
+        self,
+        query: str,
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query (str): Text to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
+                Defaults to 20.
+            lambda_mult (float): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Document]: List of Documents selected by maximal marginal relevance.
+        """
+        await self.__apost_init__()  # Lazy async init
+        embedding = self.embedding_function.embed_query(query)
+        return await self.amax_marginal_relevance_search_by_vector(
+            embedding,
+            k=k,
+            fetch_k=fetch_k,
+            lambda_mult=lambda_mult,
+            filter=filter,
+            **kwargs,
+        )
+
     def max_marginal_relevance_search_with_score(
         self,
         query: str,
@@ -1294,6 +2033,47 @@ class PGVector(VectorStore):
         )
         return docs
 
+    async def amax_marginal_relevance_search_with_score(
+        self,
+        query: str,
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        filter: Optional[dict] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs selected using the maximal marginal relevance with score.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            query (str): Text to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
+                Defaults to 20.
+            lambda_mult (float): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: List of Documents selected by maximal marginal
+                relevance to the query and score for each.
+        """
+        await self.__apost_init__()  # Lazy async init
+        embedding = self.embedding_function.embed_query(query)
+        docs = await self.amax_marginal_relevance_search_with_score_by_vector(
+            embedding=embedding,
+            k=k,
+            fetch_k=fetch_k,
+            lambda_mult=lambda_mult,
+            filter=filter,
+            **kwargs,
+        )
+        return docs
+
     def max_marginal_relevance_search_by_vector(
         self,
         embedding: List[float],
@@ -1343,18 +2123,58 @@ class PGVector(VectorStore):
         filter: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
-        """Return docs selected using the maximal marginal relevance
-
-
-
-
-
-
-
-
-
-            lambda_mult
-
-
+        """Return docs selected using the maximal marginal relevance
+            to embedding vector.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            embedding (str): Text to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm.
+                Defaults to 20.
+            lambda_mult (float): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Document]: List of Documents selected by maximal marginal relevance.
+        """
+        await self.__apost_init__()  # Lazy async init
+        docs_and_scores = (
+            await self.amax_marginal_relevance_search_with_score_by_vector(
+                embedding,
+                k=k,
+                fetch_k=fetch_k,
+                lambda_mult=lambda_mult,
+                filter=filter,
+                **kwargs,
+            )
         )
+
+        return _results_to_docs(docs_and_scores)
+
+    @contextlib.contextmanager
+    def _make_sync_session(self) -> Generator[Session, None, None]:
+        """Make an async session."""
+        if self.async_mode:
+            raise ValueError(
+                "Attempting to use a sync method in when async mode is turned on. "
+                "Please use the corresponding async method instead."
+            )
+        with self.session_maker() as session:
+            yield typing_cast(Session, session)
+
+    @contextlib.asynccontextmanager
+    async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]:
+        """Make an async session."""
+        if not self.async_mode:
+            raise ValueError(
+                "Attempting to use an async method in when sync mode is turned on. "
+                "Please use the corresponding async method instead."
+            )
+        async with self.session_maker() as session:
+            yield typing_cast(AsyncSession, session)