PyPI - langchain-postgres - Version diff - 0.0.9 (tar.gz) → 0.0.11 (tar.gz) - Mend

langchain-postgres 0.0.9 (tar.gz) → 0.0.11 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/PKG-INFO RENAMED Viewed

@@ -1,17 +1,17 @@
 Metadata-Version: 2.1
 Name: langchain-postgres
-Version: 0.0.9
+Version: 0.0.11
 Summary: An integration package connecting Postgres and LangChain
 Home-page: https://github.com/langchain-ai/langchain-postgres
 License: MIT
-Requires-Python: >=3.8.1,<4.0.0
+Requires-Python: >=3.9,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: langchain-core (>=0.1.50,<0.3)
+Requires-Dist: langchain-core (>=0.2.13,<0.4.0)
 Requires-Dist: numpy (>=1,<2)
 Requires-Dist: pgvector (>=0.2.5,<0.3.0)
 Requires-Dist: psycopg (>=3,<4)

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/_utils.py RENAMED Viewed

@@ -30,10 +30,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
         X = np.array(X, dtype=np.float32)
         Y = np.array(Y, dtype=np.float32)
-        Z = 1 - simd.cdist(X, Y, metric="cosine")
-        if isinstance(Z, float):
-            return np.array([Z])
-        return np.array(Z)
+        Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
+        return Z
     except ImportError:
         logger.debug(
             "Unable to import simsimd, defaulting to NumPy implementation. If you want "

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/vectorstores.py RENAMED Viewed

@@ -246,98 +246,130 @@ DBConnection = Union[sqlalchemy.engine.Engine, str]
 class PGVector(VectorStore):
-    """Vectorstore implementation using Postgres as the backend.
+    """Postgres vector store integration.
-    Currently, there is no mechanism for supporting data migration.
+    Setup:
+        Install ``langchain_postgres`` and run the docker container.
-    So breaking changes in the vectorstore schema will require the user to recreate
-    the tables and re-add the documents.
+        .. code-block:: bash
-    If this is a concern, please use a different vectorstore. If
-    not, this implementation should be fine for your use case.
+            pip install -qU langchain-postgres
+            docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16
-    To use this vectorstore you need to have the `vector` extension installed.
-    The `vector` extension is a Postgres extension that provides vector
-    similarity search capabilities.
+    Key init args — indexing params:
+        collection_name: str
+            Name of the collection.
+        embeddings: Embeddings
+            Embedding function to use.
-    ```sh
-    docker run --name pgvector-container -e POSTGRES_PASSWORD=...
-        -d pgvector/pgvector:pg16
-    ```
+    Key init args — client params:
+        connection: Union[None, DBConnection, Engine, AsyncEngine, str]
+            Connection string or engine.
-    Example:
+    Instantiate:
         .. code-block:: python
+            from langchain_postgres import PGVector
             from langchain_postgres.vectorstores import PGVector
-            from langchain_openai.embeddings import OpenAIEmbeddings
-            connection_string = "postgresql+psycopg://..."
-            collection_name = "state_of_the_union_test"
-            embeddings = OpenAIEmbeddings()
-            vectorstore = PGVector.from_documents(
-                embedding=embeddings,
-                documents=docs,
-                connection=connection_string,
+            from langchain_openai import OpenAIEmbeddings
+            # See docker command above to launch a postgres instance with pgvector enabled.
+            connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"  # Uses psycopg3!
+            collection_name = "my_docs"
+            vector_store = PGVector(
+                embeddings=OpenAIEmbeddings(model="text-embedding-3-large"),
                 collection_name=collection_name,
+                connection=connection,
                 use_jsonb=True,
-                async_mode=False,
             )
+    Add Documents:
+        .. code-block:: python
+            from langchain_core.documents import Document
+            document_1 = Document(page_content="foo", metadata={"baz": "bar"})
+            document_2 = Document(page_content="thud", metadata={"bar": "baz"})
+            document_3 = Document(page_content="i will be deleted :(")
+            documents = [document_1, document_2, document_3]
+            ids = ["1", "2", "3"]
+            vector_store.add_documents(documents=documents, ids=ids)
+    Delete Documents:
+        .. code-block:: python
+            vector_store.delete(ids=["3"])
+    Search:
+        .. code-block:: python
+            results = vector_store.similarity_search(query="thud",k=1)
+            for doc in results:
+                print(f"* {doc.page_content} [{doc.metadata}]")
+        .. code-block:: python
+            * thud [{'bar': 'baz'}]
+    Search with filter:
+        .. code-block:: python
+            results = vector_store.similarity_search(query="thud",k=1,filter={"bar": "baz"})
+            for doc in results:
+                print(f"* {doc.page_content} [{doc.metadata}]")
+        .. code-block:: python
+            * thud [{'bar': 'baz'}]
+    Search with score:
+        .. code-block:: python
+            results = vector_store.similarity_search_with_score(query="qux",k=1)
+            for doc, score in results:
+                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
+        .. code-block:: python
+            * [SIM=0.499243] foo [{'baz': 'bar'}]
+    Async:
+        .. code-block:: python
+            # add documents
+            # await vector_store.aadd_documents(documents=documents, ids=ids)
+            # delete documents
+            # await vector_store.adelete(ids=["3"])
+            # search
+            # results = vector_store.asimilarity_search(query="thud",k=1)
+            # search with score
+            results = await vector_store.asimilarity_search_with_score(query="qux",k=1)
+            for doc,score in results:
+                print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
+        .. code-block:: python
+            * [SIM=0.499243] foo [{'baz': 'bar'}]
+    Use as Retriever:
+        .. code-block:: python
+            retriever = vector_store.as_retriever(
+                search_type="mmr",
+                search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
+            )
+            retriever.invoke("thud")
+        .. code-block:: python
+            [Document(metadata={'bar': 'baz'}, page_content='thud')]
-    This code has been ported over from langchain_community with minimal changes
-    to allow users to easily transition from langchain_community to langchain_postgres.
-    Some changes had to be made to address issues with the community implementation:
-    * langchain_postgres now works with psycopg3. Please update your
-      connection strings from `postgresql+psycopg2://...` to
-      `postgresql+psycopg://langchain:langchain@...`
-      (yes, the driver name is `psycopg` not `psycopg3`)
-    * The schema of the embedding store and collection have been changed to make
-      add_documents work correctly with user specified ids, specifically
-      when overwriting existing documents.
-      You will need to recreate the tables if you are using an existing database.
-    * A Connection object has to be provided explicitly. Connections will not be
-      picked up automatically based on env variables.
-    * langchain_postgres now accept async connections. If you want to use the async
-        version, you need to set `async_mode=True` when initializing the store or
-        use an async engine.
-    Supported filter operators:
-    * $eq: Equality operator
-    * $ne: Not equal operator
-    * $lt: Less than operator
-    * $lte: Less than or equal operator
-    * $gt: Greater than operator
-    * $gte: Greater than or equal operator
-    * $in: In operator
-    * $nin: Not in operator
-    * $between: Between operator
-    * $exists: Exists operator
-    * $like: Like operator
-    * $ilike: Case insensitive like operator
-    * $and: Logical AND operator
-    * $or: Logical OR operator
-    * $not: Logical NOT operator
-    Example:
-    .. code-block:: python
-        vectorstore.similarity_search('kitty', k=10, filter={
-            'id': {'$in': [1, 5, 2, 9]}
-        })
-        #%% md
-        If you provide a dict with multiple fields, but no operators,
-        the top level will be interpreted as a logical **AND** filter
-        vectorstore.similarity_search('ducks', k=10, filter={
-            'id': {'$in': [1, 5, 2, 9]},
-            'location': {'$in': ["pond", "market"]}
-        })
-    """
+    """  # noqa: E501
     def __init__(
         self,
@@ -714,7 +746,7 @@ class PGVector(VectorStore):
     def add_embeddings(
         self,
-        texts: Iterable[str],
+        texts: Sequence[str],
         embeddings: List[List[float]],
         metadatas: Optional[List[dict]] = None,
         ids: Optional[List[str]] = None,
@@ -732,7 +764,9 @@ class PGVector(VectorStore):
         """
         assert not self._async_engine, "This method must be called with sync_mode"
         if ids is None:
-            ids = [str(uuid.uuid4()) for _ in texts]
+            ids_ = [str(uuid.uuid4()) for _ in texts]
+        else:
+            ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
         if not metadatas:
             metadatas = [{} for _ in texts]
@@ -750,7 +784,7 @@ class PGVector(VectorStore):
                     "cmetadata": metadata or {},
                 }
                 for text, metadata, embedding, id in zip(
-                    texts, metadatas, embeddings, ids
+                    texts, metadatas, embeddings, ids_
                 )
             ]
             stmt = insert(self.EmbeddingStore).values(data)
@@ -766,11 +800,11 @@ class PGVector(VectorStore):
             session.execute(on_conflict_stmt)
             session.commit()
-        return ids
+        return ids_
     async def aadd_embeddings(
         self,
-        texts: Iterable[str],
+        texts: Sequence[str],
         embeddings: List[List[float]],
         metadatas: Optional[List[dict]] = None,
         ids: Optional[List[str]] = None,
@@ -787,8 +821,11 @@ class PGVector(VectorStore):
             kwargs: vectorstore specific parameters
         """
         await self.__apost_init__()  # Lazy async init
         if ids is None:
-            ids = [str(uuid.uuid1()) for _ in texts]
+            ids_ = [str(uuid.uuid4()) for _ in texts]
+        else:
+            ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
         if not metadatas:
             metadatas = [{} for _ in texts]
@@ -806,7 +843,7 @@ class PGVector(VectorStore):
                     "cmetadata": metadata or {},
                 }
                 for text, metadata, embedding, id in zip(
-                    texts, metadatas, embeddings, ids
+                    texts, metadatas, embeddings, ids_
                 )
             ]
             stmt = insert(self.EmbeddingStore).values(data)
@@ -822,7 +859,7 @@ class PGVector(VectorStore):
             await session.execute(on_conflict_stmt)
             await session.commit()
-        return ids
+        return ids_
     def add_texts(
         self,
@@ -844,9 +881,14 @@ class PGVector(VectorStore):
             List of ids from adding the texts into the vectorstore.
         """
         assert not self._async_engine, "This method must be called without async_mode"
-        embeddings = self.embedding_function.embed_documents(list(texts))
+        texts_ = list(texts)
+        embeddings = self.embedding_function.embed_documents(texts_)
         return self.add_embeddings(
-            texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+            texts=texts_,
+            embeddings=list(embeddings),
+            metadatas=list(metadatas) if metadatas else None,
+            ids=list(ids) if ids else None,
+            **kwargs,
         )
     async def aadd_texts(
@@ -869,9 +911,14 @@ class PGVector(VectorStore):
             List of ids from adding the texts into the vectorstore.
         """
         await self.__apost_init__()  # Lazy async init
-        embeddings = await self.embedding_function.aembed_documents(list(texts))
+        texts_ = list(texts)
+        embeddings = await self.embedding_function.aembed_documents(texts_)
         return await self.aadd_embeddings(
-            texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
+            texts=texts_,
+            embeddings=list(embeddings),
+            metadatas=list(metadatas) if metadatas else None,
+            ids=list(ids) if ids else None,
+            **kwargs,
         )
     def similarity_search(
@@ -1014,6 +1061,7 @@ class PGVector(VectorStore):
         docs = [
             (
                 Document(
+                    id=str(result.EmbeddingStore.id),
                     page_content=result.EmbeddingStore.document,
                     metadata=result.EmbeddingStore.cmetadata,
                 ),
@@ -2178,3 +2226,54 @@ class PGVector(VectorStore):
             )
         async with self.session_maker() as session:
             yield typing_cast(AsyncSession, session)
+    def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
+        """Get documents by ids."""
+        documents = []
+        with self._make_sync_session() as session:
+            collection = self.get_collection(session)
+            filter_by = [self.EmbeddingStore.collection_id == collection.uuid]
+            stmt = (
+                select(
+                    self.EmbeddingStore,
+                )
+                .where(self.EmbeddingStore.id.in_(ids))
+                .filter(*filter_by)
+            )
+            for result in session.execute(stmt).scalars().all():
+                documents.append(
+                    Document(
+                        id=result.id,
+                        page_content=result.document,
+                        metadata=result.cmetadata,
+                    )
+                )
+        return documents
+    async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
+        """Get documents by ids."""
+        documents = []
+        async with self._make_async_session() as session:
+            collection = await self.aget_collection(session)
+            filter_by = [self.EmbeddingStore.collection_id == collection.uuid]
+            stmt = (
+                select(
+                    self.EmbeddingStore,
+                )
+                .where(self.EmbeddingStore.id.in_(ids))
+                .filter(*filter_by)
+            )
+            results: Sequence[Any] = (await session.execute(stmt)).scalars().all()
+            for result in results:
+                documents.append(
+                    Document(
+                        id=str(result.id),
+                        page_content=result.document,
+                        metadata=result.cmetadata,
+                    )
+                )
+        return documents

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-postgres"
-version = "0.0.9"
+version = "0.0.11"
 description = "An integration package connecting Postgres and LangChain"
 authors = []
 readme = "README.md"
@@ -11,8 +11,8 @@ license = "MIT"
 "Source Code" = "https://github.com/langchain-ai/langchain-postgres/tree/master/langchain_postgres"
 [tool.poetry.dependencies]
-python = "^3.8.1"
-langchain-core = ">=0.1.50,<0.3"
+python = "^3.9"
+langchain-core = ">=0.2.13,<0.4.0"
 psycopg = "^3"
 psycopg-pool = "^3.2.1"
 sqlalchemy = "^2"
@@ -24,6 +24,7 @@ numpy = "^1"
 [tool.poetry.group.dev.dependencies]
 jupyterlab = "^3.6.1"
 [tool.poetry.group.test]
 optional = true
@@ -33,6 +34,8 @@ pytest-asyncio = "^0.23.2"
 pytest-socket = "^0.7.0"
 pytest-cov = "^5.0.0"
 pytest-timeout = "^2.3.1"
+langchain-core = {git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core/"}
+langchain-standard-tests = {git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/standard-tests/"}
 [tool.poetry.group.codespell]
 optional = true

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/LICENSE RENAMED Viewed

File without changes

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/README.md RENAMED Viewed

File without changes

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/__init__.py RENAMED Viewed

File without changes

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/chat_message_histories.py RENAMED Viewed

File without changes

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/py.typed RENAMED Viewed

File without changes

{langchain_postgres-0.0.9 → langchain_postgres-0.0.11}/langchain_postgres/translator.py RENAMED Viewed

File without changes

langchain-postgres 0.0.9 (tar.gz) → 0.0.11 (tar.gz)

langchain-postgres 0.0.9 (tar.gz) → 0.0.11 (tar.gz)