langchain-postgres 0.0.12__py3-none-any.whl → 0.0.14rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,351 @@
+ from __future__ import annotations
+
+ import asyncio
+ from dataclasses import dataclass
+ from threading import Thread
+ from typing import TYPE_CHECKING, Any, Awaitable, Optional, TypeVar, TypedDict, Union
+
+ from sqlalchemy import text
+ from sqlalchemy.engine import URL
+ from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
+
+ if TYPE_CHECKING:
+     import asyncpg  # type: ignore
+
+ T = TypeVar("T")
+
+
+ class ColumnDict(TypedDict):
+     name: str
+     data_type: str
+     nullable: bool
+
+
+ @dataclass
+ class Column:
+     name: str
+     data_type: str
+     nullable: bool = True
+
+     def __post_init__(self) -> None:
+         """Check if initialization parameters are valid.
+
+         Raises:
+             ValueError: If name is not a string.
+             ValueError: If data_type is not a string.
+         """
+
+         if not isinstance(self.name, str):
+             raise ValueError("Column name must be type string")
+         if not isinstance(self.data_type, str):
+             raise ValueError("Column data_type must be type string")
+
+
+ class PGEngine:
+     """A class for managing connections to a Postgres database."""
+
+     _default_loop: Optional[asyncio.AbstractEventLoop] = None
+     _default_thread: Optional[Thread] = None
+     __create_key = object()
+
+     def __init__(
+         self,
+         key: object,
+         pool: AsyncEngine,
+         loop: Optional[asyncio.AbstractEventLoop],
+         thread: Optional[Thread],
+     ) -> None:
+         """PGEngine constructor.
+
+         Args:
+             key (object): Prevent direct constructor usage.
+             pool (AsyncEngine): Async engine connection pool.
+             loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine.
+             thread (Optional[Thread]): Thread used to create the engine async.
+
+         Raises:
+             Exception: If the constructor is called directly by the user.
+         """
+
+         if key != PGEngine.__create_key:
+             raise Exception(
+                 "Only create class through 'from_connection_string' or 'from_engine' methods!"
+             )
+         self._pool = pool
+         self._loop = loop
+         self._thread = thread
+
+     @classmethod
+     def from_engine(
+         cls: type[PGEngine],
+         engine: AsyncEngine,
+         loop: Optional[asyncio.AbstractEventLoop] = None,
+     ) -> PGEngine:
+         """Create a PGEngine instance from an AsyncEngine."""
+         return cls(cls.__create_key, engine, loop, None)
+
+     @classmethod
+     def from_connection_string(
+         cls,
+         url: str | URL,
+         **kwargs: Any,
+     ) -> PGEngine:
+         """Create a PGEngine instance from a connection string or URL.
+
+         Args:
+             url (str | URL): The URL used to connect to the database.
+             **kwargs (Any): Additional arguments passed to create_async_engine.
+
+         Raises:
+             ValueError: If not all database URL arguments are specified.
+
+         Returns:
+             PGEngine
+         """
+         # Running a loop in a background thread allows us to support
+         # async methods from non-async environments
+         if cls._default_loop is None:
+             cls._default_loop = asyncio.new_event_loop()
+             cls._default_thread = Thread(
+                 target=cls._default_loop.run_forever, daemon=True
+             )
+             cls._default_thread.start()
+
+         engine = create_async_engine(url, **kwargs)
+         return cls(cls.__create_key, engine, cls._default_loop, cls._default_thread)
+
+     async def _run_as_async(self, coro: Awaitable[T]) -> T:
+         """Run a coroutine asynchronously, using the background event loop if one was provided."""
+         # If a loop has not been provided, attempt to run in current thread
+         if not self._loop:
+             return await coro
+         # Otherwise, run in the background thread
+         return await asyncio.wrap_future(
+             asyncio.run_coroutine_threadsafe(coro, self._loop)
+         )
+
+     def _run_as_sync(self, coro: Awaitable[T]) -> T:
+         """Run a coroutine synchronously on the background event loop."""
+         if not self._loop:
+             raise Exception(
+                 "Engine was initialized without a background loop and cannot call sync methods."
+             )
+         return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+     async def close(self) -> None:
+         """Dispose of the connection pool."""
+         await self._run_as_async(self._pool.dispose())
+
+     def _escape_postgres_identifier(self, name: str) -> str:
+         return name.replace('"', '""')
+
+     def _validate_column_dict(self, col: ColumnDict) -> None:
+         if not isinstance(col.get("name"), str):
+             raise TypeError("The 'name' field must be a string.")
+         if not isinstance(col.get("data_type"), str):
+             raise TypeError("The 'data_type' field must be a string.")
+         if not isinstance(col.get("nullable"), bool):
+             raise TypeError("The 'nullable' field must be a boolean.")
+
+     async def _ainit_vectorstore_table(
+         self,
+         table_name: str,
+         vector_size: int,
+         *,
+         schema_name: str = "public",
+         content_column: str = "content",
+         embedding_column: str = "embedding",
+         metadata_columns: Optional[list[Union[Column, ColumnDict]]] = None,
+         metadata_json_column: str = "langchain_metadata",
+         id_column: Union[str, Column, ColumnDict] = "langchain_id",
+         overwrite_existing: bool = False,
+         store_metadata: bool = True,
+     ) -> None:
+         """
+         Create a table for saving vectors to be used with PGVectorStore.
+
+         Args:
+             table_name (str): The database table name.
+             vector_size (int): Vector size for the embedding model to be used.
+             schema_name (str): The schema name.
+                 Default: "public".
+             content_column (str): Name of the column to store document content.
+                 Default: "content".
+             embedding_column (str): Name of the column to store vector embeddings.
+                 Default: "embedding".
+             metadata_columns (Optional[list[Union[Column, ColumnDict]]]): A list of Columns to create for custom
+                 metadata. Default: None. Optional.
+             metadata_json_column (str): The column to store extra metadata in JSON format.
+                 Default: "langchain_metadata". Optional.
+             id_column (Union[str, Column, ColumnDict]): Column to store ids.
+                 Default: "langchain_id" column name with data type UUID. Optional.
+             overwrite_existing (bool): Whether to drop the existing table. Default: False.
+             store_metadata (bool): Whether to store metadata in the table.
+                 Default: True.
+
+         Raises:
+             :class:`DuplicateTableError <asyncpg.exceptions.DuplicateTableError>`: if the table already exists.
+             :class:`UndefinedObjectError <asyncpg.exceptions.UndefinedObjectError>`: if the data type of the id column is not a PostgreSQL data type.
+         """
+
+         schema_name = self._escape_postgres_identifier(schema_name)
+         table_name = self._escape_postgres_identifier(table_name)
+         content_column = self._escape_postgres_identifier(content_column)
+         embedding_column = self._escape_postgres_identifier(embedding_column)
+         if metadata_columns is None:
+             metadata_columns = []
+         else:
+             for col in metadata_columns:
+                 if isinstance(col, Column):
+                     col.name = self._escape_postgres_identifier(col.name)
+                 elif isinstance(col, dict):
+                     self._validate_column_dict(col)
+                     col["name"] = self._escape_postgres_identifier(col["name"])
+         if isinstance(id_column, str):
+             id_column = self._escape_postgres_identifier(id_column)
+         elif isinstance(id_column, Column):
+             id_column.name = self._escape_postgres_identifier(id_column.name)
+         else:
+             self._validate_column_dict(id_column)
+             id_column["name"] = self._escape_postgres_identifier(id_column["name"])
+
+         async with self._pool.connect() as conn:
+             await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
+             await conn.commit()
+
+         if overwrite_existing:
+             async with self._pool.connect() as conn:
+                 await conn.execute(
+                     text(f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}"')
+                 )
+                 await conn.commit()
+
+         if isinstance(id_column, str):
+             id_data_type = "UUID"
+             id_column_name = id_column
+         elif isinstance(id_column, Column):
+             id_data_type = id_column.data_type
+             id_column_name = id_column.name
+         else:
+             id_data_type = id_column["data_type"]
+             id_column_name = id_column["name"]
+
+         query = f"""CREATE TABLE "{schema_name}"."{table_name}"(
+             "{id_column_name}" {id_data_type} PRIMARY KEY,
+             "{content_column}" TEXT NOT NULL,
+             "{embedding_column}" vector({vector_size}) NOT NULL"""
+         for column in metadata_columns:
+             if isinstance(column, Column):
+                 nullable = "NOT NULL" if not column.nullable else ""
+                 query += f',\n"{column.name}" {column.data_type} {nullable}'
+             elif isinstance(column, dict):
+                 nullable = "NOT NULL" if not column["nullable"] else ""
+                 query += f',\n"{column["name"]}" {column["data_type"]} {nullable}'
+         if store_metadata:
+             query += f""",\n"{metadata_json_column}" JSON"""
+         query += "\n);"
+
+         async with self._pool.connect() as conn:
+             await conn.execute(text(query))
+             await conn.commit()
+
+     async def ainit_vectorstore_table(
+         self,
+         table_name: str,
+         vector_size: int,
+         *,
+         schema_name: str = "public",
+         content_column: str = "content",
+         embedding_column: str = "embedding",
+         metadata_columns: Optional[list[Union[Column, ColumnDict]]] = None,
+         metadata_json_column: str = "langchain_metadata",
+         id_column: Union[str, Column, ColumnDict] = "langchain_id",
+         overwrite_existing: bool = False,
+         store_metadata: bool = True,
+     ) -> None:
+         """
+         Create a table for saving vectors to be used with PGVectorStore.
+
+         Args:
+             table_name (str): The database table name.
+             vector_size (int): Vector size for the embedding model to be used.
+             schema_name (str): The schema name.
+                 Default: "public".
+             content_column (str): Name of the column to store document content.
+                 Default: "content".
+             embedding_column (str): Name of the column to store vector embeddings.
+                 Default: "embedding".
+             metadata_columns (Optional[list[Union[Column, ColumnDict]]]): A list of Columns to create for custom
+                 metadata. Default: None. Optional.
+             metadata_json_column (str): The column to store extra metadata in JSON format.
+                 Default: "langchain_metadata". Optional.
+             id_column (Union[str, Column, ColumnDict]): Column to store ids.
+                 Default: "langchain_id" column name with data type UUID. Optional.
+             overwrite_existing (bool): Whether to drop the existing table. Default: False.
+             store_metadata (bool): Whether to store metadata in the table.
+                 Default: True.
+         """
+         await self._run_as_async(
+             self._ainit_vectorstore_table(
+                 table_name,
+                 vector_size,
+                 schema_name=schema_name,
+                 content_column=content_column,
+                 embedding_column=embedding_column,
+                 metadata_columns=metadata_columns,
+                 metadata_json_column=metadata_json_column,
+                 id_column=id_column,
+                 overwrite_existing=overwrite_existing,
+                 store_metadata=store_metadata,
+             )
+         )
+
+     def init_vectorstore_table(
+         self,
+         table_name: str,
+         vector_size: int,
+         *,
+         schema_name: str = "public",
+         content_column: str = "content",
+         embedding_column: str = "embedding",
+         metadata_columns: Optional[list[Union[Column, ColumnDict]]] = None,
+         metadata_json_column: str = "langchain_metadata",
+         id_column: Union[str, Column, ColumnDict] = "langchain_id",
+         overwrite_existing: bool = False,
+         store_metadata: bool = True,
+     ) -> None:
+         """
+         Create a table for saving vectors to be used with PGVectorStore.
+
+         Args:
+             table_name (str): The database table name.
+             vector_size (int): Vector size for the embedding model to be used.
+             schema_name (str): The schema name.
+                 Default: "public".
+             content_column (str): Name of the column to store document content.
+                 Default: "content".
+             embedding_column (str): Name of the column to store vector embeddings.
+                 Default: "embedding".
+             metadata_columns (Optional[list[Union[Column, ColumnDict]]]): A list of Columns to create for custom
+                 metadata. Default: None. Optional.
+             metadata_json_column (str): The column to store extra metadata in JSON format.
+                 Default: "langchain_metadata". Optional.
+             id_column (Union[str, Column, ColumnDict]): Column to store ids.
+                 Default: "langchain_id" column name with data type UUID. Optional.
+             overwrite_existing (bool): Whether to drop the existing table. Default: False.
+             store_metadata (bool): Whether to store metadata in the table.
+                 Default: True.
+         """
+         self._run_as_sync(
+             self._ainit_vectorstore_table(
+                 table_name,
+                 vector_size,
+                 schema_name=schema_name,
+                 content_column=content_column,
+                 embedding_column=embedding_column,
+                 metadata_columns=metadata_columns,
+                 metadata_json_column=metadata_json_column,
+                 id_column=id_column,
+                 overwrite_existing=overwrite_existing,
+                 store_metadata=store_metadata,
+             )
+         )
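
The engine module above exposes both async (ainit_vectorstore_table) and sync (init_vectorstore_table) entry points for provisioning a pgvector-backed table, with the sync path dispatched to a background event loop. Below is a minimal usage sketch; the import path, connection URL, table name, and column definitions are illustrative assumptions rather than part of this diff, and the postgresql+asyncpg URL assumes the asyncpg driver is installed.

    from langchain_postgres import Column, PGEngine  # top-level exports assumed

    # Placeholder SQLAlchemy async URL for a local database.
    engine = PGEngine.from_connection_string(
        url="postgresql+asyncpg://user:password@localhost:5432/vectordb",
    )

    # Synchronous call; PGEngine runs it on its background event loop.
    engine.init_vectorstore_table(
        table_name="my_documents",
        vector_size=768,  # must match the embedding model's output dimension
        metadata_columns=[Column(name="source", data_type="TEXT")],
        overwrite_existing=True,
    )
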
@@ -0,0 +1,155 @@
+ """Index classes to add vector indexes on the PGVectorStore.
+
+ Learn more about vector indexes at https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
+ """
+
+ import enum
+ import re
+ import warnings
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Optional
+
+
+ @dataclass
+ class StrategyMixin:
+     operator: str
+     search_function: str
+     index_function: str
+
+
+ class DistanceStrategy(StrategyMixin, enum.Enum):
+     """Enumerator of the distance strategies."""
+
+     EUCLIDEAN = "<->", "l2_distance", "vector_l2_ops"
+     COSINE_DISTANCE = "<=>", "cosine_distance", "vector_cosine_ops"
+     INNER_PRODUCT = "<#>", "inner_product", "vector_ip_ops"
+
+
+ DEFAULT_DISTANCE_STRATEGY: DistanceStrategy = DistanceStrategy.COSINE_DISTANCE
+ DEFAULT_INDEX_NAME_SUFFIX: str = "langchainvectorindex"
+
+
+ def validate_identifier(identifier: str) -> None:
+     if re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", identifier) is None:
+         raise ValueError(
+             f"Invalid identifier: {identifier}. Identifiers must start with a letter or underscore, and subsequent characters can be letters, digits, or underscores."
+         )
+
+
+ @dataclass
+ class BaseIndex(ABC):
+     """
+     Abstract base class for defining vector indexes.
+
+     Attributes:
+         name (Optional[str]): A human-readable name for the index. Defaults to None.
+         index_type (str): A string identifying the type of index. Defaults to "base".
+         distance_strategy (DistanceStrategy): The strategy used to calculate distances
+             between vectors in the index. Defaults to DistanceStrategy.COSINE_DISTANCE.
+         partial_indexes (Optional[list[str]]): A list of names of partial indexes. Defaults to None.
+         extension_name (Optional[str]): The name of the extension to be created for the index, if any. Defaults to None.
+     """
+
+     name: Optional[str] = None
+     index_type: str = "base"
+     distance_strategy: DistanceStrategy = field(
+         default_factory=lambda: DistanceStrategy.COSINE_DISTANCE
+     )
+     partial_indexes: Optional[list[str]] = None
+     extension_name: Optional[str] = None
+
+     @abstractmethod
+     def index_options(self) -> str:
+         """Set index query options for vector store initialization."""
+         raise NotImplementedError(
+             "index_options method must be implemented by subclass"
+         )
+
+     def get_index_function(self) -> str:
+         return self.distance_strategy.index_function
+
+     def __post_init__(self) -> None:
+         """Check if initialization parameters are valid.
+
+         Raises:
+             ValueError: If extension_name or index_type is not a valid PostgreSQL identifier.
+         """
+
+         if self.extension_name:
+             validate_identifier(self.extension_name)
+         if self.index_type:
+             validate_identifier(self.index_type)
+
+
+ @dataclass
+ class ExactNearestNeighbor(BaseIndex):
+     index_type: str = "exactnearestneighbor"
+
+
+ @dataclass
+ class QueryOptions(ABC):
+     @abstractmethod
+     def to_parameter(self) -> list[str]:
+         """Convert index attributes to list of configurations."""
+         raise NotImplementedError("to_parameter method must be implemented by subclass")
+
+     @abstractmethod
+     def to_string(self) -> str:
+         """Convert index attributes to string."""
+         raise NotImplementedError("to_string method must be implemented by subclass")
+
+
+ @dataclass
+ class HNSWIndex(BaseIndex):
+     index_type: str = "hnsw"
+     m: int = 16
+     ef_construction: int = 64
+
+     def index_options(self) -> str:
+         """Set index query options for vector store initialization."""
+         return f"(m = {self.m}, ef_construction = {self.ef_construction})"
+
+
+ @dataclass
+ class HNSWQueryOptions(QueryOptions):
+     ef_search: int = 40
+
+     def to_parameter(self) -> list[str]:
+         """Convert index attributes to list of configurations."""
+         return [f"hnsw.ef_search = {self.ef_search}"]
+
+     def to_string(self) -> str:
+         """Convert index attributes to string."""
+         warnings.warn(
+             "to_string is deprecated, use to_parameter instead.",
+             DeprecationWarning,
+         )
+         return f"hnsw.ef_search = {self.ef_search}"
+
+
+ @dataclass
+ class IVFFlatIndex(BaseIndex):
+     index_type: str = "ivfflat"
+     lists: int = 100
+
+     def index_options(self) -> str:
+         """Set index query options for vector store initialization."""
+         return f"(lists = {self.lists})"
+
+
+ @dataclass
+ class IVFFlatQueryOptions(QueryOptions):
+     probes: int = 1
+
+     def to_parameter(self) -> list[str]:
+         """Convert index attributes to list of configurations."""
+         return [f"ivfflat.probes = {self.probes}"]
+
+     def to_string(self) -> str:
+         """Convert index attributes to string."""
+         warnings.warn(
+             "to_string is deprecated, use to_parameter instead.",
+             DeprecationWarning,
+         )
+         return f"ivfflat.probes = {self.probes}"