pyobvector 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
@@ -1,52 +1,35 @@
  """OceanBase Vector Store Client."""
-
  import logging
- from typing import List, Optional, Dict, Union
+ from typing import List, Optional, Union
+
+ import numpy as np
  from sqlalchemy import (
- create_engine,
- MetaData,
  Table,
  Column,
  Index,
  select,
- delete,
- update,
- insert,
  text,
- inspect,
- and_,
  )
- from sqlalchemy.exc import NoSuchTableError
- from sqlalchemy.dialects import registry
- import sqlalchemy.sql.functions as func_mod
- import numpy as np
- from urllib.parse import quote
- from .index_param import IndexParams, IndexParam
+ from sqlalchemy.schema import CreateTable
+
+ from .exceptions import ClusterVersionException, ErrorCode, ExceptionsMessage
  from .fts_index_param import FtsIndexParam
+ from .index_param import IndexParams, IndexParam
+ from .ob_client import ObClient
+ from .partitions import ObPartition
  from ..schema import (
  ObTable,
  VectorIndex,
- l2_distance,
- cosine_distance,
- inner_product,
- negative_inner_product,
- ST_GeomFromText,
- st_distance,
- st_dwithin,
- st_astext,
- ReplaceStmt,
  FtsIndex,
  )
  from ..util import ObVersion
- from .partitions import *
- from .exceptions import *

  logger = logging.getLogger(__name__)
  logger.setLevel(logging.DEBUG)


- class ObVecClient:
- """The OceanBase Client"""
+ class ObVecClient(ObClient):
+ """The OceanBase Vector Client"""

  def __init__(
  self,
@@ -56,119 +39,23 @@ class ObVecClient:
  db_name: str = "test",
  **kwargs,
  ):
- registry.register("mysql.oceanbase", "pyobvector.schema.dialect", "OceanBaseDialect")
-
- # ischema_names["VECTOR"] = VECTOR
- setattr(func_mod, "l2_distance", l2_distance)
- setattr(func_mod, "cosine_distance", cosine_distance)
- setattr(func_mod, "inner_product", inner_product)
- setattr(func_mod, "negative_inner_product", negative_inner_product)
- setattr(func_mod, "ST_GeomFromText", ST_GeomFromText)
- setattr(func_mod, "st_distance", st_distance)
- setattr(func_mod, "st_dwithin", st_dwithin)
- setattr(func_mod, "st_astext", st_astext)
-
- user = quote(user, safe="")
- password = quote(password, safe="")
-
- connection_str = (
- f"mysql+oceanbase://{user}:{password}@{uri}/{db_name}?charset=utf8mb4"
- )
- self.engine = create_engine(connection_str, **kwargs)
- self.metadata_obj = MetaData()
- self.metadata_obj.reflect(bind=self.engine)
-
- with self.engine.connect() as conn:
- with conn.begin():
- res = conn.execute(text("SELECT OB_VERSION() FROM DUAL"))
- version = [r[0] for r in res][0]
- ob_version = ObVersion.from_db_version_string(version)
- if ob_version < ObVersion.from_db_version_nums(4, 3, 3, 0):
- raise ClusterVersionException(
- code=ErrorCode.NOT_SUPPORTED,
- message=ExceptionsMessage.ClusterVersionIsLow,
- )
-
- def refresh_metadata(self, tables: Optional[list[str]] = None):
- """Reload metadata from the database.
-
- Args:
- tables (Optional[list[str]]): names of the tables to refresh. If None, refresh all tables.
- """
- if tables is not None:
- for table_name in tables:
- if table_name in self.metadata_obj.tables:
- self.metadata_obj.remove(Table(table_name, self.metadata_obj))
- self.metadata_obj.reflect(bind=self.engine, only=tables, extend_existing=True)
- else:
- self.metadata_obj.clear()
- self.metadata_obj.reflect(bind=self.engine, extend_existing=True)
-
- def _insert_partition_hint_for_query_sql(self, sql: str, partition_hint: str):
- from_index = sql.find("FROM")
- assert from_index != -1
- first_space_after_from = sql.find(" ", from_index + len("FROM") + 1)
- if first_space_after_from == -1:
- return sql + " " + partition_hint
- return (
- sql[:first_space_after_from]
- + " "
- + partition_hint
- + sql[first_space_after_from:]
- )
-
- def check_table_exists(self, table_name: str):
- """check if table exists.
+ super().__init__(uri, user, password, db_name, **kwargs)

- Args:
- table_name (string) : table name
- """
- inspector = inspect(self.engine)
- return inspector.has_table(table_name)
+ if self.ob_version < ObVersion.from_db_version_nums(4, 3, 3, 0):
+ raise ClusterVersionException(
+ code=ErrorCode.NOT_SUPPORTED,
+ message=ExceptionsMessage.ClusterVersionIsLow,
+ )

- def create_table(
- self,
- table_name: str,
- columns: List[Column],
- indexes: Optional[List[Index]] = None,
- partitions: Optional[ObPartition] = None,
+ def _get_sparse_vector_index_params(
+ self, vidxs: Optional[IndexParams]
  ):
- """Create a table.
-
- Args:
- table_name (string) : table name
- columns (List[Column]) : column schema
- indexes (Optional[List[Index]]) : optional index schema
- partitions (Optional[ObPartition]) : optional partition strategy
- """
- with self.engine.connect() as conn:
- with conn.begin():
- if indexes is not None:
- table = ObTable(
- table_name,
- self.metadata_obj,
- *columns,
- *indexes,
- extend_existing=True,
- )
- else:
- table = ObTable(
- table_name,
- self.metadata_obj,
- *columns,
- extend_existing=True,
- )
- table.create(self.engine, checkfirst=True)
- # do partition
- if partitions is not None:
- conn.execute(
- text(f"ALTER TABLE `{table_name}` {partitions.do_compile()}")
- )
-
- @classmethod
- def prepare_index_params(cls):
- """Create `IndexParams` to hold index configuration."""
- return IndexParams()
+ if vidxs is None:
+ return None
+ return [
+ vidx for vidx in vidxs
+ if vidx.is_index_type_sparse_vector()
+ ]

  def create_table_with_index_params(
  self,
@@ -182,12 +69,14 @@ class ObVecClient:
  """Create table with optional index_params.

  Args:
- table_name (string) : table name
- columns (List[Column]) : column schema
- indexes (Optional[List[Index]]) : optional common index schema
- vids (Optional[IndexParams]) : optional vector index schema
- partitions (Optional[ObPartition]) : optional partition strategy
+ table_name (string): table name
+ columns (List[Column]): column schema
+ indexes (Optional[List[Index]]): optional common index schema
+ vidxs (Optional[IndexParams]): optional vector index schema
+ fts_idxs (Optional[List[FtsIndexParam]]): optional full-text search index schema
+ partitions (Optional[ObPartition]): optional partition strategy
  """
+ sparse_vidxs = self._get_sparse_vector_index_params(vidxs)
  with self.engine.connect() as conn:
  with conn.begin():
  # create table with common index
@@ -206,7 +95,15 @@ class ObVecClient:
  *columns,
  extend_existing=True,
  )
- table.create(self.engine, checkfirst=True)
+ if sparse_vidxs is not None and len(sparse_vidxs) > 0:
+ create_table_sql = str(CreateTable(table).compile(self.engine))
+ new_sql = create_table_sql[:create_table_sql.rfind(')')]
+ for sparse_vidx in sparse_vidxs:
+ new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (distance=inner_product)"
+ new_sql += "\n)"
+ conn.execute(text(new_sql))
+ else:
+ table.create(self.engine, checkfirst=True)
  # do partition
  if partitions is not None:
  conn.execute(
@@ -215,6 +112,8 @@ class ObVecClient:
  # create vector indexes
  if vidxs is not None:
  for vidx in vidxs:
+ if vidx.is_index_type_sparse_vector():
+ continue
  vidx = VectorIndex(
  vidx.index_name,
  table.c[vidx.field_name],
@@ -244,12 +143,12 @@ class ObVecClient:
  """Create common index or vector index.

  Args:
- table_name (string) : table name
- is_vec_index (bool) : common index or vector index
- index_name (string) : index name
- column_names (List[string]) : create index on which columns
- vidx_params (Optional[str]) :
- vector index params, for example 'distance=l2, type=hnsw, lib=vsag'
+ table_name (string): table name
+ is_vec_index (bool): common index or vector index
+ index_name (string): index name
+ column_names (List[string]): create index on which columns
+ vidx_params (Optional[str]): vector index params, for example 'distance=l2, type=hnsw, lib=vsag'
+ **kw: additional keyword arguments
  """
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
  columns = [table.c[column_name] for column_name in column_names]
@@ -270,8 +169,8 @@ class ObVecClient:
  """Create vector index with vector index parameter.

  Args:
- table_name (string) : table name
- vidx_param (IndexParam) : vector index parameter
+ table_name (string): table name
+ vidx_param (IndexParam): vector index parameter
  """
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
  with self.engine.connect() as conn:
@@ -291,8 +190,8 @@ class ObVecClient:
  """Create fts index with fts index parameter.

  Args:
- table_name (string) : table name
- fts_idx_param (FtsIndexParam) : fts index parameter
+ table_name (string): table name
+ fts_idx_param (FtsIndexParam): fts index parameter
  """
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
  with self.engine.connect() as conn:
@@ -305,26 +204,6 @@ class ObVecClient:
  )
  fts_idx.create(self.engine, checkfirst=True)

- def drop_table_if_exist(self, table_name: str):
- """Drop table if exists."""
- try:
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
- except NoSuchTableError:
- return
- with self.engine.connect() as conn:
- with conn.begin():
- table.drop(self.engine, checkfirst=True)
- self.metadata_obj.remove(table)
-
- def drop_index(self, table_name: str, index_name: str):
- """drop index on specified table.
-
- If the index not exists, SQL ERROR 1091 will raise.
- """
- with self.engine.connect() as conn:
- with conn.begin():
- conn.execute(text(f"DROP INDEX `{index_name}` ON `{table_name}`"))
-
  def refresh_index(
  self,
  table_name: str,
@@ -334,10 +213,9 @@ class ObVecClient:
  """Refresh vector index for performance.

  Args:
- :param table_name (string) : table name
- :param index_name (string) : vector index name
- :param trigger_threshold (int) :
- If delta_buffer_table row count is greater than `trigger_threshold`,
+ table_name (string): table name
+ index_name (string): vector index name
+ trigger_threshold (int): If delta_buffer_table row count is greater than `trigger_threshold`,
  refreshing is actually triggered.
  """
  with self.engine.connect() as conn:
@@ -358,9 +236,9 @@ class ObVecClient:
  """Rebuild vector index for performance.

  Args:
- :param table_name (string) : table name
- :param index_name (string) : vector index name
- :param trigger_threshold (float)
+ table_name (string): table name
+ index_name (string): vector index name
+ trigger_threshold (float): threshold value for rebuilding index
  """
  with self.engine.connect() as conn:
  with conn.begin():
@@ -371,221 +249,6 @@ class ObVecClient:
  )
  )

- def insert(
- self,
- table_name: str,
- data: Union[Dict, List[Dict]],
- partition_name: Optional[str] = "",
- ):
- """Insert data into table.
-
- Args:
- table_name (string) : table name
- data (Union[Dict, List[Dict]]) : data that will be inserted
- partition_names (Optional[str]) : limit the query to certain partition
- """
- if isinstance(data, Dict):
- data = [data]
-
- if len(data) == 0:
- return
-
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
-
- with self.engine.connect() as conn:
- with conn.begin():
- if partition_name is None or partition_name == "":
- conn.execute(insert(table).values(data))
- else:
- conn.execute(
- insert(table)
- .with_hint(f"PARTITION({partition_name})")
- .values(data)
- )
-
- def upsert(
- self,
- table_name: str,
- data: Union[Dict, List[Dict]],
- partition_name: Optional[str] = "",
- ):
- """Update data in table. If primary key is duplicated, replace it.
-
- Args:
- table_name (string) : table name
- data (Union[Dict, List[Dict]]) : data that will be upserted
- partition_names (Optional[str]) : limit the query to certain partition
- """
- if isinstance(data, Dict):
- data = [data]
-
- if len(data) == 0:
- return
-
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
-
- with self.engine.connect() as conn:
- with conn.begin():
- upsert_stmt = (
- ReplaceStmt(table).with_hint(f"PARTITION({partition_name})")
- if partition_name is not None and partition_name != ""
- else ReplaceStmt(table)
- )
- upsert_stmt = upsert_stmt.values(data)
- conn.execute(upsert_stmt)
-
- def update(
- self,
- table_name: str,
- values_clause,
- where_clause=None,
- partition_name: Optional[str] = "",
- ):
- """Update data in table.
-
- Args:
- table_name (string) : table name
- values_clause: update values clause
- where_clause: update with filter
- partition_name (Optional[str]) : limit the query to certain partition
-
- Example:
- .. code-block:: python
-
- data = [
- {"id": 112, "embedding": [1, 2, 3], "meta": {'doc':'hhh1'}},
- {"id": 190, "embedding": [0.13, 0.123, 1.213], "meta": {'doc':'hhh2'}},
- ]
- client.insert(collection_name=test_collection_name, data=data)
- client.update(
- table_name=test_collection_name,
- values_clause=[{'meta':{'doc':'HHH'}}],
- where_clause=[text("id=112")]
- )
- """
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
-
- with self.engine.connect() as conn:
- with conn.begin():
- update_stmt = (
- update(table).with_hint(f"PARTITION({partition_name})")
- if partition_name is not None and partition_name != ""
- else update(table)
- )
- if where_clause is not None:
- update_stmt = update_stmt.where(*where_clause).values(
- *values_clause
- )
- else:
- update_stmt = update_stmt.values(*values_clause)
- conn.execute(update_stmt)
-
- def delete(
- self,
- table_name: str,
- ids: Optional[Union[list, str, int]] = None,
- where_clause=None,
- partition_name: Optional[str] = "",
- ):
- """Delete data in table.
-
- Args:
- table_name (string) : table name
- where_clause : delete with filter
- partition_names (Optional[str]) : limit the query to certain partition
- """
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
- where_in_clause = None
- if ids is not None:
- primary_keys = table.primary_key
- pkey_names = [column.name for column in primary_keys]
- if len(pkey_names) == 1:
- if isinstance(ids, list):
- where_in_clause = table.c[pkey_names[0]].in_(ids)
- elif isinstance(ids, (str, int)):
- where_in_clause = table.c[pkey_names[0]].in_([ids])
- else:
- raise TypeError("'ids' is not a list/str/int")
-
- with self.engine.connect() as conn:
- with conn.begin():
- delete_stmt = (
- delete(table).with_hint(f"PARTITION({partition_name})")
- if partition_name is not None and partition_name != ""
- else delete(table)
- )
- if where_in_clause is None and where_clause is None:
- conn.execute(delete_stmt)
- elif where_in_clause is not None and where_clause is None:
- conn.execute(delete_stmt.where(where_in_clause))
- elif where_in_clause is None and where_clause is not None:
- conn.execute(delete_stmt.where(*where_clause))
- else:
- conn.execute(
- delete_stmt.where(and_(where_in_clause, *where_clause))
- )
-
- def get(
- self,
- table_name: str,
- ids: Optional[Union[list, str, int]],
- where_clause = None,
- output_column_name: Optional[List[str]] = None,
- partition_names: Optional[List[str]] = None,
- n_limits: Optional[int] = None,
- ):
- """get records with specified primary field `ids`.
-
- Args:
- :param table_name (string) : table name
- :param ids : specified primary field values
- :param where_clause : SQL filter
- :param output_column_name (Optional[List[str]]) : output fields name
- :param partition_names (List[str]) : limit the query to certain partitions
- """
- table = Table(table_name, self.metadata_obj, autoload_with=self.engine)
- if output_column_name is not None:
- columns = [table.c[column_name] for column_name in output_column_name]
- stmt = select(*columns)
- else:
- stmt = select(table)
- primary_keys = table.primary_key
- pkey_names = [column.name for column in primary_keys]
- where_in_clause = None
- if ids is not None and len(pkey_names) == 1:
- if isinstance(ids, list):
- where_in_clause = table.c[pkey_names[0]].in_(ids)
- elif isinstance(ids, (str, int)):
- where_in_clause = table.c[pkey_names[0]].in_([ids])
- else:
- raise TypeError("'ids' is not a list/str/int")
-
- if where_in_clause is not None and where_clause is None:
- stmt = stmt.where(where_in_clause)
- elif where_in_clause is None and where_clause is not None:
- stmt = stmt.where(*where_clause)
- elif where_in_clause is not None and where_clause is not None:
- stmt = stmt.where(and_(where_in_clause, *where_clause))
-
- if n_limits is not None:
- stmt = stmt.limit(n_limits)
-
- with self.engine.connect() as conn:
- with conn.begin():
- if partition_names is None:
- execute_res = conn.execute(stmt)
- else:
- stmt_str = str(stmt.compile(
- dialect=self.engine.dialect,
- compile_kwargs={"literal_binds": True}
- ))
- stmt_str = self._insert_partition_hint_for_query_sql(
- stmt_str, f"PARTITION({', '.join(partition_names)})"
- )
- logging.debug(stmt_str)
- execute_res = conn.execute(text(stmt_str))
- return execute_res
-
  def set_ob_hnsw_ef_search(self, ob_hnsw_ef_search: int):
  """Set ob_hnsw_ef_search system variable."""
  with self.engine.connect() as conn:
@@ -602,49 +265,74 @@ class ObVecClient:
  def ann_search(
  self,
  table_name: str,
- vec_data: list,
+ vec_data: Union[list, dict],
  vec_column_name: str,
  distance_func,
  with_dist: bool = False,
  topk: int = 10,
  output_column_names: Optional[List[str]] = None,
+ output_columns: Optional[Union[List, tuple]] = None,
  extra_output_cols: Optional[List] = None,
  where_clause=None,
  partition_names: Optional[List[str]] = None,
  idx_name_hint: Optional[List[str]] = None,
+ distance_threshold: Optional[float] = None,
  **kwargs,
- ): # pylint: disable=unused-argument
- """perform ann search.
+ ): # pylint: disable=unused-argument
+ """Perform ann search.

  Args:
- table_name (string) : table name
- vec_data (list) : the vector data to search
- vec_column_name (string) : which vector field to search
- distance_func : function to calculate distance between vectors
- with_dist (bool) : return result with distance
- topk (int) : top K
- output_column_names (Optional[List[str]]) : output fields
- where_clause : do ann search with filter
- idx_name_hint : post-filtering enabled if vector index name is specified
- Or pre-filtering enabled
+ table_name (string): table name
+ vec_data (Union[list, dict]): the vector/sparse_vector data to search
+ vec_column_name (string): which vector field to search
+ distance_func: function to calculate distance between vectors
+ with_dist (bool): return result with distance
+ topk (int): top K
+ output_column_names (Optional[List[str]]): output fields
+ output_columns (Optional[Union[List, tuple]]): output columns as SQLAlchemy Column objects
+ or expressions. Similar to SQLAlchemy's select() function arguments.
+ If provided, takes precedence over output_column_names.
+ extra_output_cols (Optional[List]): additional output columns
+ where_clause: do ann search with filter
+ partition_names (Optional[List[str]]): limit the query to certain partitions
+ idx_name_hint (Optional[List[str]]): post-filtering enabled if vector index name is specified
+ Or pre-filtering enabled
+ distance_threshold (Optional[float]): filter results where distance <= threshold.
+ **kwargs: additional arguments
  """
+ if not (isinstance(vec_data, list) or isinstance(vec_data, dict)):
+ raise ValueError("'vec_data' type must be in 'list'/'dict'")
+
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)

- if output_column_names is not None:
+ columns = []
+ if output_columns:
+ if isinstance(output_columns, (list, tuple)):
+ columns = list(output_columns)
+ else:
+ columns = [output_columns]
+ elif output_column_names:
  columns = [table.c[column_name] for column_name in output_column_names]
  else:
  columns = [table.c[column.name] for column in table.columns]

- if extra_output_cols is not None:
+ if extra_output_cols:
  columns.extend(extra_output_cols)

  if with_dist:
- columns.append(
- distance_func(
- table.c[vec_column_name],
- "[" + ",".join([str(np.float32(v)) for v in vec_data]) + "]",
+ if isinstance(vec_data, list):
+ columns.append(
+ distance_func(
+ table.c[vec_column_name],
+ "[" + ",".join([str(np.float32(v)) for v in vec_data]) + "]",
+ )
+ )
+ else:
+ columns.append(
+ distance_func(
+ table.c[vec_column_name], f"{vec_data}"
+ )
  )
- )
  # if idx_name_hint is not None:
  # stmt = select(*columns).with_hint(
  # table,
@@ -657,12 +345,32 @@ class ObVecClient:
  if where_clause is not None:
  stmt = stmt.where(*where_clause)

- stmt = stmt.order_by(
- distance_func(
- table.c[vec_column_name],
- "[" + ",".join([str(np.float32(v)) for v in vec_data]) + "]",
+ # Add distance threshold filter in SQL WHERE clause
+ if distance_threshold is not None:
+ if isinstance(vec_data, list):
+ dist_expr = distance_func(
+ table.c[vec_column_name],
+ "[" + ",".join([str(np.float32(v)) for v in vec_data]) + "]",
+ )
+ else:
+ dist_expr = distance_func(
+ table.c[vec_column_name], f"{vec_data}"
+ )
+ stmt = stmt.where(dist_expr <= distance_threshold)
+
+ if isinstance(vec_data, list):
+ stmt = stmt.order_by(
+ distance_func(
+ table.c[vec_column_name],
+ "[" + ",".join([str(np.float32(v)) for v in vec_data]) + "]",
+ )
+ )
+ else:
+ stmt = stmt.order_by(
+ distance_func(
+ table.c[vec_column_name], f"{vec_data}"
+ )
  )
- )
  stmt_str = (
  str(stmt.compile(
  dialect=self.engine.dialect,
@@ -697,18 +405,22 @@ class ObVecClient:
  partition_names: Optional[List[str]] = None,
  str_list: Optional[List[str]] = None,
  **kwargs,
- ): # pylint: disable=unused-argument
- """perform post ann search.
+ ): # pylint: disable=unused-argument
+ """Perform post ann search.

  Args:
- table_name (string) : table name
- vec_data (list) : the vector data to search
- vec_column_name (string) : which vector field to search
- distance_func : function to calculate distance between vectors
- with_dist (bool) : return result with distance
- topk (int) : top K
- output_column_names (Optional[List[str]]) : output fields
- where_clause : do ann search with filter
+ table_name (string): table name
+ vec_data (list): the vector data to search
+ vec_column_name (string): which vector field to search
+ distance_func: function to calculate distance between vectors
+ with_dist (bool): return result with distance
+ topk (int): top K
+ output_column_names (Optional[List[str]]): output fields
+ extra_output_cols (Optional[List]): additional output columns
+ where_clause: do ann search with filter
+ partition_names (Optional[List[str]]): limit the query to certain partitions
+ str_list (Optional[List[str]]): list to append SQL string to
+ **kwargs: additional arguments
  """
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)

@@ -770,17 +482,18 @@ class ObVecClient:
  output_column_names: Optional[List[str]] = None,
  where_clause=None,
  **kwargs,
- ): # pylint: disable=unused-argument
- """perform precise vector search.
+ ): # pylint: disable=unused-argument
+ """Perform precise vector search.

  Args:
- table_name (string) : table name
- vec_data (list) : the vector data to search
- vec_column_name (string) : which vector field to search
- distance_func : function to calculate distance between vectors
- topk (int) : top K
- output_column_names (Optional[List[str]]) : output column names
- where_clause : do ann search with filter
+ table_name (string): table name
+ vec_data (list): the vector data to search
+ vec_column_name (string): which vector field to search
+ distance_func: function to calculate distance between vectors
+ topk (int): top K
+ output_column_names (Optional[List[str]]): output column names
+ where_clause: do ann search with filter
+ **kwargs: additional arguments
  """
  table = Table(table_name, self.metadata_obj, autoload_with=self.engine)

@@ -807,56 +520,3 @@ class ObVecClient:
  with self.engine.connect() as conn:
  with conn.begin():
  return conn.execute(stmt)
-
- def perform_raw_text_sql(
- self,
- text_sql: str,
- ):
- """Execute raw text SQL."""
- with self.engine.connect() as conn:
- with conn.begin():
- return conn.execute(text(text_sql))
-
- def add_columns(
- self,
- table_name: str,
- columns: list[Column],
- ):
- """Add multiple columns to an existing table.
-
- Args:
- table_name (string): table name
- columns (list[Column]): list of SQLAlchemy Column objects representing the new columns
- """
- compiler = self.engine.dialect.ddl_compiler(self.engine.dialect, None)
- column_specs = [compiler.get_column_specification(column) for column in columns]
- columns_ddl = ", ".join(f"ADD COLUMN {spec}" for spec in column_specs)
-
- with self.engine.connect() as conn:
- with conn.begin():
- conn.execute(
- text(f"ALTER TABLE `{table_name}` {columns_ddl}")
- )
-
- self.refresh_metadata([table_name])
-
- def drop_columns(
- self,
- table_name: str,
- column_names: list[str],
- ):
- """Drop multiple columns from an existing table.
-
- Args:
- table_name (string): table name
- column_names (list[str]): names of the columns to drop
- """
- columns_ddl = ", ".join(f"DROP COLUMN `{name}`" for name in column_names)
-
- with self.engine.connect() as conn:
- with conn.begin():
- conn.execute(
- text(f"ALTER TABLE `{table_name}` {columns_ddl}")
- )
-
- self.refresh_metadata([table_name])
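
For orientation, below is a minimal usage sketch of the 0.2.17 surface visible in this diff. The connection details, table and column names are illustrative, `l2_distance` is assumed to still be exported by the package, and the dict form of `vec_data` (sparse vector) plus the version check in `__init__` assume an OceanBase cluster of at least 4.3.3.

# Hypothetical usage of the 0.2.17 client shown in this diff; names are illustrative.
from pyobvector import ObVecClient, l2_distance  # l2_distance assumed still exported

client = ObVecClient(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")

# ann_search now also accepts a dict (sparse vector) for vec_data and an optional
# distance_threshold that is pushed into the SQL WHERE clause (distance <= threshold).
res = client.ann_search(
    table_name="items",                  # hypothetical table
    vec_data=[0.1, 0.2, 0.3],            # or {"1": 0.4, "7": 0.9} for a sparse-vector column
    vec_column_name="embedding",
    distance_func=l2_distance,
    with_dist=True,
    topk=5,
    distance_threshold=0.8,              # keep only rows whose distance is <= 0.8
)
for row in res.fetchall():
    print(row)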