pyobvector-0.2.10-py3-none-any.whl → pyobvector-0.2.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyobvector/__init__.py CHANGED
@@ -41,6 +41,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
  """
  from .client import *
  from .schema import (
+     ARRAY,
      VECTOR,
      POINT,
      VectorIndex,
@@ -67,6 +68,7 @@ __all__ = [
      "IndexParam",
      "IndexParams",
      "DataType",
+     "ARRAY",
      "VECTOR",
      "POINT",
      "VectorIndex",
pyobvector/client/ob_vec_json_table_client.py CHANGED
@@ -26,8 +26,8 @@ from ..json_table import (
  logger = logging.getLogger(__name__)
  logger.setLevel(logging.DEBUG)

- JSON_TABLE_META_TABLE_NAME = "_meta_json_t"
- JSON_TABLE_DATA_TABLE_NAME = "_data_json_t"
+ JSON_TABLE_META_TABLE_NAME = "meta_json_t"
+ JSON_TABLE_DATA_TABLE_NAME = "data_json_t"

  class ObVecJsonTableClient(ObVecClient):
      """OceanBase Vector Store Client with JSON Table."""
@@ -55,7 +55,7 @@ class ObVecJsonTableClient(ObVecClient):
          jdata_id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
          jdata = Column(JSON)

-     class JsonTableMetadata:
+     class JsonTableMetadata:
          def __init__(self, user_id: str):
              self.user_id = user_id
              self.meta_cache: Dict[str, List] = {}
@@ -200,7 +200,7 @@ class ObVecJsonTableClient(ObVecClient):
              return "INT"
          if datatype == exp.DataType.Type.TINYINT:
              return "TINYINT"
-         if datatype == exp.DataType.Type.TIMESTAMP:
+         if datatype in (exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPTZ):
              return "TIMESTAMP"
          if datatype == exp.DataType.Type.VARCHAR:
              return "VARCHAR"
pyobvector/client/schema_type.py CHANGED
@@ -11,7 +11,7 @@ from sqlalchemy import (
  )
  from sqlalchemy.dialects.mysql import LONGTEXT
  from .enum import IntEnum
- from ..schema import VECTOR
+ from ..schema import ARRAY, VECTOR


  class DataType(IntEnum):
@@ -60,8 +60,8 @@ def convert_datatype_to_sqltype(datatype: DataType):
          return LONGTEXT
      if datatype == DataType.VARCHAR:
          return String
-     # if datatype == DataType.ARRAY:
-     #     return ARRAY
+     if datatype == DataType.ARRAY:
+         return ARRAY
      if datatype == DataType.JSON:
          return JSON
      if datatype == DataType.FLOAT_VECTOR:
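
Restoring the commented-out branch completes the DataType-to-SQLAlchemy mapping for arrays. A minimal sketch of what the mapping now returns (import paths follow the file locations listed in RECORD):

```python
from pyobvector import DataType
from pyobvector.client.schema_type import convert_datatype_to_sqltype
from pyobvector.schema import ARRAY

# DataType.ARRAY now maps to the new ARRAY type class instead of falling through.
assert convert_datatype_to_sqltype(DataType.ARRAY) is ARRAY
```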
pyobvector/schema/__init__.py CHANGED
@@ -1,5 +1,6 @@
  """A extension for SQLAlchemy for vector storage related schema definition.

+ * ARRAY An extended data type in SQLAlchemy for ObVecClient
  * VECTOR An extended data type in SQLAlchemy for ObVecClient
  * VectorIndex An extended index type in SQLAlchemy for ObVecClient
  * CreateVectorIndex Vector Index Creation statement clause
@@ -18,6 +19,7 @@
  * CreateFtsIndex Full Text Search Index Creation statement clause
  * MatchAgainst Full Text Search clause
  """
+ from .array import ARRAY
  from .vector import VECTOR
  from .geo_srid_point import POINT
  from .vector_index import VectorIndex, CreateVectorIndex
@@ -30,6 +32,7 @@ from .full_text_index import FtsIndex, CreateFtsIndex
  from .match_against_func import MatchAgainst

  __all__ = [
+     "ARRAY",
      "VECTOR",
      "POINT",
      "VectorIndex",
pyobvector/schema/array.py ADDED
@@ -0,0 +1,142 @@
+ """ARRAY: An extended data type for SQLAlchemy"""
+ import json
+ from typing import Any, List, Optional, Sequence, Union, Type
+
+ from sqlalchemy.sql.type_api import TypeEngine
+ from sqlalchemy.types import UserDefinedType, String
+
+
+ class ARRAY(UserDefinedType):
+     """ARRAY data type definition with support for up to 6 levels of nesting."""
+     cache_ok = True
+     _string = String()
+     _max_nesting_level = 6
+
+     def __init__(self, item_type: Union[TypeEngine, type]):
+         """Construct an ARRAY.
+
+         Args:
+             item_type: The data type of items in this array. For nested arrays,
+                 pass another ARRAY type.
+
+         Raises:
+             ValueError: If nesting level exceeds the maximum allowed level (6).
+         """
+         super(UserDefinedType, self).__init__()
+         if isinstance(item_type, type):
+             item_type = item_type()
+         self.item_type = item_type
+         self._validate_nesting_level()
+
+     def _validate_nesting_level(self):
+         """Validate that the nesting level does not exceed the maximum allowed level."""
+         level = 1
+         current_type = self.item_type
+         while isinstance(current_type, ARRAY):
+             level += 1
+             if level > self._max_nesting_level:
+                 raise ValueError(f"Maximum nesting level of {self._max_nesting_level} exceeded")
+             current_type = current_type.item_type
+
+     def get_col_spec(self, **kw): # pylint: disable=unused-argument
+         """Parse to array data type definition in text SQL."""
+         if hasattr(self.item_type, 'get_col_spec'):
+             base_type = self.item_type.get_col_spec(**kw)
+         else:
+             base_type = str(self.item_type)
+         return f"ARRAY({base_type})"
+
+     def bind_processor(self, dialect):
+         item_proc = self.item_type.dialect_impl(dialect).bind_processor(dialect)
+
+         def process(value: Optional[Sequence[Any]]) -> Optional[str]:
+             if value is None:
+                 return None
+
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             processed = convert(value)
+             return json.dumps(processed)
+
+         return process
+
+     def result_processor(self, dialect, coltype):
+         item_proc = self.item_type.dialect_impl(dialect).result_processor(dialect, coltype)
+
+         def process(value: Optional[str]) -> Optional[List[Any]]:
+             if value is None:
+                 return None
+
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             value = json.loads(value) if isinstance(value, str) else value
+             return convert(value)
+
+         return process
+
+     def literal_processor(self, dialect):
+         item_proc = self.item_type.dialect_impl(dialect).literal_processor(dialect)
+
+         def process(value: Sequence[Any]) -> str:
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             processed = convert(value)
+             return json.dumps(processed)
+
+         return process
+
+     def __repr__(self):
+         """Return a string representation of the array type."""
+         current_type = self.item_type
+         nesting_level = 1
+         base_type = current_type
+
+         # Find the innermost type and count nesting level
+         while isinstance(current_type, ARRAY):
+             nesting_level += 1
+             current_type = current_type.item_type
+             if not isinstance(current_type, ARRAY):
+                 base_type = current_type
+
+         return f"{nesting_level}D_Array({base_type})"
+
+
+ def nested_array(dim: int) -> Type[ARRAY]:
+     """Create a nested array type class with specified dimensions.
+
+     Args:
+         dim: The number of dimensions for the array type (1-6)
+
+     Returns:
+         A class type that can be instantiated with an item_type to create a nested array
+
+     Raises:
+         ValueError: If dim is not between 1 and 6
+     """
+     if not 1 <= dim <= 6:
+         raise ValueError("Dimension must be between 1 and 6")
+
+     class ArrayType(ARRAY):
+         def __init__(self, item_type: Union[TypeEngine, type]):
+             nested_type = item_type
+             for _ in range(dim - 1):
+                 nested_type = ARRAY(nested_type)
+             super().__init__(nested_type)
+
+     ArrayType.__name__ = f"{dim}D_Array"
+     return ArrayType
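
A minimal usage sketch of the new module; it only exercises the type objects (no database connection), and the variable names are illustrative:

```python
from sqlalchemy import Integer

from pyobvector.schema import ARRAY
from pyobvector.schema.array import nested_array

# One-dimensional array of integers: renders as ARRAY(INTEGER) in DDL.
tags = ARRAY(Integer)
print(tags.get_col_spec())   # ARRAY(INTEGER)
print(repr(tags))            # 1D_Array(INTEGER)

# Two-dimensional array built through the nested_array helper.
matrix = nested_array(2)(Integer)
print(repr(matrix))          # 2D_Array(INTEGER)

# Anything nested deeper than 6 levels raises ValueError via _validate_nesting_level.
```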
pyobvector/schema/reflection.py CHANGED
@@ -1,7 +1,9 @@
  """OceanBase table definition reflection."""
  import re
  import logging
- from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile
+ from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
+
+ from pyobvector.schema.array import nested_array

  logger = logging.getLogger(__name__)

@@ -31,6 +33,16 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
          )
          ### end of block

+         self._re_array_column = _re_compile(
+             r"\s*"
+             r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s\s+"
+             r"(?P<coltype_with_args>(?i:(?<!\w)array(?!\w))\s*\([^()]*(?:\([^()]*\)[^()]*)*\))"
+             r"(?:\s+(?P<notnull>(?:NOT\s+)?NULL))?"
+             r"(?:\s+DEFAULT\s+(?P<default>(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+)))?"
+             r"(?:\s+COMMENT\s+'(?P<comment>(?:''|[^'])*)')?"
+             r"\s*,?\s*$" % quotes
+         )
+
          self._re_key = _re_compile(
              r" "
              r"(?:(FULLTEXT|SPATIAL|VECTOR|(?P<type>\S+)) )?KEY"
@@ -64,6 +76,59 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
              )
          )

+     def _parse_column(self, line, state):
+         m = self._re_array_column.match(line)
+         if m:
+             spec = m.groupdict()
+             name, coltype_with_args = spec["name"].strip(), spec["coltype_with_args"].strip()
+
+             item_pattern = re.compile(
+                 r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$",
+                 re.IGNORECASE
+             )
+             item_m = item_pattern.match(coltype_with_args)
+             if not item_m:
+                 raise ValueError(f"Failed to find inner type from array column definition: {line}")
+
+             item_type = self.dialect.ischema_names[item_m.group(1).lower()]
+             item_type_arg = item_m.group(2)
+             if item_type_arg is None or item_type_arg == "":
+                 item_type_args = []
+             elif item_type_arg[0] == "'" and item_type_arg[-1] == "'":
+                 item_type_args = self._re_csv_str.findall(item_type_arg)
+             else:
+                 item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]
+
+             nested_level = coltype_with_args.lower().count('array')
+             type_instance = nested_array(nested_level)(item_type(*item_type_args))
+
+             col_kw = {}
+
+             # NOT NULL
+             col_kw["nullable"] = True
+             if spec.get("notnull", False) == "NOT NULL":
+                 col_kw["nullable"] = False
+
+             # DEFAULT
+             default = spec.get("default", None)
+
+             if default == "NULL":
+                 # eliminates the need to deal with this later.
+                 default = None
+
+             comment = spec.get("comment", None)
+
+             if comment is not None:
+                 comment = cleanup_text(comment)
+
+             col_d = dict(
+                 name=name, type=type_instance, default=default, comment=comment
+             )
+             col_d.update(col_kw)
+             state.columns.append(col_d)
+         else:
+             super()._parse_column(line, state)
+
      def _parse_constraints(self, line):
          """Parse a CONSTRAINT line."""
          ret = super()._parse_constraints(line)
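
For context, a hedged illustration of the kind of reflected column definition the new _re_array_column pattern and _parse_column override are meant to handle; the column name and comment below are made up, and only the inner-type pattern from _parse_column is exercised:

```python
import re

# Hypothetical reflected column line of the shape the new pattern targets:
#   `scores` ARRAY(ARRAY(INT)) DEFAULT NULL COMMENT 'per round scores',
# Below, the inner-type pattern from _parse_column is applied to its type part.
item_pattern = re.compile(r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$", re.IGNORECASE)

coltype = "ARRAY(ARRAY(INT))"
m = item_pattern.match(coltype)
assert m.group(1) == "INT"                    # innermost scalar type
assert coltype.lower().count("array") == 2    # nesting depth -> nested_array(2)(...)
```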
pyobvector-0.2.10.dist-info/METADATA → pyobvector-0.2.12.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: pyobvector
- Version: 0.2.10
+ Version: 0.2.12
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
  Author: shanhaikang.shk
  Author-email: shanhaikang.shk@oceanbase.com
@@ -36,7 +36,7 @@ poetry install
  - install with pip:

  ```shell
- pip install pyobvector==0.2.10
+ pip install pyobvector==0.2.12
  ```

  ## Build Doc
pyobvector-0.2.10.dist-info/RECORD → pyobvector-0.2.12.dist-info/RECORD
@@ -1,4 +1,4 @@
- pyobvector/__init__.py,sha256=9URzXBXtF5gEwy97TvZyqH0ICeRdHXE0UNJT1q3x95o,3748
+ pyobvector/__init__.py,sha256=q_WyBRa0pVIQZSMbWvgykOJDD7JVDKGnojVDo9CT26E,3772
  pyobvector/client/__init__.py,sha256=fDK2FVdSm3-XCwTqsam7zisic5UMhANUq97r29i27nc,2819
  pyobvector/client/collection_schema.py,sha256=x6cicII8wdsThgzMDA0HlcMYefnLfpdHepnh_Rv8Hr8,5548
  pyobvector/client/enum.py,sha256=3lPjSltITSE694-qOAP4yoX6fzCjKD4WAewmIxFs49o,139
@@ -7,21 +7,22 @@ pyobvector/client/fts_index_param.py,sha256=hMCjA3Aecnt0uQQT6UQGTIIqdPk1M4gX4-zR
  pyobvector/client/index_param.py,sha256=3gXi66Ey1PO9x5_61CrH7DmPb496kviBQI5NT7nfbGc,6309
  pyobvector/client/milvus_like_client.py,sha256=CpPo6mkGE8iNFpKGBFof3h7E1VTzy1DAPGlFM9F_s8g,26373
  pyobvector/client/ob_vec_client.py,sha256=Yt2nG0w4268hg7DE0tqkGaytGsY-jqojX8hGTQjmsKg,29390
- pyobvector/client/ob_vec_json_table_client.py,sha256=6xLQaU8HwhR4H9z26nyzNwDYXtvdCpfwz0ZK8NbPVkg,39034
+ pyobvector/client/ob_vec_json_table_client.py,sha256=m0Oq41dXEil9S1YCK2_RGbSMziatqAItn8Osk-9rzJI,39066
  pyobvector/client/partitions.py,sha256=Bxwr5yVNlXwZc7SXBC03NeqL9giy4Fe6S2qZdHD8xGw,15621
- pyobvector/client/schema_type.py,sha256=ICCSriOhk-P7Q1PhK0D0XQMTd3ZDFenCrJMXp6hRQdw,1579
+ pyobvector/client/schema_type.py,sha256=u1LJsr1o9lxv2b_6KYu77RciFa1R_Qk69k_WT30x6BU,1582
  pyobvector/json_table/__init__.py,sha256=X5MmK3f10oyJleUUFZJFeunMEfzmf6P1f_7094b-FZc,554
  pyobvector/json_table/json_value_returning_func.py,sha256=NWSV2zhe2-1KhIprQaFqOH3vUVF46YaHIZUqX66WZKM,1864
  pyobvector/json_table/oceanbase_dialect.py,sha256=lxpbWBQdK18LWXLmGyk_-ODv6VfnwGLHbcpsQMElOUo,4480
  pyobvector/json_table/virtual_data_type.py,sha256=uQh6ZQ0UbwpVO9TFegGeu4E8bXW7rdLHAXFQJdiEjLs,3467
- pyobvector/schema/__init__.py,sha256=8eW-N9CClU1yTRTD8wryrfAI84bInuYJufBOWspPi9k,2161
+ pyobvector/schema/__init__.py,sha256=EU8NH8Q-L05sFBGKPV6yIBUeh5f3awTkArdBJ7d4CvQ,2271
+ pyobvector/schema/array.py,sha256=cFAbayxsJoArPciawFImH835Wnrfm6KeahjBQB7uS44,4787
  pyobvector/schema/dialect.py,sha256=mdRjn3roztCkk6RXbaB0Wn1uhT2BPS2y18MwL6wW-jo,1840
  pyobvector/schema/full_text_index.py,sha256=ohQX8uTPdRswEJONuN5A-bNv203d0N0b2BsJ7etx71g,2071
  pyobvector/schema/geo_srid_point.py,sha256=RwEoCgGTmXDc0le1B2E3mZudtqiFdMf2M0Va1ocmVSY,1210
  pyobvector/schema/gis_func.py,sha256=u7bqaB5qIylW8GvRdglLQL2H1SheQZNnAqgZrOGyrks,3118
  pyobvector/schema/match_against_func.py,sha256=ExTQJvAXHaZwBo1Sjy6IlnF1nF6D9xGUsF4f7zaP8Q0,1336
  pyobvector/schema/ob_table.py,sha256=wlb6Oo9LG-sr8XnG_wbX1Qi5CgnS0XUzNL5qTdsncoY,392
- pyobvector/schema/reflection.py,sha256=0fDQQEwJG1-B2VhmghQORi_1_nTAbxQgc96LXN1-aqs,3241
+ pyobvector/schema/reflection.py,sha256=aWJrodN9B2NmCOLoagg_v4b-9ABJGLCEWoDb7CIjTPY,5745
  pyobvector/schema/replace_stmt.py,sha256=FtGLXHz6DwzD0FOZPn1EZgXdbHZu-K9HIHS02rZqYrE,560
  pyobvector/schema/vec_dist_func.py,sha256=4GAWSrhFNDYooBpbBg604wDrByPrewp46Y4VeoDxV7Y,2986
  pyobvector/schema/vector.py,sha256=dFKfPcTOto0jNxVjhvDmJM7Q4wwp6Z-HcZ3K6oZxUMc,1120
@@ -29,7 +30,7 @@ pyobvector/schema/vector_index.py,sha256=aNtrEBUclc4s6QuqCZpu3Hj3OdjyhbWgtLiJzo6
  pyobvector/util/__init__.py,sha256=D9EgRDlcMSDhY3uI__vnCl45Or75dOXMWSval5P5fqs,251
  pyobvector/util/ob_version.py,sha256=ZIySam8q_MCiwctAiAHPB4GdAzGQiXEo1wVkc9IOTDU,1539
  pyobvector/util/vector.py,sha256=xyM-NuOyd78K7P3kinqyWvLIzEbf9c-4TKn_QVF7qgw,2265
- pyobvector-0.2.10.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
- pyobvector-0.2.10.dist-info/METADATA,sha256=Wgm6wNdD8Nkbd_NW4AxoLG5kT0mqIGve6ehjLMJ4UWM,6659
- pyobvector-0.2.10.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- pyobvector-0.2.10.dist-info/RECORD,,
+ pyobvector-0.2.12.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+ pyobvector-0.2.12.dist-info/METADATA,sha256=b8nShwi2i0RfIRXdmaF-9zgy3LTt0mpujD3tgtOx9L8,6659
+ pyobvector-0.2.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ pyobvector-0.2.12.dist-info/RECORD,,
pyobvector-0.2.10.dist-info/WHEEL → pyobvector-0.2.12.dist-info/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.2
+ Generator: poetry-core 2.1.3
  Root-Is-Purelib: true
  Tag: py3-none-any