pyobvector 0.2.10__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.10 → pyobvector-0.2.12}/PKG-INFO +2 -2
- {pyobvector-0.2.10 → pyobvector-0.2.12}/README.md +1 -1
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/__init__.py +2 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/ob_vec_json_table_client.py +4 -4
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/schema_type.py +3 -3
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/__init__.py +3 -0
- pyobvector-0.2.12/pyobvector/schema/array.py +142 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/reflection.py +66 -1
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyproject.toml +1 -1
- {pyobvector-0.2.10 → pyobvector-0.2.12}/LICENSE +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/__init__.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/collection_schema.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/enum.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/exceptions.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/fts_index_param.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/index_param.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/milvus_like_client.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/ob_vec_client.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/client/partitions.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/json_table/__init__.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/json_table/json_value_returning_func.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/json_table/oceanbase_dialect.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/json_table/virtual_data_type.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/dialect.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/full_text_index.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/geo_srid_point.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/gis_func.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/match_against_func.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/ob_table.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/replace_stmt.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/vec_dist_func.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/vector.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/schema/vector_index.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/util/__init__.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/util/ob_version.py +0 -0
- {pyobvector-0.2.10 → pyobvector-0.2.12}/pyobvector/util/vector.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pyobvector
|
|
3
|
-
Version: 0.2.10
|
|
3
|
+
Version: 0.2.12
|
|
4
4
|
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
5
|
Author: shanhaikang.shk
|
|
6
6
|
Author-email: shanhaikang.shk@oceanbase.com
|
|
@@ -36,7 +36,7 @@ poetry install
|
|
|
36
36
|
- install with pip:
|
|
37
37
|
|
|
38
38
|
```shell
|
|
39
|
-
pip install pyobvector==0.2.10
|
|
39
|
+
pip install pyobvector==0.2.12
|
|
40
40
|
```
|
|
41
41
|
|
|
42
42
|
## Build Doc
|
|
@@ -41,6 +41,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
|
41
41
|
"""
|
|
42
42
|
from .client import *
|
|
43
43
|
from .schema import (
|
|
44
|
+
ARRAY,
|
|
44
45
|
VECTOR,
|
|
45
46
|
POINT,
|
|
46
47
|
VectorIndex,
|
|
@@ -67,6 +68,7 @@ __all__ = [
|
|
|
67
68
|
"IndexParam",
|
|
68
69
|
"IndexParams",
|
|
69
70
|
"DataType",
|
|
71
|
+
"ARRAY",
|
|
70
72
|
"VECTOR",
|
|
71
73
|
"POINT",
|
|
72
74
|
"VectorIndex",
|
|
@@ -26,8 +26,8 @@ from ..json_table import (
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
logger.setLevel(logging.DEBUG)
|
|
28
28
|
|
|
29
|
-
JSON_TABLE_META_TABLE_NAME = "
|
|
30
|
-
JSON_TABLE_DATA_TABLE_NAME = "
|
|
29
|
+
JSON_TABLE_META_TABLE_NAME = "meta_json_t"
|
|
30
|
+
JSON_TABLE_DATA_TABLE_NAME = "data_json_t"
|
|
31
31
|
|
|
32
32
|
class ObVecJsonTableClient(ObVecClient):
|
|
33
33
|
"""OceanBase Vector Store Client with JSON Table."""
|
|
@@ -55,7 +55,7 @@ class ObVecJsonTableClient(ObVecClient):
|
|
|
55
55
|
jdata_id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
|
|
56
56
|
jdata = Column(JSON)
|
|
57
57
|
|
|
58
|
-
class JsonTableMetadata:
|
|
58
|
+
class JsonTableMetadata:
|
|
59
59
|
def __init__(self, user_id: str):
|
|
60
60
|
self.user_id = user_id
|
|
61
61
|
self.meta_cache: Dict[str, List] = {}
|
|
@@ -200,7 +200,7 @@ class ObVecJsonTableClient(ObVecClient):
|
|
|
200
200
|
return "INT"
|
|
201
201
|
if datatype == exp.DataType.Type.TINYINT:
|
|
202
202
|
return "TINYINT"
|
|
203
|
-
if datatype
|
|
203
|
+
if datatype in (exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPTZ):
|
|
204
204
|
return "TIMESTAMP"
|
|
205
205
|
if datatype == exp.DataType.Type.VARCHAR:
|
|
206
206
|
return "VARCHAR"
|
|
@@ -11,7 +11,7 @@ from sqlalchemy import (
|
|
|
11
11
|
)
|
|
12
12
|
from sqlalchemy.dialects.mysql import LONGTEXT
|
|
13
13
|
from .enum import IntEnum
|
|
14
|
-
from ..schema import VECTOR
|
|
14
|
+
from ..schema import ARRAY, VECTOR
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class DataType(IntEnum):
|
|
@@ -60,8 +60,8 @@ def convert_datatype_to_sqltype(datatype: DataType):
|
|
|
60
60
|
return LONGTEXT
|
|
61
61
|
if datatype == DataType.VARCHAR:
|
|
62
62
|
return String
|
|
63
|
-
|
|
64
|
-
|
|
63
|
+
if datatype == DataType.ARRAY:
|
|
64
|
+
return ARRAY
|
|
65
65
|
if datatype == DataType.JSON:
|
|
66
66
|
return JSON
|
|
67
67
|
if datatype == DataType.FLOAT_VECTOR:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""A extension for SQLAlchemy for vector storage related schema definition.
|
|
2
2
|
|
|
3
|
+
* ARRAY An extended data type in SQLAlchemy for ObVecClient
|
|
3
4
|
* VECTOR An extended data type in SQLAlchemy for ObVecClient
|
|
4
5
|
* VectorIndex An extended index type in SQLAlchemy for ObVecClient
|
|
5
6
|
* CreateVectorIndex Vector Index Creation statement clause
|
|
@@ -18,6 +19,7 @@
|
|
|
18
19
|
* CreateFtsIndex Full Text Search Index Creation statement clause
|
|
19
20
|
* MatchAgainst Full Text Search clause
|
|
20
21
|
"""
|
|
22
|
+
from .array import ARRAY
|
|
21
23
|
from .vector import VECTOR
|
|
22
24
|
from .geo_srid_point import POINT
|
|
23
25
|
from .vector_index import VectorIndex, CreateVectorIndex
|
|
@@ -30,6 +32,7 @@ from .full_text_index import FtsIndex, CreateFtsIndex
|
|
|
30
32
|
from .match_against_func import MatchAgainst
|
|
31
33
|
|
|
32
34
|
__all__ = [
|
|
35
|
+
"ARRAY",
|
|
33
36
|
"VECTOR",
|
|
34
37
|
"POINT",
|
|
35
38
|
"VectorIndex",
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""ARRAY: An extended data type for SQLAlchemy"""
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, List, Optional, Sequence, Union, Type
|
|
4
|
+
|
|
5
|
+
from sqlalchemy.sql.type_api import TypeEngine
|
|
6
|
+
from sqlalchemy.types import UserDefinedType, String
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ARRAY(UserDefinedType):
    """ARRAY data type definition with support for up to 6 levels of nesting.

    Values are exchanged with the driver as JSON text: Python lists/tuples are
    serialized with ``json.dumps`` on the way in and string results are parsed
    with ``json.loads`` on the way out. A nested array is declared by passing
    another ``ARRAY`` as ``item_type``.
    """

    cache_ok = True
    _string = String()
    _max_nesting_level = 6

    def __init__(self, item_type: Union[TypeEngine, type]):
        """Construct an ARRAY.

        Args:
            item_type: The data type of items in this array, given either as a
                type instance or as a type class (which is instantiated with
                no arguments). For nested arrays, pass another ARRAY type.

        Raises:
            ValueError: If nesting level exceeds the maximum allowed level (6).
        """
        super().__init__()
        if isinstance(item_type, type):
            item_type = item_type()
        self.item_type = item_type
        self._validate_nesting_level()

    def _validate_nesting_level(self):
        """Validate that the nesting level does not exceed the maximum allowed level."""
        level = 1
        current_type = self.item_type
        while isinstance(current_type, ARRAY):
            level += 1
            if level > self._max_nesting_level:
                raise ValueError(f"Maximum nesting level of {self._max_nesting_level} exceeded")
            current_type = current_type.item_type

    def _leaf_type(self):
        """Return the innermost element type, i.e. the first non-ARRAY ``item_type``."""
        leaf = self.item_type
        while isinstance(leaf, ARRAY):
            leaf = leaf.item_type
        return leaf

    @staticmethod
    def _map_leaves(value, leaf_proc):
        """Rebuild ``value`` as nested lists, applying ``leaf_proc`` to every scalar.

        Lists and tuples are walked recursively (tuples come back as lists);
        any other value is passed through ``leaf_proc`` when one is given and
        returned unchanged otherwise.
        """
        if isinstance(value, (list, tuple)):
            return [ARRAY._map_leaves(v, leaf_proc) for v in value]
        if leaf_proc:
            return leaf_proc(value)
        return value

    def get_col_spec(self, **kw):  # pylint: disable=unused-argument
        """Parse to array data type definition in text SQL."""
        if hasattr(self.item_type, 'get_col_spec'):
            base_type = self.item_type.get_col_spec(**kw)
        else:
            base_type = str(self.item_type)
        return f"ARRAY({base_type})"

    def bind_processor(self, dialect):
        """Return a callable that serializes a (possibly nested) sequence to JSON text.

        The per-element processor is taken from the innermost element type.
        Using ``self.item_type`` directly would, for nested arrays, pick up the
        inner ARRAY's own JSON encoder and JSON-encode every scalar a second
        time.
        """
        leaf_proc = self._leaf_type().dialect_impl(dialect).bind_processor(dialect)
        map_leaves = self._map_leaves

        def process(value: Optional[Sequence[Any]]) -> Optional[str]:
            if value is None:
                return None
            return json.dumps(map_leaves(value, leaf_proc))

        return process

    def result_processor(self, dialect, coltype):
        """Return a callable that turns a fetched value into nested Python lists.

        String results are parsed as JSON; other non-None values are walked as
        they are. The per-element processor comes from the innermost element
        type so that scalars of nested arrays are not run through
        ``json.loads`` a second time.
        """
        leaf_proc = self._leaf_type().dialect_impl(dialect).result_processor(dialect, coltype)
        map_leaves = self._map_leaves

        def process(value: Optional[str]) -> Optional[List[Any]]:
            if value is None:
                return None
            if isinstance(value, str):
                value = json.loads(value)
            return map_leaves(value, leaf_proc)

        return process

    def literal_processor(self, dialect):
        """Return a callable that renders a sequence as JSON text for inline literals.

        NOTE(review): the element type's literal processor is applied to each
        scalar before ``json.dumps``; confirm the resulting text is the literal
        form the server expects for this column type.
        """
        leaf_proc = self._leaf_type().dialect_impl(dialect).literal_processor(dialect)
        map_leaves = self._map_leaves

        def process(value: Sequence[Any]) -> str:
            return json.dumps(map_leaves(value, leaf_proc))

        return process

    def __repr__(self):
        """Return a string representation of the array type, e.g. ``2D_Array(INTEGER)``."""
        nesting_level = 1
        base_type = self.item_type
        # Unwrap nested ARRAYs to find the innermost type and the depth.
        while isinstance(base_type, ARRAY):
            nesting_level += 1
            base_type = base_type.item_type

        return f"{nesting_level}D_Array({base_type})"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def nested_array(dim: int) -> "Type[ARRAY]":
    """Create a nested array type class with specified dimensions.

    Args:
        dim: The number of dimensions for the array type (1-6). Any object
            implementing ``__index__`` is accepted.

    Returns:
        A class type that can be instantiated with an item_type to create a nested array

    Raises:
        TypeError: If dim is not an integer (e.g. a float or a string).
        ValueError: If dim is not between 1 and 6
    """
    import operator  # local import so the module's import block stays untouched

    # Reject non-integers here instead of returning a class that only fails
    # later, inside range(), when it is instantiated.
    dim = operator.index(dim)
    if not 1 <= dim <= 6:
        raise ValueError("Dimension must be between 1 and 6")

    class ArrayType(ARRAY):
        """ARRAY subclass that wraps its item type in ``dim - 1`` extra ARRAY levels."""

        # NOTE(review): the constructor always adds ``dim - 1`` levels around
        # whatever it receives, while ``self.item_type`` stores the already
        # wrapped type. Confirm that no code path re-invokes this constructor
        # with ``self.item_type`` (e.g. a type copy), which would add nesting.
        def __init__(self, item_type: Union[TypeEngine, type]):
            nested_type = item_type
            for _ in range(dim - 1):
                nested_type = ARRAY(nested_type)
            super().__init__(nested_type)

    # Keep the displayed class name and qualified name in agreement.
    ArrayType.__name__ = ArrayType.__qualname__ = f"{dim}D_Array"
    return ArrayType
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""OceanBase table definition reflection."""
|
|
2
2
|
import re
|
|
3
3
|
import logging
|
|
4
|
-
from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile
|
|
4
|
+
from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
|
|
5
|
+
|
|
6
|
+
from pyobvector.schema.array import nested_array
|
|
5
7
|
|
|
6
8
|
logger = logging.getLogger(__name__)
|
|
7
9
|
|
|
@@ -31,6 +33,16 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
|
|
|
31
33
|
)
|
|
32
34
|
### end of block
|
|
33
35
|
|
|
36
|
+
self._re_array_column = _re_compile(
|
|
37
|
+
r"\s*"
|
|
38
|
+
r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s\s+"
|
|
39
|
+
r"(?P<coltype_with_args>(?i:(?<!\w)array(?!\w))\s*\([^()]*(?:\([^()]*\)[^()]*)*\))"
|
|
40
|
+
r"(?:\s+(?P<notnull>(?:NOT\s+)?NULL))?"
|
|
41
|
+
r"(?:\s+DEFAULT\s+(?P<default>(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+)))?"
|
|
42
|
+
r"(?:\s+COMMENT\s+'(?P<comment>(?:''|[^'])*)')?"
|
|
43
|
+
r"\s*,?\s*$" % quotes
|
|
44
|
+
)
|
|
45
|
+
|
|
34
46
|
self._re_key = _re_compile(
|
|
35
47
|
r" "
|
|
36
48
|
r"(?:(FULLTEXT|SPATIAL|VECTOR|(?P<type>\S+)) )?KEY"
|
|
@@ -64,6 +76,59 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
|
|
|
64
76
|
)
|
|
65
77
|
)
|
|
66
78
|
|
|
79
|
+
def _parse_column(self, line, state):
    """Parse one column definition line, adding support for ARRAY columns.

    A line matching ``self._re_array_column`` is converted into a column
    dict with the keys ``name``, ``type``, ``default``, ``comment`` and
    ``nullable`` and appended to ``state.columns``. Any other line is handed
    to the parent class implementation unchanged.

    Args:
        line: A single column definition line of the table DDL.
        state: Parser state object whose ``columns`` list is extended.

    Raises:
        ValueError: If the element type cannot be extracted from the array
            type text, or the nesting depth is outside 1-6.
        KeyError: If the element type name is not in
            ``self.dialect.ischema_names``.
    """
    m = self._re_array_column.match(line)
    if not m:
        # Not an ARRAY column: defer to the parent parser.
        super()._parse_column(line, state)
        return

    spec = m.groupdict()
    name = spec["name"].strip()
    coltype_with_args = spec["coltype_with_args"].strip()

    # Group 1: innermost element type name; group 2: its numeric arguments.
    item_pattern = re.compile(
        r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$",
        re.IGNORECASE
    )
    item_m = item_pattern.match(coltype_with_args)
    if not item_m:
        raise ValueError(f"Failed to find inner type from array column definition: {line}")

    item_type = self.dialect.ischema_names[item_m.group(1).lower()]
    item_type_arg = item_m.group(2)
    # Group 2 can only hold digits and commas, so the arguments are always
    # integers (or absent).
    if item_type_arg:
        item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]
    else:
        item_type_args = []

    # Count only the "array(" wrappers that precede the element type, so the
    # element type's own name can never inflate the depth.
    nested_level = coltype_with_args[:item_m.start(1)].lower().count('array')
    type_instance = nested_array(nested_level)(item_type(*item_type_args))

    # NOT NULL: compare case- and whitespace-insensitively, because the
    # capturing pattern allows any run of whitespace between the two words.
    notnull = spec.get("notnull") or ""
    nullable = notnull.upper().split() != ["NOT", "NULL"]

    # DEFAULT
    default = spec.get("default", None)
    if default == "NULL":
        # eliminates the need to deal with this later.
        default = None

    comment = spec.get("comment", None)
    if comment is not None:
        comment = cleanup_text(comment)

    state.columns.append(
        dict(
            name=name,
            type=type_instance,
            default=default,
            comment=comment,
            nullable=nullable,
        )
    )
|
|
131
|
+
|
|
67
132
|
def _parse_constraints(self, line):
|
|
68
133
|
"""Parse a CONSTRAINT line."""
|
|
69
134
|
ret = super()._parse_constraints(line)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "pyobvector"
|
|
3
|
-
version = "0.2.10"
|
|
3
|
+
version = "0.2.12"
|
|
4
4
|
description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
|
|
5
5
|
authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
|
|
6
6
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|