pyobvector 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyobvector/__init__.py CHANGED
@@ -41,6 +41,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
41
41
  """
42
42
  from .client import *
43
43
  from .schema import (
44
+ ARRAY,
44
45
  VECTOR,
45
46
  POINT,
46
47
  VectorIndex,
@@ -67,6 +68,7 @@ __all__ = [
67
68
  "IndexParam",
68
69
  "IndexParams",
69
70
  "DataType",
71
+ "ARRAY",
70
72
  "VECTOR",
71
73
  "POINT",
72
74
  "VectorIndex",
@@ -89,6 +89,13 @@ class ObVecClient:
89
89
  message=ExceptionsMessage.ClusterVersionIsLow,
90
90
  )
91
91
 
92
def refresh_metadata(self, tables: Optional[list[str]] = None):
    """Reload table metadata from the database into ``self.metadata_obj``.

    Args:
        tables: Names of the tables to reflect. When ``None`` the reflection
            is not restricted to a list of names.
    """
    # ``only=None`` is what ``MetaData.reflect`` uses when the argument is
    # omitted, so a single call covers both the "all" and "selected" cases.
    # ``extend_existing=True`` lets already-registered Table objects be
    # updated from the database instead of being left untouched.
    self.metadata_obj.reflect(
        bind=self.engine,
        only=tables,
        extend_existing=True,
    )
98
+
92
99
  def _insert_partition_hint_for_query_sql(self, sql: str, partition_hint: str):
93
100
  from_index = sql.find("FROM")
94
101
  assert from_index != -1
@@ -55,7 +55,7 @@ class ObVecJsonTableClient(ObVecClient):
55
55
  jdata_id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
56
56
  jdata = Column(JSON)
57
57
 
58
- class JsonTableMetadata:
58
+ class JsonTableMetadata:
59
59
  def __init__(self, user_id: str):
60
60
  self.user_id = user_id
61
61
  self.meta_cache: Dict[str, List] = {}
@@ -200,7 +200,7 @@ class ObVecJsonTableClient(ObVecClient):
200
200
  return "INT"
201
201
  if datatype == exp.DataType.Type.TINYINT:
202
202
  return "TINYINT"
203
- if datatype == exp.DataType.Type.TIMESTAMP:
203
+ if datatype in (exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPTZ):
204
204
  return "TIMESTAMP"
205
205
  if datatype == exp.DataType.Type.VARCHAR:
206
206
  return "VARCHAR"
@@ -11,7 +11,7 @@ from sqlalchemy import (
11
11
  )
12
12
  from sqlalchemy.dialects.mysql import LONGTEXT
13
13
  from .enum import IntEnum
14
- from ..schema import VECTOR
14
+ from ..schema import ARRAY, VECTOR
15
15
 
16
16
 
17
17
  class DataType(IntEnum):
@@ -60,8 +60,8 @@ def convert_datatype_to_sqltype(datatype: DataType):
60
60
  return LONGTEXT
61
61
  if datatype == DataType.VARCHAR:
62
62
  return String
63
- # if datatype == DataType.ARRAY:
64
- # return ARRAY
63
+ if datatype == DataType.ARRAY:
64
+ return ARRAY
65
65
  if datatype == DataType.JSON:
66
66
  return JSON
67
67
  if datatype == DataType.FLOAT_VECTOR:
@@ -1,5 +1,6 @@
1
1
  """A extension for SQLAlchemy for vector storage related schema definition.
2
2
 
3
+ * ARRAY An extended data type in SQLAlchemy for ObVecClient
3
4
  * VECTOR An extended data type in SQLAlchemy for ObVecClient
4
5
  * VectorIndex An extended index type in SQLAlchemy for ObVecClient
5
6
  * CreateVectorIndex Vector Index Creation statement clause
@@ -18,6 +19,7 @@
18
19
  * CreateFtsIndex Full Text Search Index Creation statement clause
19
20
  * MatchAgainst Full Text Search clause
20
21
  """
22
+ from .array import ARRAY
21
23
  from .vector import VECTOR
22
24
  from .geo_srid_point import POINT
23
25
  from .vector_index import VectorIndex, CreateVectorIndex
@@ -30,6 +32,7 @@ from .full_text_index import FtsIndex, CreateFtsIndex
30
32
  from .match_against_func import MatchAgainst
31
33
 
32
34
  __all__ = [
35
+ "ARRAY",
33
36
  "VECTOR",
34
37
  "POINT",
35
38
  "VectorIndex",
@@ -0,0 +1,158 @@
1
+ """ARRAY: An extended data type for SQLAlchemy"""
2
+ import json
3
+ from typing import Any, List, Optional, Sequence, Union, Type
4
+
5
+ from sqlalchemy.sql.type_api import TypeEngine
6
+ from sqlalchemy.types import UserDefinedType, String
7
+
8
+
9
class ARRAY(UserDefinedType):
    """ARRAY data type definition with support for up to 6 levels of nesting.

    Values are exchanged with the driver as JSON text: Python lists/tuples are
    serialised with ``json.dumps`` on the way in and decoded with
    ``json.loads`` on the way out. The processors of the innermost
    (non-ARRAY) item type are applied to every leaf element.
    """

    cache_ok = True
    _string = String()

    def __init__(self, item_type: Union[TypeEngine, type]):
        """Construct an ARRAY.

        Args:
            item_type: The data type of items in this array, given either as
                a type instance or as a type class (a class is instantiated
                with no arguments). For nested arrays, pass another ARRAY
                type.

        Raises:
            ValueError: If the resulting nesting level exceeds 6.
        """
        # NOTE(review): this starts the MRO lookup *after* UserDefinedType;
        # presumably equivalent to ``super().__init__()`` here - confirm.
        super(UserDefinedType, self).__init__()
        if isinstance(item_type, type):
            item_type = item_type()
        # item_type: the (possibly ARRAY) type of the direct elements.
        self.item_type = item_type
        # dim: nesting depth; every enclosing ARRAY adds one level.
        self.dim = item_type.dim + 1 if isinstance(item_type, ARRAY) else 1
        if self.dim > 6:
            raise ValueError("Maximum nesting level of 6 exceeded")

    def get_col_spec(self, **kw):  # pylint: disable=unused-argument
        """Parse to array data type definition in text SQL."""
        if hasattr(self.item_type, "get_col_spec"):
            base_type = self.item_type.get_col_spec(**kw)
        else:
            base_type = str(self.item_type)
        return f"ARRAY({base_type})"

    def _leaf_item_type(self):
        """Return the innermost item type, i.e. the first one that is not an ARRAY."""
        leaf = self.item_type
        while isinstance(leaf, ARRAY):
            leaf = leaf.item_type
        return leaf

    @staticmethod
    def _map_leaves(value: Any, leaf_proc) -> Any:
        """Rebuild ``value`` with ``leaf_proc`` applied to every non-sequence element.

        Lists and tuples are walked recursively and always come back as lists.
        A falsy ``leaf_proc`` (the type has no processor) leaves leaves as-is.
        """
        if isinstance(value, (list, tuple)):
            return [ARRAY._map_leaves(element, leaf_proc) for element in value]
        if leaf_proc:
            return leaf_proc(value)
        return value

    def _get_list_depth(self, value: Any) -> int:
        """Return the maximum ``list`` nesting depth of ``value``.

        Anything that is not a ``list`` has depth 0; an empty list has depth 1.
        """
        if not isinstance(value, list):
            return 0
        return 1 + max(
            (self._get_list_depth(element) for element in value), default=0
        )

    def _validate_dimension(self, value: list[Any]):
        """Check that ``value`` is nested exactly ``self.dim`` levels deep.

        Raises:
            AssertionError: If the depth of ``value`` differs from ``self.dim``.
        """
        arr_depth = self._get_list_depth(value)
        if arr_depth != self.dim:
            # Raised explicitly rather than via ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is kept as
            # AssertionError for backward compatibility.
            raise AssertionError(
                "Array dimension mismatch, expected {}, got {}".format(self.dim, arr_depth)
            )

    def bind_processor(self, dialect):
        """Return a callable converting a Python value into its JSON text form.

        The callable passes ``None`` through, validates and returns ``str``
        input unchanged, and otherwise applies the leaf type's bind processor
        to every element before validating the depth and serialising.
        """
        item_proc = self._leaf_item_type().dialect_impl(dialect).bind_processor(dialect)

        # ``Union`` instead of ``X | Y``: the annotation is evaluated every
        # time this method runs, and ``|`` on typing generics needs 3.10+.
        def process(value: Optional[Union[Sequence[Any], str]]) -> Optional[str]:
            if value is None:
                return None
            if isinstance(value, str):
                # Pre-serialised input: only its nesting depth is checked.
                self._validate_dimension(json.loads(value))
                return value

            processed = self._map_leaves(value, item_proc)
            self._validate_dimension(processed)
            return json.dumps(processed)

        return process

    def result_processor(self, dialect, coltype):
        """Return a callable converting a fetched value into nested Python lists.

        ``str`` results are decoded with ``json.loads``; other non-``None``
        values are used as they are. The leaf type's result processor is then
        applied to every element.
        """
        item_proc = self._leaf_item_type().dialect_impl(dialect).result_processor(dialect, coltype)

        def process(value: Optional[str]) -> Optional[List[Any]]:
            if value is None:
                return None
            if isinstance(value, str):
                value = json.loads(value)
            return self._map_leaves(value, item_proc)

        return process

    def literal_processor(self, dialect):
        """Return a callable rendering a value as JSON text for inline SQL.

        NOTE(review): the result is the bare ``json.dumps`` output with no SQL
        quoting added here - confirm that is what the dialect expects.
        """
        item_proc = self._leaf_item_type().dialect_impl(dialect).literal_processor(dialect)

        def process(value: Sequence[Any]) -> str:
            return json.dumps(self._map_leaves(value, item_proc))

        return process
123
+
124
+
125
def nested_array(dim: int) -> Type[ARRAY]:
    """Create a nested array type class with specified dimensions.

    Args:
        dim: The number of dimensions for the array type (1-6)

    Returns:
        A class type that can be instantiated with an item_type to create a nested array

    Raises:
        ValueError: If dim is not between 1 and 6
    """
    if not 1 <= dim <= 6:
        raise ValueError("Dimension must be between 1 and 6")

    class NestedArray(ARRAY):
        """ARRAY subclass fixed at ``dim`` levels of nesting around a non-ARRAY item type."""

        cache_ok = True
        _string = String()

        def __init__(self, item_type: Union[TypeEngine, type]):
            """Build the ``dim``-level array type.

            Args:
                item_type: The innermost element type, as an instance or as a
                    class (a class is instantiated with no arguments).

            Raises:
                AssertionError: If ``item_type`` is itself an ARRAY.
            """
            # Starting the lookup after UserDefinedType also bypasses
            # ARRAY.__init__, which would derive ``dim`` from ``item_type``
            # instead of using the fixed value captured from the factory.
            super(UserDefinedType, self).__init__()
            if isinstance(item_type, type):
                item_type = item_type()

            if isinstance(item_type, ARRAY):
                # Explicit raise (not ``assert``) so the check survives
                # ``python -O``; the exception type is unchanged.
                raise AssertionError(
                    "The item_type of NestedArray should not be an ARRAY type"
                )

            # This instance is the outermost level, so only ``dim - 1``
            # intermediate ARRAY layers are needed around the leaf type.
            for _ in range(dim - 1):
                item_type = ARRAY(item_type)

            self.item_type = item_type
            self.dim = dim

    return NestedArray
@@ -1,7 +1,9 @@
1
1
  """OceanBase table definition reflection."""
2
2
  import re
3
3
  import logging
4
- from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile
4
+ from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
5
+
6
+ from pyobvector.schema.array import nested_array
5
7
 
6
8
  logger = logging.getLogger(__name__)
7
9
 
@@ -31,6 +33,16 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
31
33
  )
32
34
  ### end of block
33
35
 
36
+ self._re_array_column = _re_compile(
37
+ r"\s*"
38
+ r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s\s+"
39
+ r"(?P<coltype_with_args>(?i:(?<!\w)array(?!\w))\s*\([^()]*(?:\([^()]*\)[^()]*)*\))"
40
+ r"(?:\s+(?P<notnull>(?:NOT\s+)?NULL))?"
41
+ r"(?:\s+DEFAULT\s+(?P<default>(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+)))?"
42
+ r"(?:\s+COMMENT\s+'(?P<comment>(?:''|[^'])*)')?"
43
+ r"\s*,?\s*$" % quotes
44
+ )
45
+
34
46
  self._re_key = _re_compile(
35
47
  r" "
36
48
  r"(?:(FULLTEXT|SPATIAL|VECTOR|(?P<type>\S+)) )?KEY"
@@ -64,6 +76,59 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
64
76
  )
65
77
  )
66
78
 
79
def _parse_column(self, line, state):
    """Parse one column definition line, with support for ARRAY columns.

    A line matching ``self._re_array_column`` is turned into a column
    dictionary here and appended to ``state.columns``; any other line is
    handed to the parent parser unchanged.

    Args:
        line: A single column definition line of the table definition.
        state: Parse state whose ``columns`` list receives the column dict.

    Raises:
        ValueError: If the innermost item type cannot be extracted from the
            array type text.
        KeyError: If the innermost type name is not in
            ``self.dialect.ischema_names``.
    """
    m = self._re_array_column.match(line)
    if not m:
        super()._parse_column(line, state)
        return

    spec = m.groupdict()
    name = spec["name"].strip()
    coltype_with_args = spec["coltype_with_args"].strip()

    # Strip every leading "array(" and trailing ")" to expose the innermost
    # type name (group 1) and its optional numeric arguments (group 2).
    item_pattern = re.compile(
        r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$",
        re.IGNORECASE
    )
    item_m = item_pattern.match(coltype_with_args)
    if not item_m:
        raise ValueError(f"Failed to find inner type from array column definition: {line}")

    item_type = self.dialect.ischema_names[item_m.group(1).lower()]
    item_type_arg = item_m.group(2)
    # Group 2 can only hold digits and commas, so it is either absent or a
    # non-empty list of integers; no quoted-string form can reach this point.
    if item_type_arg is None:
        item_type_args = []
    else:
        item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]

    # One occurrence of "array" per nesting level.
    nested_level = coltype_with_args.lower().count('array')
    type_instance = nested_array(nested_level)(item_type(*item_type_args))

    # Nullable unless the definition says exactly NOT NULL.
    nullable = spec.get("notnull", False) != "NOT NULL"

    default = spec.get("default", None)
    if default == "NULL":
        # eliminates the need to deal with this later.
        default = None

    comment = spec.get("comment", None)
    if comment is not None:
        comment = cleanup_text(comment)

    state.columns.append(
        {
            "name": name,
            "type": type_instance,
            "default": default,
            "comment": comment,
            "nullable": nullable,
        }
    )
131
+
67
132
  def _parse_constraints(self, line):
68
133
  """Parse a CONSTRAINT line."""
69
134
  ret = super()._parse_constraints(line)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pyobvector
3
- Version: 0.2.11
3
+ Version: 0.2.13
4
4
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
5
5
  Author: shanhaikang.shk
6
6
  Author-email: shanhaikang.shk@oceanbase.com
@@ -36,7 +36,7 @@ poetry install
36
36
  - install with pip:
37
37
 
38
38
  ```shell
39
- pip install pyobvector==0.2.11
39
+ pip install pyobvector==0.2.13
40
40
  ```
41
41
 
42
42
  ## Build Doc
@@ -1,4 +1,4 @@
1
- pyobvector/__init__.py,sha256=9URzXBXtF5gEwy97TvZyqH0ICeRdHXE0UNJT1q3x95o,3748
1
+ pyobvector/__init__.py,sha256=q_WyBRa0pVIQZSMbWvgykOJDD7JVDKGnojVDo9CT26E,3772
2
2
  pyobvector/client/__init__.py,sha256=fDK2FVdSm3-XCwTqsam7zisic5UMhANUq97r29i27nc,2819
3
3
  pyobvector/client/collection_schema.py,sha256=x6cicII8wdsThgzMDA0HlcMYefnLfpdHepnh_Rv8Hr8,5548
4
4
  pyobvector/client/enum.py,sha256=3lPjSltITSE694-qOAP4yoX6fzCjKD4WAewmIxFs49o,139
@@ -6,22 +6,23 @@ pyobvector/client/exceptions.py,sha256=CAsTHR9juYleRjYIa4bqk_lw14h8daBvChKoU0o19
6
6
  pyobvector/client/fts_index_param.py,sha256=hMCjA3Aecnt0uQQT6UQGTIIqdPk1M4gX4-zREDQygLs,1139
7
7
  pyobvector/client/index_param.py,sha256=3gXi66Ey1PO9x5_61CrH7DmPb496kviBQI5NT7nfbGc,6309
8
8
  pyobvector/client/milvus_like_client.py,sha256=CpPo6mkGE8iNFpKGBFof3h7E1VTzy1DAPGlFM9F_s8g,26373
9
- pyobvector/client/ob_vec_client.py,sha256=Yt2nG0w4268hg7DE0tqkGaytGsY-jqojX8hGTQjmsKg,29390
10
- pyobvector/client/ob_vec_json_table_client.py,sha256=zUNP7PXVuUwohzM9rjtWks8sGVJVJ3GvyRKtoFDnhgI,39032
9
+ pyobvector/client/ob_vec_client.py,sha256=QZAZxtSgsS2Z8SgEtsSSUcCroaML_XS-cq7aK-sSxic,29718
10
+ pyobvector/client/ob_vec_json_table_client.py,sha256=m0Oq41dXEil9S1YCK2_RGbSMziatqAItn8Osk-9rzJI,39066
11
11
  pyobvector/client/partitions.py,sha256=Bxwr5yVNlXwZc7SXBC03NeqL9giy4Fe6S2qZdHD8xGw,15621
12
- pyobvector/client/schema_type.py,sha256=ICCSriOhk-P7Q1PhK0D0XQMTd3ZDFenCrJMXp6hRQdw,1579
12
+ pyobvector/client/schema_type.py,sha256=u1LJsr1o9lxv2b_6KYu77RciFa1R_Qk69k_WT30x6BU,1582
13
13
  pyobvector/json_table/__init__.py,sha256=X5MmK3f10oyJleUUFZJFeunMEfzmf6P1f_7094b-FZc,554
14
14
  pyobvector/json_table/json_value_returning_func.py,sha256=NWSV2zhe2-1KhIprQaFqOH3vUVF46YaHIZUqX66WZKM,1864
15
15
  pyobvector/json_table/oceanbase_dialect.py,sha256=lxpbWBQdK18LWXLmGyk_-ODv6VfnwGLHbcpsQMElOUo,4480
16
16
  pyobvector/json_table/virtual_data_type.py,sha256=uQh6ZQ0UbwpVO9TFegGeu4E8bXW7rdLHAXFQJdiEjLs,3467
17
- pyobvector/schema/__init__.py,sha256=8eW-N9CClU1yTRTD8wryrfAI84bInuYJufBOWspPi9k,2161
17
+ pyobvector/schema/__init__.py,sha256=EU8NH8Q-L05sFBGKPV6yIBUeh5f3awTkArdBJ7d4CvQ,2271
18
+ pyobvector/schema/array.py,sha256=PoSBc3qCVdMJcLramZp95t69i15ES1J_bqnFANqQoRs,5255
18
19
  pyobvector/schema/dialect.py,sha256=mdRjn3roztCkk6RXbaB0Wn1uhT2BPS2y18MwL6wW-jo,1840
19
20
  pyobvector/schema/full_text_index.py,sha256=ohQX8uTPdRswEJONuN5A-bNv203d0N0b2BsJ7etx71g,2071
20
21
  pyobvector/schema/geo_srid_point.py,sha256=RwEoCgGTmXDc0le1B2E3mZudtqiFdMf2M0Va1ocmVSY,1210
21
22
  pyobvector/schema/gis_func.py,sha256=u7bqaB5qIylW8GvRdglLQL2H1SheQZNnAqgZrOGyrks,3118
22
23
  pyobvector/schema/match_against_func.py,sha256=ExTQJvAXHaZwBo1Sjy6IlnF1nF6D9xGUsF4f7zaP8Q0,1336
23
24
  pyobvector/schema/ob_table.py,sha256=wlb6Oo9LG-sr8XnG_wbX1Qi5CgnS0XUzNL5qTdsncoY,392
24
- pyobvector/schema/reflection.py,sha256=0fDQQEwJG1-B2VhmghQORi_1_nTAbxQgc96LXN1-aqs,3241
25
+ pyobvector/schema/reflection.py,sha256=aWJrodN9B2NmCOLoagg_v4b-9ABJGLCEWoDb7CIjTPY,5745
25
26
  pyobvector/schema/replace_stmt.py,sha256=FtGLXHz6DwzD0FOZPn1EZgXdbHZu-K9HIHS02rZqYrE,560
26
27
  pyobvector/schema/vec_dist_func.py,sha256=4GAWSrhFNDYooBpbBg604wDrByPrewp46Y4VeoDxV7Y,2986
27
28
  pyobvector/schema/vector.py,sha256=dFKfPcTOto0jNxVjhvDmJM7Q4wwp6Z-HcZ3K6oZxUMc,1120
@@ -29,7 +30,7 @@ pyobvector/schema/vector_index.py,sha256=aNtrEBUclc4s6QuqCZpu3Hj3OdjyhbWgtLiJzo6
29
30
  pyobvector/util/__init__.py,sha256=D9EgRDlcMSDhY3uI__vnCl45Or75dOXMWSval5P5fqs,251
30
31
  pyobvector/util/ob_version.py,sha256=ZIySam8q_MCiwctAiAHPB4GdAzGQiXEo1wVkc9IOTDU,1539
31
32
  pyobvector/util/vector.py,sha256=xyM-NuOyd78K7P3kinqyWvLIzEbf9c-4TKn_QVF7qgw,2265
32
- pyobvector-0.2.11.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
33
- pyobvector-0.2.11.dist-info/METADATA,sha256=_wtAgSEx2uHG_ScDgZxKFKcfWs4QFihXOOLmRChHgIM,6659
34
- pyobvector-0.2.11.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
35
- pyobvector-0.2.11.dist-info/RECORD,,
33
+ pyobvector-0.2.13.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
34
+ pyobvector-0.2.13.dist-info/METADATA,sha256=fxRXFWsunoyvBZ3wW5r7FMo8JB6WZbYBj9ijOtF8tnU,6659
35
+ pyobvector-0.2.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
36
+ pyobvector-0.2.13.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any