pyobvector-0.2.10-py3-none-any.whl → pyobvector-0.2.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyobvector/__init__.py CHANGED
@@ -41,6 +41,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
  """
  from .client import *
  from .schema import (
+     ARRAY,
      VECTOR,
      POINT,
      VectorIndex,
@@ -67,6 +68,7 @@ __all__ = [
      "IndexParam",
      "IndexParams",
      "DataType",
+     "ARRAY",
      "VECTOR",
      "POINT",
      "VectorIndex",
pyobvector/client/ob_vec_json_table_client.py CHANGED
@@ -26,8 +26,8 @@ from ..json_table import (
  logger = logging.getLogger(__name__)
  logger.setLevel(logging.DEBUG)

- JSON_TABLE_META_TABLE_NAME = "_meta_json_t"
- JSON_TABLE_DATA_TABLE_NAME = "_data_json_t"
+ JSON_TABLE_META_TABLE_NAME = "meta_json_t"
+ JSON_TABLE_DATA_TABLE_NAME = "data_json_t"

  class ObVecJsonTableClient(ObVecClient):
      """OceanBase Vector Store Client with JSON Table."""
@@ -55,7 +55,7 @@ class ObVecJsonTableClient(ObVecClient):
          jdata_id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
          jdata = Column(JSON)

-     class JsonTableMetadata:
+     class JsonTableMetadata:
          def __init__(self, user_id: str):
              self.user_id = user_id
              self.meta_cache: Dict[str, List] = {}
@@ -200,7 +200,7 @@ class ObVecJsonTableClient(ObVecClient):
              return "INT"
          if datatype == exp.DataType.Type.TINYINT:
              return "TINYINT"
-         if datatype == exp.DataType.Type.TIMESTAMP:
+         if datatype in (exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPTZ):
              return "TIMESTAMP"
          if datatype == exp.DataType.Type.VARCHAR:
              return "VARCHAR"
pyobvector/client/schema_type.py CHANGED
@@ -11,7 +11,7 @@ from sqlalchemy import (
  )
  from sqlalchemy.dialects.mysql import LONGTEXT
  from .enum import IntEnum
- from ..schema import VECTOR
+ from ..schema import ARRAY, VECTOR


  class DataType(IntEnum):
@@ -60,8 +60,8 @@ def convert_datatype_to_sqltype(datatype: DataType):
          return LONGTEXT
      if datatype == DataType.VARCHAR:
          return String
-     # if datatype == DataType.ARRAY:
-     #     return ARRAY
+     if datatype == DataType.ARRAY:
+         return ARRAY
      if datatype == DataType.JSON:
          return JSON
      if datatype == DataType.FLOAT_VECTOR:
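
Restoring the commented-out branch completes the DataType-to-SQLAlchemy mapping for arrays. A minimal sketch of what the mapping now returns (import paths follow the file locations listed in RECORD):

```python
from pyobvector import DataType
from pyobvector.client.schema_type import convert_datatype_to_sqltype
from pyobvector.schema import ARRAY

# DataType.ARRAY now maps to the new ARRAY type class instead of falling through.
assert convert_datatype_to_sqltype(DataType.ARRAY) is ARRAY
```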
pyobvector/schema/__init__.py CHANGED
@@ -1,5 +1,6 @@
  """A extension for SQLAlchemy for vector storage related schema definition.

+ * ARRAY An extended data type in SQLAlchemy for ObVecClient
  * VECTOR An extended data type in SQLAlchemy for ObVecClient
  * VectorIndex An extended index type in SQLAlchemy for ObVecClient
  * CreateVectorIndex Vector Index Creation statement clause
@@ -18,6 +19,7 @@
  * CreateFtsIndex Full Text Search Index Creation statement clause
  * MatchAgainst Full Text Search clause
  """
+ from .array import ARRAY
  from .vector import VECTOR
  from .geo_srid_point import POINT
  from .vector_index import VectorIndex, CreateVectorIndex
@@ -30,6 +32,7 @@ from .full_text_index import FtsIndex, CreateFtsIndex
  from .match_against_func import MatchAgainst

  __all__ = [
+     "ARRAY",
      "VECTOR",
      "POINT",
      "VectorIndex",
pyobvector/schema/array.py ADDED
@@ -0,0 +1,142 @@
+ """ARRAY: An extended data type for SQLAlchemy"""
+ import json
+ from typing import Any, List, Optional, Sequence, Union, Type
+
+ from sqlalchemy.sql.type_api import TypeEngine
+ from sqlalchemy.types import UserDefinedType, String
+
+
+ class ARRAY(UserDefinedType):
+     """ARRAY data type definition with support for up to 6 levels of nesting."""
+     cache_ok = True
+     _string = String()
+     _max_nesting_level = 6
+
+     def __init__(self, item_type: Union[TypeEngine, type]):
+         """Construct an ARRAY.
+
+         Args:
+             item_type: The data type of items in this array. For nested arrays,
+                 pass another ARRAY type.
+
+         Raises:
+             ValueError: If nesting level exceeds the maximum allowed level (6).
+         """
+         super(UserDefinedType, self).__init__()
+         if isinstance(item_type, type):
+             item_type = item_type()
+         self.item_type = item_type
+         self._validate_nesting_level()
+
+     def _validate_nesting_level(self):
+         """Validate that the nesting level does not exceed the maximum allowed level."""
+         level = 1
+         current_type = self.item_type
+         while isinstance(current_type, ARRAY):
+             level += 1
+             if level > self._max_nesting_level:
+                 raise ValueError(f"Maximum nesting level of {self._max_nesting_level} exceeded")
+             current_type = current_type.item_type
+
+     def get_col_spec(self, **kw): # pylint: disable=unused-argument
+         """Parse to array data type definition in text SQL."""
+         if hasattr(self.item_type, 'get_col_spec'):
+             base_type = self.item_type.get_col_spec(**kw)
+         else:
+             base_type = str(self.item_type)
+         return f"ARRAY({base_type})"
+
+     def bind_processor(self, dialect):
+         item_proc = self.item_type.dialect_impl(dialect).bind_processor(dialect)
+
+         def process(value: Optional[Sequence[Any]]) -> Optional[str]:
+             if value is None:
+                 return None
+
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             processed = convert(value)
+             return json.dumps(processed)
+
+         return process
+
+     def result_processor(self, dialect, coltype):
+         item_proc = self.item_type.dialect_impl(dialect).result_processor(dialect, coltype)
+
+         def process(value: Optional[str]) -> Optional[List[Any]]:
+             if value is None:
+                 return None
+
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             value = json.loads(value) if isinstance(value, str) else value
+             return convert(value)
+
+         return process
+
+     def literal_processor(self, dialect):
+         item_proc = self.item_type.dialect_impl(dialect).literal_processor(dialect)
+
+         def process(value: Sequence[Any]) -> str:
+             def convert(val):
+                 if isinstance(val, (list, tuple)):
+                     return [convert(v) for v in val]
+                 if item_proc:
+                     return item_proc(val)
+                 return val
+
+             processed = convert(value)
+             return json.dumps(processed)
+
+         return process
+
+     def __repr__(self):
+         """Return a string representation of the array type."""
+         current_type = self.item_type
+         nesting_level = 1
+         base_type = current_type
+
+         # Find the innermost type and count nesting level
+         while isinstance(current_type, ARRAY):
+             nesting_level += 1
+             current_type = current_type.item_type
+             if not isinstance(current_type, ARRAY):
+                 base_type = current_type
+
+         return f"{nesting_level}D_Array({base_type})"
+
+
+ def nested_array(dim: int) -> Type[ARRAY]:
+     """Create a nested array type class with specified dimensions.
+
+     Args:
+         dim: The number of dimensions for the array type (1-6)
+
+     Returns:
+         A class type that can be instantiated with an item_type to create a nested array
+
+     Raises:
+         ValueError: If dim is not between 1 and 6
+     """
+     if not 1 <= dim <= 6:
+         raise ValueError("Dimension must be between 1 and 6")
+
+     class ArrayType(ARRAY):
+         def __init__(self, item_type: Union[TypeEngine, type]):
+             nested_type = item_type
+             for _ in range(dim - 1):
+                 nested_type = ARRAY(nested_type)
+             super().__init__(nested_type)
+
+     ArrayType.__name__ = f"{dim}D_Array"
+     return ArrayType
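
A minimal usage sketch of the new module; it only exercises the type objects (no database connection), and the variable names are illustrative:

```python
from sqlalchemy import Integer

from pyobvector.schema import ARRAY
from pyobvector.schema.array import nested_array

# One-dimensional array of integers: renders as ARRAY(INTEGER) in DDL.
tags = ARRAY(Integer)
print(tags.get_col_spec())   # ARRAY(INTEGER)
print(repr(tags))            # 1D_Array(INTEGER)

# Two-dimensional array built through the nested_array helper.
matrix = nested_array(2)(Integer)
print(repr(matrix))          # 2D_Array(INTEGER)

# Anything nested deeper than 6 levels raises ValueError via _validate_nesting_level.
```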
pyobvector/schema/reflection.py CHANGED
@@ -1,7 +1,9 @@
  """OceanBase table definition reflection."""
  import re
  import logging
- from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile
+ from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
+
+ from pyobvector.schema.array import nested_array

  logger = logging.getLogger(__name__)

@@ -31,6 +33,16 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
          )
          ### end of block

+         self._re_array_column = _re_compile(
+             r"\s*"
+             r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s\s+"
+             r"(?P<coltype_with_args>(?i:(?<!\w)array(?!\w))\s*\([^()]*(?:\([^()]*\)[^()]*)*\))"
+             r"(?:\s+(?P<notnull>(?:NOT\s+)?NULL))?"
+             r"(?:\s+DEFAULT\s+(?P<default>(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+)))?"
+             r"(?:\s+COMMENT\s+'(?P<comment>(?:''|[^'])*)')?"
+             r"\s*,?\s*$" % quotes
+         )
+
          self._re_key = _re_compile(
              r" "
              r"(?:(FULLTEXT|SPATIAL|VECTOR|(?P<type>\S+)) )?KEY"
@@ -64,6 +76,59 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
              )
          )

+     def _parse_column(self, line, state):
+         m = self._re_array_column.match(line)
+         if m:
+             spec = m.groupdict()
+             name, coltype_with_args = spec["name"].strip(), spec["coltype_with_args"].strip()
+
+             item_pattern = re.compile(
+                 r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$",
+                 re.IGNORECASE
+             )
+             item_m = item_pattern.match(coltype_with_args)
+             if not item_m:
+                 raise ValueError(f"Failed to find inner type from array column definition: {line}")
+
+             item_type = self.dialect.ischema_names[item_m.group(1).lower()]
+             item_type_arg = item_m.group(2)
+             if item_type_arg is None or item_type_arg == "":
+                 item_type_args = []
+             elif item_type_arg[0] == "'" and item_type_arg[-1] == "'":
+                 item_type_args = self._re_csv_str.findall(item_type_arg)
+             else:
+                 item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]
+
+             nested_level = coltype_with_args.lower().count('array')
+             type_instance = nested_array(nested_level)(item_type(*item_type_args))
+
+             col_kw = {}
+
+             # NOT NULL
+             col_kw["nullable"] = True
+             if spec.get("notnull", False) == "NOT NULL":
+                 col_kw["nullable"] = False
+
+             # DEFAULT
+             default = spec.get("default", None)
+
+             if default == "NULL":
+                 # eliminates the need to deal with this later.
+                 default = None
+
+             comment = spec.get("comment", None)
+
+             if comment is not None:
+                 comment = cleanup_text(comment)
+
+             col_d = dict(
+                 name=name, type=type_instance, default=default, comment=comment
+             )
+             col_d.update(col_kw)
+             state.columns.append(col_d)
+         else:
+             super()._parse_column(line, state)
+
      def _parse_constraints(self, line):
          """Parse a CONSTRAINT line."""
          ret = super()._parse_constraints(line)
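
For context, a hedged illustration of the kind of reflected column definition the new _re_array_column pattern and _parse_column override are meant to handle; the column name and comment below are made up, and only the inner-type pattern from _parse_column is exercised:

```python
import re

# Hypothetical reflected column line of the shape the new pattern targets:
#   `scores` ARRAY(ARRAY(INT)) DEFAULT NULL COMMENT 'per round scores',
# Below, the inner-type pattern from _parse_column is applied to its type part.
item_pattern = re.compile(r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$", re.IGNORECASE)

coltype = "ARRAY(ARRAY(INT))"
m = item_pattern.match(coltype)
assert m.group(1) == "INT"                    # innermost scalar type
assert coltype.lower().count("array") == 2    # nesting depth -> nested_array(2)(...)
```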
pyobvector-0.2.10.dist-info/METADATA → pyobvector-0.2.12.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: pyobvector
- Version: 0.2.10
+ Version: 0.2.12
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
  Author: shanhaikang.shk
  Author-email: shanhaikang.shk@oceanbase.com
@@ -36,7 +36,7 @@ poetry install
  - install with pip:

  ```shell
- pip install pyobvector==0.2.10
+ pip install pyobvector==0.2.12
  ```

  ## Build Doc
pyobvector-0.2.10.dist-info/RECORD → pyobvector-0.2.12.dist-info/RECORD
@@ -1,4 +1,4 @@
- pyobvector/__init__.py,sha256=9URzXBXtF5gEwy97TvZyqH0ICeRdHXE0UNJT1q3x95o,3748
+ pyobvector/__init__.py,sha256=q_WyBRa0pVIQZSMbWvgykOJDD7JVDKGnojVDo9CT26E,3772
  pyobvector/client/__init__.py,sha256=fDK2FVdSm3-XCwTqsam7zisic5UMhANUq97r29i27nc,2819
  pyobvector/client/collection_schema.py,sha256=x6cicII8wdsThgzMDA0HlcMYefnLfpdHepnh_Rv8Hr8,5548
  pyobvector/client/enum.py,sha256=3lPjSltITSE694-qOAP4yoX6fzCjKD4WAewmIxFs49o,139
@@ -7,21 +7,22 @@ pyobvector/client/fts_index_param.py,sha256=hMCjA3Aecnt0uQQT6UQGTIIqdPk1M4gX4-zR
  pyobvector/client/index_param.py,sha256=3gXi66Ey1PO9x5_61CrH7DmPb496kviBQI5NT7nfbGc,6309
  pyobvector/client/milvus_like_client.py,sha256=CpPo6mkGE8iNFpKGBFof3h7E1VTzy1DAPGlFM9F_s8g,26373
  pyobvector/client/ob_vec_client.py,sha256=Yt2nG0w4268hg7DE0tqkGaytGsY-jqojX8hGTQjmsKg,29390
- pyobvector/client/ob_vec_json_table_client.py,sha256=6xLQaU8HwhR4H9z26nyzNwDYXtvdCpfwz0ZK8NbPVkg,39034
+ pyobvector/client/ob_vec_json_table_client.py,sha256=m0Oq41dXEil9S1YCK2_RGbSMziatqAItn8Osk-9rzJI,39066
  pyobvector/client/partitions.py,sha256=Bxwr5yVNlXwZc7SXBC03NeqL9giy4Fe6S2qZdHD8xGw,15621
- pyobvector/client/schema_type.py,sha256=ICCSriOhk-P7Q1PhK0D0XQMTd3ZDFenCrJMXp6hRQdw,1579
+ pyobvector/client/schema_type.py,sha256=u1LJsr1o9lxv2b_6KYu77RciFa1R_Qk69k_WT30x6BU,1582
  pyobvector/json_table/__init__.py,sha256=X5MmK3f10oyJleUUFZJFeunMEfzmf6P1f_7094b-FZc,554
  pyobvector/json_table/json_value_returning_func.py,sha256=NWSV2zhe2-1KhIprQaFqOH3vUVF46YaHIZUqX66WZKM,1864
  pyobvector/json_table/oceanbase_dialect.py,sha256=lxpbWBQdK18LWXLmGyk_-ODv6VfnwGLHbcpsQMElOUo,4480
  pyobvector/json_table/virtual_data_type.py,sha256=uQh6ZQ0UbwpVO9TFegGeu4E8bXW7rdLHAXFQJdiEjLs,3467
- pyobvector/schema/__init__.py,sha256=8eW-N9CClU1yTRTD8wryrfAI84bInuYJufBOWspPi9k,2161
+ pyobvector/schema/__init__.py,sha256=EU8NH8Q-L05sFBGKPV6yIBUeh5f3awTkArdBJ7d4CvQ,2271
+ pyobvector/schema/array.py,sha256=cFAbayxsJoArPciawFImH835Wnrfm6KeahjBQB7uS44,4787
  pyobvector/schema/dialect.py,sha256=mdRjn3roztCkk6RXbaB0Wn1uhT2BPS2y18MwL6wW-jo,1840
  pyobvector/schema/full_text_index.py,sha256=ohQX8uTPdRswEJONuN5A-bNv203d0N0b2BsJ7etx71g,2071
  pyobvector/schema/geo_srid_point.py,sha256=RwEoCgGTmXDc0le1B2E3mZudtqiFdMf2M0Va1ocmVSY,1210
  pyobvector/schema/gis_func.py,sha256=u7bqaB5qIylW8GvRdglLQL2H1SheQZNnAqgZrOGyrks,3118
  pyobvector/schema/match_against_func.py,sha256=ExTQJvAXHaZwBo1Sjy6IlnF1nF6D9xGUsF4f7zaP8Q0,1336
  pyobvector/schema/ob_table.py,sha256=wlb6Oo9LG-sr8XnG_wbX1Qi5CgnS0XUzNL5qTdsncoY,392
- pyobvector/schema/reflection.py,sha256=0fDQQEwJG1-B2VhmghQORi_1_nTAbxQgc96LXN1-aqs,3241
+ pyobvector/schema/reflection.py,sha256=aWJrodN9B2NmCOLoagg_v4b-9ABJGLCEWoDb7CIjTPY,5745
  pyobvector/schema/replace_stmt.py,sha256=FtGLXHz6DwzD0FOZPn1EZgXdbHZu-K9HIHS02rZqYrE,560
  pyobvector/schema/vec_dist_func.py,sha256=4GAWSrhFNDYooBpbBg604wDrByPrewp46Y4VeoDxV7Y,2986
  pyobvector/schema/vector.py,sha256=dFKfPcTOto0jNxVjhvDmJM7Q4wwp6Z-HcZ3K6oZxUMc,1120
@@ -29,7 +30,7 @@ pyobvector/schema/vector_index.py,sha256=aNtrEBUclc4s6QuqCZpu3Hj3OdjyhbWgtLiJzo6
  pyobvector/util/__init__.py,sha256=D9EgRDlcMSDhY3uI__vnCl45Or75dOXMWSval5P5fqs,251
  pyobvector/util/ob_version.py,sha256=ZIySam8q_MCiwctAiAHPB4GdAzGQiXEo1wVkc9IOTDU,1539
  pyobvector/util/vector.py,sha256=xyM-NuOyd78K7P3kinqyWvLIzEbf9c-4TKn_QVF7qgw,2265
- pyobvector-0.2.10.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
- pyobvector-0.2.10.dist-info/METADATA,sha256=Wgm6wNdD8Nkbd_NW4AxoLG5kT0mqIGve6ehjLMJ4UWM,6659
- pyobvector-0.2.10.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
- pyobvector-0.2.10.dist-info/RECORD,,
+ pyobvector-0.2.12.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+ pyobvector-0.2.12.dist-info/METADATA,sha256=b8nShwi2i0RfIRXdmaF-9zgy3LTt0mpujD3tgtOx9L8,6659
+ pyobvector-0.2.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ pyobvector-0.2.12.dist-info/RECORD,,
pyobvector-0.2.10.dist-info/WHEEL → pyobvector-0.2.12.dist-info/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.1.2
+ Generator: poetry-core 2.1.3
  Root-Is-Purelib: true
  Tag: py3-none-any