pyobvector 0.2.11__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {pyobvector-0.2.11 → pyobvector-0.2.12}/PKG-INFO +2 -2
  2. {pyobvector-0.2.11 → pyobvector-0.2.12}/README.md +1 -1
  3. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/__init__.py +2 -0
  4. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/ob_vec_json_table_client.py +2 -2
  5. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/schema_type.py +3 -3
  6. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/__init__.py +3 -0
  7. pyobvector-0.2.12/pyobvector/schema/array.py +142 -0
  8. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/reflection.py +66 -1
  9. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyproject.toml +1 -1
  10. {pyobvector-0.2.11 → pyobvector-0.2.12}/LICENSE +0 -0
  11. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/__init__.py +0 -0
  12. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/collection_schema.py +0 -0
  13. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/enum.py +0 -0
  14. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/exceptions.py +0 -0
  15. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/fts_index_param.py +0 -0
  16. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/index_param.py +0 -0
  17. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/milvus_like_client.py +0 -0
  18. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/ob_vec_client.py +0 -0
  19. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/client/partitions.py +0 -0
  20. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/json_table/__init__.py +0 -0
  21. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/json_table/json_value_returning_func.py +0 -0
  22. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/json_table/oceanbase_dialect.py +0 -0
  23. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/json_table/virtual_data_type.py +0 -0
  24. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/dialect.py +0 -0
  25. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/full_text_index.py +0 -0
  26. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/geo_srid_point.py +0 -0
  27. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/gis_func.py +0 -0
  28. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/match_against_func.py +0 -0
  29. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/ob_table.py +0 -0
  30. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/replace_stmt.py +0 -0
  31. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/vec_dist_func.py +0 -0
  32. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/vector.py +0 -0
  33. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/schema/vector_index.py +0 -0
  34. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/util/__init__.py +0 -0
  35. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/util/ob_version.py +0 -0
  36. {pyobvector-0.2.11 → pyobvector-0.2.12}/pyobvector/util/vector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pyobvector
3
- Version: 0.2.11
3
+ Version: 0.2.12
4
4
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
5
5
  Author: shanhaikang.shk
6
6
  Author-email: shanhaikang.shk@oceanbase.com
@@ -36,7 +36,7 @@ poetry install
36
36
  - install with pip:
37
37
 
38
38
  ```shell
39
- pip install pyobvector==0.2.11
39
+ pip install pyobvector==0.2.12
40
40
  ```
41
41
 
42
42
  ## Build Doc
@@ -15,7 +15,7 @@ poetry install
15
15
  - install with pip:
16
16
 
17
17
  ```shell
18
- pip install pyobvector==0.2.11
18
+ pip install pyobvector==0.2.12
19
19
  ```
20
20
 
21
21
  ## Build Doc
@@ -41,6 +41,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
41
41
  """
42
42
  from .client import *
43
43
  from .schema import (
44
+ ARRAY,
44
45
  VECTOR,
45
46
  POINT,
46
47
  VectorIndex,
@@ -67,6 +68,7 @@ __all__ = [
67
68
  "IndexParam",
68
69
  "IndexParams",
69
70
  "DataType",
71
+ "ARRAY",
70
72
  "VECTOR",
71
73
  "POINT",
72
74
  "VectorIndex",
@@ -55,7 +55,7 @@ class ObVecJsonTableClient(ObVecClient):
55
55
  jdata_id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
56
56
  jdata = Column(JSON)
57
57
 
58
- class JsonTableMetadata:
58
+ class JsonTableMetadata:
59
59
  def __init__(self, user_id: str):
60
60
  self.user_id = user_id
61
61
  self.meta_cache: Dict[str, List] = {}
@@ -200,7 +200,7 @@ class ObVecJsonTableClient(ObVecClient):
200
200
  return "INT"
201
201
  if datatype == exp.DataType.Type.TINYINT:
202
202
  return "TINYINT"
203
- if datatype == exp.DataType.Type.TIMESTAMP:
203
+ if datatype in (exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPTZ):
204
204
  return "TIMESTAMP"
205
205
  if datatype == exp.DataType.Type.VARCHAR:
206
206
  return "VARCHAR"
@@ -11,7 +11,7 @@ from sqlalchemy import (
11
11
  )
12
12
  from sqlalchemy.dialects.mysql import LONGTEXT
13
13
  from .enum import IntEnum
14
- from ..schema import VECTOR
14
+ from ..schema import ARRAY, VECTOR
15
15
 
16
16
 
17
17
  class DataType(IntEnum):
@@ -60,8 +60,8 @@ def convert_datatype_to_sqltype(datatype: DataType):
60
60
  return LONGTEXT
61
61
  if datatype == DataType.VARCHAR:
62
62
  return String
63
- # if datatype == DataType.ARRAY:
64
- # return ARRAY
63
+ if datatype == DataType.ARRAY:
64
+ return ARRAY
65
65
  if datatype == DataType.JSON:
66
66
  return JSON
67
67
  if datatype == DataType.FLOAT_VECTOR:
@@ -1,5 +1,6 @@
1
1
  """An extension for SQLAlchemy for vector storage related schema definition.
2
2
 
3
+ * ARRAY An extended data type in SQLAlchemy for ObVecClient
3
4
  * VECTOR An extended data type in SQLAlchemy for ObVecClient
4
5
  * VectorIndex An extended index type in SQLAlchemy for ObVecClient
5
6
  * CreateVectorIndex Vector Index Creation statement clause
@@ -18,6 +19,7 @@
18
19
  * CreateFtsIndex Full Text Search Index Creation statement clause
19
20
  * MatchAgainst Full Text Search clause
20
21
  """
22
+ from .array import ARRAY
21
23
  from .vector import VECTOR
22
24
  from .geo_srid_point import POINT
23
25
  from .vector_index import VectorIndex, CreateVectorIndex
@@ -30,6 +32,7 @@ from .full_text_index import FtsIndex, CreateFtsIndex
30
32
  from .match_against_func import MatchAgainst
31
33
 
32
34
  __all__ = [
35
+ "ARRAY",
33
36
  "VECTOR",
34
37
  "POINT",
35
38
  "VectorIndex",
@@ -0,0 +1,142 @@
1
+ """ARRAY: An extended data type for SQLAlchemy"""
2
+ import json
3
+ from typing import Any, List, Optional, Sequence, Union, Type
4
+
5
+ from sqlalchemy.sql.type_api import TypeEngine
6
+ from sqlalchemy.types import UserDefinedType, String
7
+
8
+
9
class ARRAY(UserDefinedType):
    """ARRAY data type definition with support for up to 6 levels of nesting.

    Python lists/tuples (possibly nested) are serialized to JSON text when
    bound as parameters or rendered as literals, and JSON text coming back
    from the driver is parsed into (nested) Python lists.
    """
    cache_ok = True
    _string = String()
    _max_nesting_level = 6

    def __init__(self, item_type: Union[TypeEngine, type]):
        """Construct an ARRAY.

        Args:
            item_type: The data type of items in this array. A type class is
                instantiated with no arguments. For nested arrays, pass
                another ARRAY type.

        Raises:
            ValueError: If nesting level exceeds the maximum allowed level (6).
        """
        # Plain super() so every class in the MRO, including UserDefinedType
        # itself, gets a chance to initialise.
        super().__init__()
        if isinstance(item_type, type):
            item_type = item_type()
        self.item_type = item_type
        self._validate_nesting_level()

    def _validate_nesting_level(self):
        """Validate that the nesting level does not exceed the maximum allowed level."""
        level = 1
        current_type = self.item_type
        while isinstance(current_type, ARRAY):
            level += 1
            if level > self._max_nesting_level:
                raise ValueError(f"Maximum nesting level of {self._max_nesting_level} exceeded")
            current_type = current_type.item_type

    def _innermost_item_type(self) -> TypeEngine:
        """Return the non-ARRAY element type at the bottom of the nesting."""
        leaf_type = self.item_type
        while isinstance(leaf_type, ARRAY):
            leaf_type = leaf_type.item_type
        return leaf_type

    @staticmethod
    def _convert_leaves(value: Any, item_proc) -> Any:
        """Apply ``item_proc`` to every non-list/tuple leaf of ``value``.

        Lists and tuples are rebuilt as lists with the same nesting; when
        ``item_proc`` is falsy the leaves are returned unchanged.
        """
        if isinstance(value, (list, tuple)):
            return [ARRAY._convert_leaves(v, item_proc) for v in value]
        if item_proc:
            return item_proc(value)
        return value

    def get_col_spec(self, **kw):  # pylint: disable=unused-argument
        """Parse to array data type definition in text SQL."""
        if hasattr(self.item_type, 'get_col_spec'):
            base_type = self.item_type.get_col_spec(**kw)
        else:
            base_type = str(self.item_type)
        return f"ARRAY({base_type})"

    def bind_processor(self, dialect):
        """Return a callable that serializes a bound value to JSON text.

        The per-item processor is taken from the innermost element type.
        Using the processor of a nested ARRAY item type instead would
        JSON-encode every leaf a second time (e.g. ``1`` -> ``"1"``).
        """
        leaf_type = self._innermost_item_type()
        item_proc = leaf_type.dialect_impl(dialect).bind_processor(dialect)
        convert = self._convert_leaves

        def process(value: Optional[Sequence[Any]]) -> Optional[str]:
            if value is None:
                return None
            return json.dumps(convert(value, item_proc))

        return process

    def result_processor(self, dialect, coltype):
        """Return a callable that turns a fetched value into nested lists.

        String values are parsed with ``json.loads``; non-string values are
        used as-is. Leaves are then passed through the innermost element
        type's result processor, so string leaves of nested arrays are never
        re-parsed as JSON.
        """
        leaf_type = self._innermost_item_type()
        item_proc = leaf_type.dialect_impl(dialect).result_processor(dialect, coltype)
        convert = self._convert_leaves

        def process(value: Optional[str]) -> Optional[List[Any]]:
            if value is None:
                return None
            parsed = json.loads(value) if isinstance(value, str) else value
            return convert(parsed, item_proc)

        return process

    def literal_processor(self, dialect):
        """Return a callable that renders a value as JSON text for inline SQL.

        As with bind parameters, leaves are processed with the innermost
        element type's literal processor.
        """
        leaf_type = self._innermost_item_type()
        item_proc = leaf_type.dialect_impl(dialect).literal_processor(dialect)
        convert = self._convert_leaves

        def process(value: Sequence[Any]) -> str:
            return json.dumps(convert(value, item_proc))

        return process

    def __repr__(self):
        """Return a string representation of the array type."""
        # Count nesting levels while walking down to the innermost type.
        nesting_level = 1
        base_type = self.item_type
        while isinstance(base_type, ARRAY):
            nesting_level += 1
            base_type = base_type.item_type

        return f"{nesting_level}D_Array({base_type})"
117
+
118
+
119
def nested_array(dim: int) -> Type[ARRAY]:
    """Build an ARRAY subclass whose instances are ``dim``-dimensional arrays.

    Args:
        dim: The number of dimensions for the array type (1-6)

    Returns:
        A class that, when instantiated with an item_type, wraps that type in
        ``dim - 1`` inner ARRAY layers and uses the result as its own item type.

    Raises:
        ValueError: If dim is not between 1 and 6
    """
    if not (1 <= dim <= 6):
        raise ValueError("Dimension must be between 1 and 6")

    class ArrayType(ARRAY):
        def __init__(self, item_type: Union[TypeEngine, type]):
            # Each pass adds one inner dimension; the subclass instance
            # itself provides the outermost one.
            wrapped = item_type
            for _level in range(1, dim):
                wrapped = ARRAY(wrapped)
            super().__init__(wrapped)

    ArrayType.__name__ = f"{dim}D_Array"
    return ArrayType
@@ -1,7 +1,9 @@
1
1
  """OceanBase table definition reflection."""
2
2
  import re
3
3
  import logging
4
- from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile
4
+ from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
5
+
6
+ from pyobvector.schema.array import nested_array
5
7
 
6
8
  logger = logging.getLogger(__name__)
7
9
 
@@ -31,6 +33,16 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
31
33
  )
32
34
  ### end of block
33
35
 
36
+ self._re_array_column = _re_compile(
37
+ r"\s*"
38
+ r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s\s+"
39
+ r"(?P<coltype_with_args>(?i:(?<!\w)array(?!\w))\s*\([^()]*(?:\([^()]*\)[^()]*)*\))"
40
+ r"(?:\s+(?P<notnull>(?:NOT\s+)?NULL))?"
41
+ r"(?:\s+DEFAULT\s+(?P<default>(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+)))?"
42
+ r"(?:\s+COMMENT\s+'(?P<comment>(?:''|[^'])*)')?"
43
+ r"\s*,?\s*$" % quotes
44
+ )
45
+
34
46
  self._re_key = _re_compile(
35
47
  r" "
36
48
  r"(?:(FULLTEXT|SPATIAL|VECTOR|(?P<type>\S+)) )?KEY"
@@ -64,6 +76,59 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
64
76
  )
65
77
  )
66
78
 
79
+ def _parse_column(self, line, state):
80
+ m = self._re_array_column.match(line)
81
+ if m:
82
+ spec = m.groupdict()
83
+ name, coltype_with_args = spec["name"].strip(), spec["coltype_with_args"].strip()
84
+
85
+ item_pattern = re.compile(
86
+ r"^(?:array\s*\()*([\w]+)(?:\(([\d,]+)\))?\)*$",
87
+ re.IGNORECASE
88
+ )
89
+ item_m = item_pattern.match(coltype_with_args)
90
+ if not item_m:
91
+ raise ValueError(f"Failed to find inner type from array column definition: {line}")
92
+
93
+ item_type = self.dialect.ischema_names[item_m.group(1).lower()]
94
+ item_type_arg = item_m.group(2)
95
+ if item_type_arg is None or item_type_arg == "":
96
+ item_type_args = []
97
+ elif item_type_arg[0] == "'" and item_type_arg[-1] == "'":
98
+ item_type_args = self._re_csv_str.findall(item_type_arg)
99
+ else:
100
+ item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]
101
+
102
+ nested_level = coltype_with_args.lower().count('array')
103
+ type_instance = nested_array(nested_level)(item_type(*item_type_args))
104
+
105
+ col_kw = {}
106
+
107
+ # NOT NULL
108
+ col_kw["nullable"] = True
109
+ if spec.get("notnull", False) == "NOT NULL":
110
+ col_kw["nullable"] = False
111
+
112
+ # DEFAULT
113
+ default = spec.get("default", None)
114
+
115
+ if default == "NULL":
116
+ # eliminates the need to deal with this later.
117
+ default = None
118
+
119
+ comment = spec.get("comment", None)
120
+
121
+ if comment is not None:
122
+ comment = cleanup_text(comment)
123
+
124
+ col_d = dict(
125
+ name=name, type=type_instance, default=default, comment=comment
126
+ )
127
+ col_d.update(col_kw)
128
+ state.columns.append(col_d)
129
+ else:
130
+ super()._parse_column(line, state)
131
+
67
132
  def _parse_constraints(self, line):
68
133
  """Parse a CONSTRAINT line."""
69
134
  ret = super()._parse_constraints(line)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pyobvector"
3
- version = "0.2.11"
3
+ version = "0.2.12"
4
4
  description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
5
5
  authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
6
6
  readme = "README.md"
File without changes