pyobvector 0.2.12__tar.gz → 0.2.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {pyobvector-0.2.12 → pyobvector-0.2.14}/PKG-INFO +2 -2
  2. {pyobvector-0.2.12 → pyobvector-0.2.14}/README.md +1 -1
  3. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/ob_vec_client.py +59 -0
  4. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/ob_vec_json_table_client.py +3 -0
  5. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/array.py +41 -61
  6. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/reflection.py +4 -2
  7. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyproject.toml +1 -1
  8. {pyobvector-0.2.12 → pyobvector-0.2.14}/LICENSE +0 -0
  9. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/__init__.py +0 -0
  10. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/__init__.py +0 -0
  11. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/collection_schema.py +0 -0
  12. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/enum.py +0 -0
  13. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/exceptions.py +0 -0
  14. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/fts_index_param.py +0 -0
  15. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/index_param.py +0 -0
  16. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/milvus_like_client.py +0 -0
  17. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/partitions.py +0 -0
  18. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/client/schema_type.py +0 -0
  19. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/json_table/__init__.py +0 -0
  20. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/json_table/json_value_returning_func.py +0 -0
  21. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/json_table/oceanbase_dialect.py +0 -0
  22. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/json_table/virtual_data_type.py +0 -0
  23. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/__init__.py +0 -0
  24. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/dialect.py +0 -0
  25. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/full_text_index.py +0 -0
  26. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/geo_srid_point.py +0 -0
  27. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/gis_func.py +0 -0
  28. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/match_against_func.py +0 -0
  29. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/ob_table.py +0 -0
  30. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/replace_stmt.py +0 -0
  31. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/vec_dist_func.py +0 -0
  32. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/vector.py +0 -0
  33. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/schema/vector_index.py +0 -0
  34. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/util/__init__.py +0 -0
  35. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/util/ob_version.py +0 -0
  36. {pyobvector-0.2.12 → pyobvector-0.2.14}/pyobvector/util/vector.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pyobvector
-Version: 0.2.12
+Version: 0.2.14
 Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
 Author: shanhaikang.shk
 Author-email: shanhaikang.shk@oceanbase.com
@@ -36,7 +36,7 @@ poetry install
 - install with pip:
 
 ```shell
-pip install pyobvector==0.2.12
+pip install pyobvector==0.2.14
 ```
 
 ## Build Doc
README.md
@@ -15,7 +15,7 @@ poetry install
 - install with pip:
 
 ```shell
-pip install pyobvector==0.2.12
+pip install pyobvector==0.2.14
 ```
 
 ## Build Doc
pyobvector/client/ob_vec_client.py
@@ -89,6 +89,21 @@ class ObVecClient:
                 message=ExceptionsMessage.ClusterVersionIsLow,
             )
 
+    def refresh_metadata(self, tables: Optional[list[str]] = None):
+        """Reload metadata from the database.
+
+        Args:
+            tables (Optional[list[str]]): names of the tables to refresh. If None, refresh all tables.
+        """
+        if tables is not None:
+            for table_name in tables:
+                if table_name in self.metadata_obj.tables:
+                    self.metadata_obj.remove(Table(table_name, self.metadata_obj))
+            self.metadata_obj.reflect(bind=self.engine, only=tables, extend_existing=True)
+        else:
+            self.metadata_obj.clear()
+            self.metadata_obj.reflect(bind=self.engine, extend_existing=True)
+
     def _insert_partition_hint_for_query_sql(self, sql: str, partition_hint: str):
         from_index = sql.find("FROM")
         assert from_index != -1
@@ -801,3 +816,47 @@ class ObVecClient:
         with self.engine.connect() as conn:
             with conn.begin():
                 return conn.execute(text(text_sql))
+
+    def add_columns(
+        self,
+        table_name: str,
+        columns: list[Column],
+    ):
+        """Add multiple columns to an existing table.
+
+        Args:
+            table_name (string): table name
+            columns (list[Column]): list of SQLAlchemy Column objects representing the new columns
+        """
+        compiler = self.engine.dialect.ddl_compiler(self.engine.dialect, None)
+        column_specs = [compiler.get_column_specification(column) for column in columns]
+        columns_ddl = ", ".join(f"ADD COLUMN {spec}" for spec in column_specs)
+
+        with self.engine.connect() as conn:
+            with conn.begin():
+                conn.execute(
+                    text(f"ALTER TABLE `{table_name}` {columns_ddl}")
+                )
+
+        self.refresh_metadata([table_name])
+
+    def drop_columns(
+        self,
+        table_name: str,
+        column_names: list[str],
+    ):
+        """Drop multiple columns from an existing table.
+
+        Args:
+            table_name (string): table name
+            column_names (list[str]): names of the columns to drop
+        """
+        columns_ddl = ", ".join(f"DROP COLUMN `{name}`" for name in column_names)
+
+        with self.engine.connect() as conn:
+            with conn.begin():
+                conn.execute(
+                    text(f"ALTER TABLE `{table_name}` {columns_ddl}")
+                )
+
+        self.refresh_metadata([table_name])
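A minimal usage sketch of the new schema-evolution helpers (the connection arguments, table, and column names below are assumptions for illustration, not taken from the package):

```python
from sqlalchemy import Column, Integer, String

from pyobvector import ObVecClient

client = ObVecClient(uri="127.0.0.1:2881", user="root@test")  # assumed connection args

# One ALTER TABLE ... ADD COLUMN ..., ADD COLUMN ... statement,
# followed by a metadata refresh scoped to this table.
client.add_columns(
    "items",
    [Column("category", String(64)), Column("priority", Integer)],
)

# Likewise a single ALTER TABLE ... DROP COLUMN statement plus a metadata refresh.
client.drop_columns("items", ["priority"])
```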
pyobvector/client/ob_vec_json_table_client.py
@@ -236,6 +236,7 @@ class ObVecJsonTableClient(ObVecClient):
             raise ValueError("Table name duplicated")
 
         session = self.session()
+        session.execute(text("SET @@session.autocommit=0"))
         new_meta_cache_items = []
         col_id = 16
         for col_def in ast.find_all(exp.ColumnDef):
@@ -607,6 +608,7 @@ class ObVecJsonTableClient(ObVecClient):
             raise ValueError(f"Table {jtable_name} does not exists")
 
         session = self.session()
+        session.execute(text("SET @@session.autocommit=0"))
         for action in ast.actions:
             if isinstance(action, ChangeColumn):
                 self._handle_alter_jtable_change_column(
@@ -681,6 +683,7 @@ class ObVecJsonTableClient(ObVecClient):
             raise ValueError(f"Invalid ast type {ast.this}")
 
         session = self.session()
+        session.execute(text("SET @@session.autocommit=0"))
         n_new_records = 0
         for tuple in ast.expression.expressions:
             expr_list = tuple.expressions
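All three hunks above turn off autocommit on the freshly obtained session before the JSON-table handler starts issuing statements, so its writes are grouped into one explicit transaction. A minimal sketch of that pattern (the DSN and statement are placeholders, not from the package):

```python
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

engine = create_engine("mysql+pymysql://user:pwd@127.0.0.1:2881/test")  # placeholder DSN
session = sessionmaker(bind=engine)()

session.execute(text("SET @@session.autocommit=0"))  # same statement the diff adds
try:
    session.execute(text("UPDATE `demo_meta` SET `col_id` = 16"))  # stand-in for the handler's writes
    session.commit()  # all grouped statements become visible together
except Exception:
    session.rollback()
    raise
```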
@@ -1,6 +1,6 @@
1
1
  """ARRAY: An extended data type for SQLAlchemy"""
2
2
  import json
3
- from typing import Any, List, Optional, Sequence, Union, Type
3
+ from typing import Any, List, Optional, Sequence, Union
4
4
 
5
5
  from sqlalchemy.sql.type_api import TypeEngine
6
6
  from sqlalchemy.types import UserDefinedType, String
@@ -10,7 +10,6 @@ class ARRAY(UserDefinedType):
     """ARRAY data type definition with support for up to 6 levels of nesting."""
     cache_ok = True
     _string = String()
-    _max_nesting_level = 6
 
     def __init__(self, item_type: Union[TypeEngine, type]):
         """Construct an ARRAY.
@@ -18,25 +17,17 @@ class ARRAY(UserDefinedType):
         Args:
             item_type: The data type of items in this array. For nested arrays,
                 pass another ARRAY type.
-
-        Raises:
-            ValueError: If nesting level exceeds the maximum allowed level (6).
         """
         super(UserDefinedType, self).__init__()
         if isinstance(item_type, type):
             item_type = item_type()
         self.item_type = item_type
-        self._validate_nesting_level()
-
-    def _validate_nesting_level(self):
-        """Validate that the nesting level does not exceed the maximum allowed level."""
-        level = 1
-        current_type = self.item_type
-        while isinstance(current_type, ARRAY):
-            level += 1
-            if level > self._max_nesting_level:
-                raise ValueError(f"Maximum nesting level of {self._max_nesting_level} exceeded")
-            current_type = current_type.item_type
+        if isinstance(item_type, ARRAY):
+            self.dim = item_type.dim + 1
+        else:
+            self.dim = 1
+        if self.dim > 6:
+            raise ValueError("Maximum nesting level of 6 exceeded")
 
     def get_col_spec(self, **kw):  # pylint: disable=unused-argument
         """Parse to array data type definition in text SQL."""
@@ -46,12 +37,33 @@ class ARRAY(UserDefinedType):
             base_type = str(self.item_type)
         return f"ARRAY({base_type})"
 
+    def _get_list_depth(self, value: Any) -> int:
+        if not isinstance(value, list):
+            return 0
+        max_depth = 0
+        for element in value:
+            current_depth = self._get_list_depth(element)
+            if current_depth > max_depth:
+                max_depth = current_depth
+        return 1 + max_depth
+
+    def _validate_dimension(self, value: list[Any]):
+        arr_depth = self._get_list_depth(value)
+        assert arr_depth == self.dim, "Array dimension mismatch, expected {}, got {}".format(self.dim, arr_depth)
+
     def bind_processor(self, dialect):
-        item_proc = self.item_type.dialect_impl(dialect).bind_processor(dialect)
+        item_type = self.item_type
+        while isinstance(item_type, ARRAY):
+            item_type = item_type.item_type
 
-        def process(value: Optional[Sequence[Any]]) -> Optional[str]:
+        item_proc = item_type.dialect_impl(dialect).bind_processor(dialect)
+
+        def process(value: Optional[Sequence[Any] | str]) -> Optional[str]:
             if value is None:
                 return None
+            if isinstance(value, str):
+                self._validate_dimension(json.loads(value))
+                return value
 
             def convert(val):
                 if isinstance(val, (list, tuple)):
@@ -61,12 +73,17 @@ class ARRAY(UserDefinedType):
                 return val
 
             processed = convert(value)
+            self._validate_dimension(processed)
             return json.dumps(processed)
 
         return process
 
     def result_processor(self, dialect, coltype):
-        item_proc = self.item_type.dialect_impl(dialect).result_processor(dialect, coltype)
+        item_type = self.item_type
+        while isinstance(item_type, ARRAY):
+            item_type = item_type.item_type
+
+        item_proc = item_type.dialect_impl(dialect).result_processor(dialect, coltype)
 
         def process(value: Optional[str]) -> Optional[List[Any]]:
             if value is None:
@@ -85,7 +102,11 @@ class ARRAY(UserDefinedType):
         return process
 
     def literal_processor(self, dialect):
-        item_proc = self.item_type.dialect_impl(dialect).literal_processor(dialect)
+        item_type = self.item_type
+        while isinstance(item_type, ARRAY):
+            item_type = item_type.item_type
+
+        item_proc = item_type.dialect_impl(dialect).literal_processor(dialect)
 
         def process(value: Sequence[Any]) -> str:
             def convert(val):
@@ -99,44 +120,3 @@ class ARRAY(UserDefinedType):
             return json.dumps(processed)
 
         return process
-
-    def __repr__(self):
-        """Return a string representation of the array type."""
-        current_type = self.item_type
-        nesting_level = 1
-        base_type = current_type
-
-        # Find the innermost type and count nesting level
-        while isinstance(current_type, ARRAY):
-            nesting_level += 1
-            current_type = current_type.item_type
-            if not isinstance(current_type, ARRAY):
-                base_type = current_type
-
-        return f"{nesting_level}D_Array({base_type})"
-
-
-def nested_array(dim: int) -> Type[ARRAY]:
-    """Create a nested array type class with specified dimensions.
-
-    Args:
-        dim: The number of dimensions for the array type (1-6)
-
-    Returns:
-        A class type that can be instantiated with an item_type to create a nested array
-
-    Raises:
-        ValueError: If dim is not between 1 and 6
-    """
-    if not 1 <= dim <= 6:
-        raise ValueError("Dimension must be between 1 and 6")
-
-    class ArrayType(ARRAY):
-        def __init__(self, item_type: Union[TypeEngine, type]):
-            nested_type = item_type
-            for _ in range(dim - 1):
-                nested_type = ARRAY(nested_type)
-            super().__init__(nested_type)
-
-    ArrayType.__name__ = f"{dim}D_Array"
-    return ArrayType
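After this rework, ARRAY tracks its own nesting depth in `dim` and validates bound values against it, replacing the removed `nested_array()` factory. A small sketch of the new usage (the column name is made up for illustration):

```python
from sqlalchemy import Column, Integer

from pyobvector.schema.array import ARRAY

matrix_type = ARRAY(ARRAY(Integer))  # two levels of nesting
assert matrix_type.dim == 2          # previously built via nested_array(2)(Integer())

ids_matrix = Column("ids_matrix", matrix_type)  # hypothetical column
# A bound value like [[1, 2], [3, 4]] has depth 2 and passes _validate_dimension;
# a flat [1, 2, 3] would trip the dimension-mismatch assertion on insert.
```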
pyobvector/schema/reflection.py
@@ -3,7 +3,7 @@ import re
 import logging
 from sqlalchemy.dialects.mysql.reflection import MySQLTableDefinitionParser, _re_compile, cleanup_text
 
-from pyobvector.schema.array import nested_array
+from pyobvector.schema.array import ARRAY
 
 logger = logging.getLogger(__name__)
 
@@ -100,7 +100,9 @@ class OceanBaseTableDefinitionParser(MySQLTableDefinitionParser):
         item_type_args = [int(v) for v in self._re_csv_int.findall(item_type_arg)]
 
         nested_level = coltype_with_args.lower().count('array')
-        type_instance = nested_array(nested_level)(item_type(*item_type_args))
+        type_instance = item_type(*item_type_args)
+        for _ in range(nested_level):
+            type_instance = ARRAY(type_instance)
 
         col_kw = {}
 
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pyobvector"
-version = "0.2.12"
+version = "0.2.14"
 description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
 authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
 readme = "README.md"