pyobvector 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. pyobvector/__init__.py +6 -5
  2. pyobvector/client/__init__.py +5 -4
  3. pyobvector/client/collection_schema.py +5 -1
  4. pyobvector/client/enum.py +1 -1
  5. pyobvector/client/exceptions.py +9 -7
  6. pyobvector/client/fts_index_param.py +8 -4
  7. pyobvector/client/hybrid_search.py +10 -4
  8. pyobvector/client/index_param.py +56 -41
  9. pyobvector/client/milvus_like_client.py +71 -54
  10. pyobvector/client/ob_client.py +20 -16
  11. pyobvector/client/ob_vec_client.py +47 -40
  12. pyobvector/client/ob_vec_json_table_client.py +366 -274
  13. pyobvector/client/partitions.py +81 -39
  14. pyobvector/client/schema_type.py +3 -1
  15. pyobvector/json_table/__init__.py +4 -3
  16. pyobvector/json_table/json_value_returning_func.py +12 -10
  17. pyobvector/json_table/oceanbase_dialect.py +15 -8
  18. pyobvector/json_table/virtual_data_type.py +47 -28
  19. pyobvector/schema/__init__.py +7 -1
  20. pyobvector/schema/array.py +6 -2
  21. pyobvector/schema/dialect.py +4 -0
  22. pyobvector/schema/full_text_index.py +8 -3
  23. pyobvector/schema/geo_srid_point.py +5 -2
  24. pyobvector/schema/gis_func.py +23 -11
  25. pyobvector/schema/match_against_func.py +10 -5
  26. pyobvector/schema/ob_table.py +2 -0
  27. pyobvector/schema/reflection.py +25 -8
  28. pyobvector/schema/replace_stmt.py +4 -0
  29. pyobvector/schema/sparse_vector.py +7 -4
  30. pyobvector/schema/vec_dist_func.py +22 -9
  31. pyobvector/schema/vector.py +3 -1
  32. pyobvector/schema/vector_index.py +7 -3
  33. pyobvector/util/__init__.py +1 -0
  34. pyobvector/util/ob_version.py +2 -0
  35. pyobvector/util/sparse_vector.py +9 -6
  36. pyobvector/util/vector.py +2 -0
  37. {pyobvector-0.2.22.dist-info → pyobvector-0.2.24.dist-info}/METADATA +13 -14
  38. pyobvector-0.2.24.dist-info/RECORD +40 -0
  39. {pyobvector-0.2.22.dist-info → pyobvector-0.2.24.dist-info}/licenses/LICENSE +1 -1
  40. pyobvector-0.2.22.dist-info/RECORD +0 -40
  41. {pyobvector-0.2.22.dist-info → pyobvector-0.2.24.dist-info}/WHEEL +0 -0
pyobvector/__init__.py CHANGED
@@ -1,10 +1,10 @@
1
1
  """A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
2
2
 
3
- `pyobvector` supports two modes:
4
- 1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
3
+ `pyobvector` supports two modes:
4
+ 1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
5
5
  in a way similar to the Milvus API.
6
- 2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
7
- `ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
6
+ 2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
7
+ `ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
8
8
  In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
9
9
 
10
10
  * ObVecClient MySQL client in SQLAlchemy hybrid mode
@@ -19,7 +19,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
19
19
  * FtsIndex Full Text Search Index
20
20
  * FieldSchema Clas to define field schema in collection for MilvusLikeClient
21
21
  * CollectionSchema Class to define collection schema for MilvusLikeClient
22
- * PartType Specify partition type of table or collection
22
+ * PartType Specify partition type of table or collection
23
23
  for both ObVecClient and MilvusLikeClient
24
24
  * ObPartition Abstract type class of all kind of Partition strategy
25
25
  * RangeListPartInfo Specify Range/RangeColumns/List/ListColumns partition info
@@ -40,6 +40,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
40
40
  * FtsIndexParam Full Text Search index parameter
41
41
  * MatchAgainst Full Text Search clause
42
42
  """
43
+
43
44
  from .client import *
44
45
  from .schema import (
45
46
  ARRAY,
pyobvector/client/__init__.py CHANGED
@@ -1,9 +1,9 @@
1
1
  """Multi-type Vector Store Client:
2
2
 
3
- 1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
3
+ 1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
4
4
  in a way similar to the Milvus API.
5
- 2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
6
- `ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
5
+ 2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
6
+ `ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
7
7
  In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
8
8
 
9
9
  * ObVecClient MySQL client in SQLAlchemy hybrid mode
@@ -14,7 +14,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
14
14
  * DataType Specify field type in collection schema for MilvusLikeClient
15
15
  * FieldSchema Clas to define field schema in collection for MilvusLikeClient
16
16
  * CollectionSchema Class to define collection schema for MilvusLikeClient
17
- * PartType Specify partition type of table or collection
17
+ * PartType Specify partition type of table or collection
18
18
  for both ObVecClient and MilvusLikeClient
19
19
  * ObPartition Abstract type class of all kind of Partition strategy
20
20
  * RangeListPartInfo Specify Range/RangeColumns/List/ListColumns partition info
@@ -30,6 +30,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
30
30
  * FtsParser Text Parser Type for Full Text Search
31
31
  * FtsIndexParam Full Text Search index parameter
32
32
  """
33
+
33
34
  from .ob_vec_client import ObVecClient
34
35
  from .milvus_like_client import MilvusLikeClient
35
36
  from .ob_vec_json_table_client import ObVecJsonTableClient
pyobvector/client/collection_schema.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """FieldSchema & CollectionSchema definition module to be compatible with Milvus."""
2
+
2
3
  import copy
3
4
  from typing import Optional
4
5
  from sqlalchemy import Column
@@ -6,6 +7,7 @@ from .schema_type import DataType, convert_datatype_to_sqltype
6
7
  from .exceptions import *
7
8
  from .partitions import *
8
9
 
10
+
9
11
  class FieldSchema:
10
12
  """FieldSchema definition.
11
13
 
@@ -18,6 +20,7 @@ class FieldSchema:
18
20
  nullable (bool) : whether the field can be null
19
21
  type_params (dict) : different parameters for different data type
20
22
  """
23
+
21
24
  def __init__(
22
25
  self,
23
26
  name: str,
@@ -117,12 +120,13 @@ class FieldSchema:
117
120
 
118
121
  class CollectionSchema:
119
122
  """CollectionSchema definition.
120
-
123
+
121
124
  Attributes:
122
125
  fields (List[FieldSchema]) : a list of FieldSchema
123
126
  description (string) : collection description (not used in OceanBase)
124
127
  partitions (ObPartition) : partition strategy of this collection
125
128
  """
129
+
126
130
  def __init__(
127
131
  self,
128
132
  fields: Optional[list[FieldSchema]] = None,
pyobvector/client/enum.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Common module for int type enumerate."""
2
+
2
3
  from enum import Enum
3
4
 
4
5
 
5
6
  class IntEnum(int, Enum):
6
7
  """Int type enumerate definition."""
7
-
pyobvector/client/exceptions.py CHANGED
@@ -1,9 +1,11 @@
1
1
  """Exception for MilvusLikeClient."""
2
+
2
3
  from .enum import IntEnum
3
4
 
4
5
 
5
6
  class ErrorCode(IntEnum):
6
7
  """Error codes for MilvusLikeClient."""
8
+
7
9
  SUCCESS = 0
8
10
  UNEXPECTED_ERROR = 1
9
11
  INVALID_ARGUMENT = 2
@@ -14,6 +16,7 @@ class ErrorCode(IntEnum):
14
16
 
15
17
  class ObException(Exception):
16
18
  """Base class for MilvusLikeClient exception."""
19
+
17
20
  def __init__(
18
21
  self,
19
22
  code: int = ErrorCode.UNEXPECTED_ERROR,
@@ -75,6 +78,7 @@ class ClusterVersionException(ObException):
75
78
 
76
79
  class ExceptionsMessage:
77
80
  """Exception Messages definition."""
81
+
78
82
  PartitionExprNotExists = "Partition expression string does not exist."
79
83
  PartitionMultiField = "Multi-Partition Field is not supported."
80
84
  PartitionLevelMoreThanTwo = "Partition Level should less than or equal to 2."
@@ -93,12 +97,8 @@ class ExceptionsMessage:
93
97
  PartitionListColNameListMissing = (
94
98
  "Column name list is necessary when partition type is ListColumns"
95
99
  )
96
- PartitionHashNameListAndPartCntMissing = (
97
- "One of hash_part_name_list and part_count must be set when partition type is Hash"
98
- )
99
- PartitionKeyNameListAndPartCntMissing = (
100
- "One of key_part_name_list and part_count must be set when partition type is Key"
101
- )
100
+ PartitionHashNameListAndPartCntMissing = "One of hash_part_name_list and part_count must be set when partition type is Hash"
101
+ PartitionKeyNameListAndPartCntMissing = "One of key_part_name_list and part_count must be set when partition type is Key"
102
102
  PrimaryFieldType = "Param primary_field must be int or str type."
103
103
  VectorFieldMissingDimParam = "Param 'dim' must be set for vector field."
104
104
  VarcharFieldMissingLengthParam = "Param 'max_length' must be set for varchar field."
@@ -108,7 +108,9 @@ class ExceptionsMessage:
108
108
  )
109
109
  CollectionNotExists = "Collection does not exist."
110
110
  MetricTypeParamTypeInvalid = "MetricType param type should be string."
111
- MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
111
+ MetricTypeValueInvalid = (
112
+ "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
113
+ )
112
114
  UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
113
115
  ClusterVersionIsLow = (
114
116
  "OceanBase %s feature is not supported because cluster version is below %s."
pyobvector/client/fts_index_param.py CHANGED
@@ -1,9 +1,12 @@
1
1
  """A module to specify fts index parameters"""
2
+
2
3
  from enum import Enum
3
4
  from typing import Optional, Union
4
5
 
6
+
5
7
  class FtsParser(Enum):
6
8
  """Built-in full-text search parser types supported by OceanBase"""
9
+
7
10
  IK = 0
8
11
  NGRAM = 1
9
12
  NGRAM2 = 2 # NGRAM2 parser (supported from V4.3.5 BP2+)
@@ -13,13 +16,14 @@ class FtsParser(Enum):
13
16
 
14
17
  class FtsIndexParam:
15
18
  """Full-text search index parameter.
16
-
19
+
17
20
  Args:
18
21
  index_name: Index name
19
22
  field_names: List of field names to create full-text index on
20
23
  parser_type: Parser type, can be FtsParser enum or string (for custom parsers)
21
24
  If None, uses default Space parser
22
25
  """
26
+
23
27
  def __init__(
24
28
  self,
25
29
  index_name: str,
@@ -34,11 +38,11 @@ class FtsIndexParam:
34
38
  """Convert parser type to string format for SQL."""
35
39
  if self.parser_type is None:
36
40
  return None # Default Space parser, no need to specify
37
-
41
+
38
42
  if isinstance(self.parser_type, str):
39
43
  # Custom parser name (e.g., "thai_ftparser")
40
44
  return self.parser_type.lower()
41
-
45
+
42
46
  if isinstance(self.parser_type, FtsParser):
43
47
  if self.parser_type == FtsParser.IK:
44
48
  return "ik"
@@ -52,7 +56,7 @@ class FtsIndexParam:
52
56
  return "jieba"
53
57
  # Raise exception for unrecognized FtsParser enum values
54
58
  raise ValueError(f"Unrecognized FtsParser enum value: {self.parser_type}")
55
-
59
+
56
60
  return None
57
61
 
58
62
  def __iter__(self):
pyobvector/client/hybrid_search.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """OceanBase Hybrid Search Client."""
2
+
2
3
  import json
3
4
  import logging
4
5
  from typing import Any
@@ -27,7 +28,7 @@ class HybridSearch(Client):
27
28
  super().__init__(uri, user, password, db_name, **kwargs)
28
29
 
29
30
  min_required_version = ObVersion.from_db_version_nums(4, 4, 1, 0)
30
-
31
+
31
32
  if self.ob_version < min_required_version:
32
33
  # For versions < 4.4.1.0, check if it's SeekDB
33
34
  if self._is_seekdb():
@@ -35,7 +36,8 @@ class HybridSearch(Client):
35
36
  return
36
37
  raise ClusterVersionException(
37
38
  code=ErrorCode.NOT_SUPPORTED,
38
- message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
39
+ message=ExceptionsMessage.ClusterVersionIsLow
40
+ % ("Hybrid Search", "4.4.1.0"),
39
41
  )
40
42
 
41
43
  def search(
@@ -60,7 +62,9 @@ class HybridSearch(Client):
60
62
 
61
63
  with self.engine.connect() as conn:
62
64
  with conn.begin():
63
- res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
65
+ res = conn.execute(
66
+ sql, {"index": index, "body_str": body_str}
67
+ ).fetchone()
64
68
  if res[0] is None:
65
69
  return []
66
70
  return json.loads(res[0])
@@ -85,7 +89,9 @@ class HybridSearch(Client):
85
89
 
86
90
  with self.engine.connect() as conn:
87
91
  with conn.begin():
88
- res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
92
+ res = conn.execute(
93
+ sql, {"index": index, "body_str": body_str}
94
+ ).fetchone()
89
95
  if res[0] is None:
90
96
  return ""
91
97
  return res[0]
pyobvector/client/index_param.py CHANGED
@@ -1,9 +1,12 @@
1
1
  """A module to specify vector index parameters for MilvusLikeClient"""
2
+
2
3
  from enum import Enum
3
4
  from typing import Union
4
5
 
6
+
5
7
  class VecIndexType(Enum):
6
8
  """Vector index algorithm type"""
9
+
7
10
  HNSW = 0
8
11
  HNSW_SQ = 1
9
12
  IVFFLAT = 2
@@ -11,21 +14,23 @@ class VecIndexType(Enum):
11
14
  IVFPQ = 4
12
15
  DAAT = 5
13
16
 
17
+
14
18
  class IndexParam:
15
19
  """Vector index parameters.
16
-
20
+
17
21
  Attributes:
18
22
  index_name (string) : vector index name
19
23
  field_name (string) : vector index built on which field
20
24
  index_type (VecIndexType) :
21
25
  vector index algorithms (Only HNSW supported)
22
- kwargs :
26
+ kwargs :
23
27
  vector index parameters for different algorithms
24
28
  """
29
+
25
30
  HNSW_DEFAULT_M = 16
26
31
  HNSW_DEFAULT_EF_CONSTRUCTION = 200
27
32
  HNSW_DEFAULT_EF_SEARCH = 40
28
- OCEANBASE_DEFAULT_ALGO_LIB = 'vsag'
33
+ OCEANBASE_DEFAULT_ALGO_LIB = "vsag"
29
34
  HNSW_ALGO_NAME = "hnsw"
30
35
  HNSW_SQ_ALGO_NAME = "hnsw_sq"
31
36
  IVFFLAT_ALGO_NAME = "ivf_flat"
@@ -34,7 +39,11 @@ class IndexParam:
34
39
  DAAT_ALGO_NAME = "daat"
35
40
 
36
41
  def __init__(
37
- self, index_name: str, field_name: str, index_type: Union[VecIndexType, str], **kwargs
42
+ self,
43
+ index_name: str,
44
+ field_name: str,
45
+ index_type: Union[VecIndexType, str],
46
+ **kwargs,
38
47
  ):
39
48
  self.index_name = index_name
40
49
  self.field_name = field_name
@@ -44,21 +53,22 @@ class IndexParam:
44
53
 
45
54
  def is_index_type_hnsw_serial(self):
46
55
  return self.index_type in [
47
- IndexParam.HNSW_ALGO_NAME, IndexParam.HNSW_SQ_ALGO_NAME
56
+ IndexParam.HNSW_ALGO_NAME,
57
+ IndexParam.HNSW_SQ_ALGO_NAME,
48
58
  ]
49
-
59
+
50
60
  def is_index_type_ivf_serial(self):
51
61
  return self.index_type in [
52
62
  IndexParam.IVFFLAT_ALGO_NAME,
53
63
  IndexParam.IVFSQ_ALGO_NAME,
54
64
  IndexParam.IVFPQ_ALGO_NAME,
55
65
  ]
56
-
66
+
57
67
  def is_index_type_product_quantization(self):
58
68
  return self.index_type in [
59
69
  IndexParam.IVFPQ_ALGO_NAME,
60
70
  ]
61
-
71
+
62
72
  def is_index_type_sparse_vector(self):
63
73
  return self.index_type in [
64
74
  IndexParam.DAAT_ALGO_NAME,
@@ -97,48 +107,52 @@ class IndexParam:
97
107
  ob_params = {}
98
108
  # handle lib
99
109
  if self.is_index_type_hnsw_serial():
100
- ob_params['lib'] = 'vsag'
110
+ ob_params["lib"] = "vsag"
101
111
  else:
102
- ob_params['lib'] = 'OB'
112
+ ob_params["lib"] = "OB"
103
113
  # handle metric_type
104
- ob_params['distance'] = "l2"
105
- if 'metric_type' in self.kwargs:
106
- ob_params['distance'] = self.kwargs['metric_type']
114
+ ob_params["distance"] = "l2"
115
+ if "metric_type" in self.kwargs:
116
+ ob_params["distance"] = self.kwargs["metric_type"]
107
117
  # handle param
108
118
  if self.is_index_type_ivf_serial():
109
- if (self.is_index_type_product_quantization() and
110
- 'params' not in self.kwargs):
111
- raise ValueError('params must be configured for IVF index type')
112
-
113
- if 'params' not in self.kwargs:
119
+ if (
120
+ self.is_index_type_product_quantization()
121
+ and "params" not in self.kwargs
122
+ ):
123
+ raise ValueError("params must be configured for IVF index type")
124
+
125
+ if "params" not in self.kwargs:
114
126
  params = {}
115
127
  else:
116
- params = self.kwargs['params']
117
-
128
+ params = self.kwargs["params"]
129
+
118
130
  if self.is_index_type_product_quantization():
119
- if 'm' not in params:
120
- raise ValueError('m must be configured for IVFSQ or IVFPQ')
121
- ob_params['m'] = params['m']
122
- if 'nlist' in params:
123
- ob_params['nlist'] = params['nlist']
124
- if 'samples_per_nlist' in params:
125
- ob_params['samples_per_nlist'] = params['samples_per_nlist']
131
+ if "m" not in params:
132
+ raise ValueError("m must be configured for IVFSQ or IVFPQ")
133
+ ob_params["m"] = params["m"]
134
+ if "nlist" in params:
135
+ ob_params["nlist"] = params["nlist"]
136
+ if "samples_per_nlist" in params:
137
+ ob_params["samples_per_nlist"] = params["samples_per_nlist"]
126
138
 
127
139
  if self.is_index_type_hnsw_serial():
128
- if 'params' in self.kwargs:
129
- params = self.kwargs['params']
130
- if 'M' in params:
131
- ob_params['m'] = params['M']
132
- if 'efConstruction' in params:
133
- ob_params['ef_construction'] = params['efConstruction']
134
- if 'efSearch' in params:
135
- ob_params['ef_search'] = params['efSearch']
136
-
140
+ if "params" in self.kwargs:
141
+ params = self.kwargs["params"]
142
+ if "M" in params:
143
+ ob_params["m"] = params["M"]
144
+ if "efConstruction" in params:
145
+ ob_params["ef_construction"] = params["efConstruction"]
146
+ if "efSearch" in params:
147
+ ob_params["ef_search"] = params["efSearch"]
148
+
137
149
  if self.is_index_type_sparse_vector():
138
- if ob_params['distance'] != 'inner_product':
139
- raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
140
- if 'sparse_index_type' in self.kwargs:
141
- ob_params['type'] = self.kwargs['sparse_index_type']
150
+ if ob_params["distance"] != "inner_product":
151
+ raise ValueError(
152
+ "Metric type should be 'inner_product' for sparse vector index."
153
+ )
154
+ if "sparse_index_type" in self.kwargs:
155
+ ob_params["type"] = self.kwargs["sparse_index_type"]
142
156
  return ob_params
143
157
 
144
158
  def param_str(self):
@@ -172,6 +186,7 @@ class IndexParam:
172
186
 
173
187
  class IndexParams:
174
188
  """Vector index parameters for MilvusLikeClient"""
189
+
175
190
  def __init__(self):
176
191
  self._indexes = {}
177
192
 
@@ -179,7 +194,7 @@ class IndexParams:
179
194
  self, field_name: str, index_type: VecIndexType, index_name: str, **kwargs
180
195
  ):
181
196
  """Add `IndexParam` to `IndexParams`
182
-
197
+
183
198
  Args:
184
199
  field_name (string): vector index built on which field
185
200
  index_type (VecIndexType): vector index algorithms (Only HNSW supported)