pyobvector 0.2.16.tar.gz → 0.2.18.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {pyobvector-0.2.16 → pyobvector-0.2.18}/PKG-INFO +69 -7
  2. {pyobvector-0.2.16 → pyobvector-0.2.18}/README.md +63 -3
  3. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/__init__.py +3 -0
  4. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/collection_schema.py +6 -6
  5. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/exceptions.py +4 -4
  6. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/fts_index_param.py +2 -3
  7. pyobvector-0.2.18/pyobvector/client/hybrid_search.py +81 -0
  8. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/index_param.py +21 -8
  9. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/milvus_like_client.py +124 -88
  10. pyobvector-0.2.18/pyobvector/client/ob_client.py +459 -0
  11. pyobvector-0.2.18/pyobvector/client/ob_vec_client.py +522 -0
  12. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/schema_type.py +4 -2
  13. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/__init__.py +3 -0
  14. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/dialect.py +3 -0
  15. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/reflection.py +1 -1
  16. pyobvector-0.2.18/pyobvector/schema/sparse_vector.py +35 -0
  17. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vector_index.py +1 -1
  18. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/__init__.py +3 -1
  19. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/ob_version.py +1 -1
  20. pyobvector-0.2.18/pyobvector/util/sparse_vector.py +48 -0
  21. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/vector.py +10 -4
  22. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyproject.toml +3 -3
  23. pyobvector-0.2.16/pyobvector/client/ob_vec_client.py +0 -862
  24. {pyobvector-0.2.16 → pyobvector-0.2.18}/LICENSE +0 -0
  25. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/__init__.py +0 -0
  26. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/enum.py +0 -0
  27. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/ob_vec_json_table_client.py +0 -0
  28. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/partitions.py +0 -0
  29. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/__init__.py +0 -0
  30. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/json_value_returning_func.py +0 -0
  31. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/oceanbase_dialect.py +0 -0
  32. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/virtual_data_type.py +0 -0
  33. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/array.py +0 -0
  34. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/full_text_index.py +0 -0
  35. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/geo_srid_point.py +0 -0
  36. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/gis_func.py +0 -0
  37. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/match_against_func.py +0 -0
  38. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/ob_table.py +0 -0
  39. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/replace_stmt.py +0 -0
  40. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vec_dist_func.py +0 -0
  41. {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vector.py +0 -0
{pyobvector-0.2.16 → pyobvector-0.2.18}/PKG-INFO

@@ -1,7 +1,8 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: pyobvector
- Version: 0.2.16
+ Version: 0.2.18
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
+ License-File: LICENSE
  Author: shanhaikang.shk
  Author-email: shanhaikang.shk@oceanbase.com
  Requires-Python: >=3.9,<4.0
@@ -11,12 +12,13 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
- Requires-Dist: aiomysql (>=0.2.0,<0.3.0)
+ Classifier: Programming Language :: Python :: 3.14
+ Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
  Requires-Dist: numpy (>=1.17.0,<2.0.0)
  Requires-Dist: pydantic (>=2.7.0,<3)
  Requires-Dist: pymysql (>=1.1.1,<2.0.0)
  Requires-Dist: sqlalchemy (>=1.4,<=3)
- Requires-Dist: sqlglot (>=26.0.1,<27.0.0)
+ Requires-Dist: sqlglot (>=26.0.1)
  Description-Content-Type: text/markdown

  # pyobvector
@@ -36,7 +38,7 @@ poetry install
  - install with pip:

  ```shell
- pip install pyobvector==0.2.16
+ pip install pyobvector==0.2.18
  ```

  ## Build Doc
@@ -48,6 +50,10 @@ mkdir build
  make html
  ```

+ ## Release Notes
+
+ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
+
  ## Usage

  `pyobvector` supports two modes:
@@ -174,19 +180,75 @@ client.insert(test_collection_name, data=data1)
  - do ann search:

  ```python
- # perform ann search
+ # perform ann search with basic column selection
  res = self.client.ann_search(
      test_collection_name,
      vec_data=[0,0,0],
      vec_column_name='embedding',
      distance_func=l2_distance,
      topk=5,
-     output_column_names=['id']
+     output_column_names=['id'] # Legacy parameter
  )
  # For example, the result will be:
  # [(112,), (111,), (10,), (11,), (12,)]
+
+ # perform ann search with SQLAlchemy expressions (recommended)
+ from sqlalchemy import Table, text, func
+
+ table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
+ res = self.client.ann_search(
+     test_collection_name,
+     vec_data=[0,0,0],
+     vec_column_name='embedding',
+     distance_func=l2_distance,
+     topk=5,
+     output_columns=[
+         table.c.id,
+         table.c.meta,
+         (table.c.id + 1000).label('id_plus_1000'),
+         text("JSON_EXTRACT(meta, '$.key') as extracted_key")
+     ]
+ )
+ # For example, the result will be:
+ # [(112, '{"key": "value"}', 1112, 'value'), ...]
+
+ # perform ann search with distance threshold (filter results by distance)
+ res = self.client.ann_search(
+     test_collection_name,
+     vec_data=[0,0,0],
+     vec_column_name='embedding',
+     distance_func=l2_distance,
+     with_dist=True,
+     topk=10,
+     output_column_names=['id'],
+     distance_threshold=0.5 # Only return results where distance <= 0.5
+ )
+ # Only returns results with distance <= 0.5
+ # For example, the result will be:
+ # [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
  ```

+ #### ann_search Parameters
+
+ The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
+
+ - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
+   - Column objects: `table.c.id`, `table.c.name`
+   - Expressions: `(table.c.age + 10).label('age_plus_10')`
+   - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
+   - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
+
+ - **`output_column_names`** (legacy): Accepts list of column name strings
+   - Example: `['id', 'name', 'meta']`
+
+ - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
+
+ - **`distance_threshold`** (optional): Filter results by distance threshold
+   - Type: `Optional[float]`
+   - Only returns results where `distance <= threshold`
+   - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
+   - Use case: Quality control for similarity search, only return highly similar results
+
  - If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as following:

  ```python
{pyobvector-0.2.16 → pyobvector-0.2.18}/README.md

@@ -15,7 +15,7 @@ poetry install
  - install with pip:

  ```shell
- pip install pyobvector==0.2.16
+ pip install pyobvector==0.2.18
  ```

  ## Build Doc
@@ -27,6 +27,10 @@ mkdir build
  make html
  ```

+ ## Release Notes
+
+ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
+
  ## Usage

  `pyobvector` supports two modes:
@@ -153,19 +157,75 @@ client.insert(test_collection_name, data=data1)
  - do ann search:

  ```python
- # perform ann search
+ # perform ann search with basic column selection
  res = self.client.ann_search(
      test_collection_name,
      vec_data=[0,0,0],
      vec_column_name='embedding',
      distance_func=l2_distance,
      topk=5,
-     output_column_names=['id']
+     output_column_names=['id'] # Legacy parameter
  )
  # For example, the result will be:
  # [(112,), (111,), (10,), (11,), (12,)]
+
+ # perform ann search with SQLAlchemy expressions (recommended)
+ from sqlalchemy import Table, text, func
+
+ table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
+ res = self.client.ann_search(
+     test_collection_name,
+     vec_data=[0,0,0],
+     vec_column_name='embedding',
+     distance_func=l2_distance,
+     topk=5,
+     output_columns=[
+         table.c.id,
+         table.c.meta,
+         (table.c.id + 1000).label('id_plus_1000'),
+         text("JSON_EXTRACT(meta, '$.key') as extracted_key")
+     ]
+ )
+ # For example, the result will be:
+ # [(112, '{"key": "value"}', 1112, 'value'), ...]
+
+ # perform ann search with distance threshold (filter results by distance)
+ res = self.client.ann_search(
+     test_collection_name,
+     vec_data=[0,0,0],
+     vec_column_name='embedding',
+     distance_func=l2_distance,
+     with_dist=True,
+     topk=10,
+     output_column_names=['id'],
+     distance_threshold=0.5 # Only return results where distance <= 0.5
+ )
+ # Only returns results with distance <= 0.5
+ # For example, the result will be:
+ # [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
  ```

+ #### ann_search Parameters
+
+ The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
+
+ - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
+   - Column objects: `table.c.id`, `table.c.name`
+   - Expressions: `(table.c.age + 10).label('age_plus_10')`
+   - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
+   - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
+
+ - **`output_column_names`** (legacy): Accepts list of column name strings
+   - Example: `['id', 'name', 'meta']`
+
+ - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
+
+ - **`distance_threshold`** (optional): Filter results by distance threshold
+   - Type: `Optional[float]`
+   - Only returns results where `distance <= threshold`
+   - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
+   - Use case: Quality control for similarity search, only return highly similar results
+
  - If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as following:

  ```python
{pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/__init__.py

@@ -14,6 +14,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
  * IndexParams A list of IndexParam to create vector index in batch
  * DataType Specify field type in collection schema for MilvusLikeClient
  * VECTOR An extended data type in SQLAlchemy for ObVecClient
+ * SPARSE_VECTOR An extended data type in SQLAlchemy for ObVecClient
  * VectorIndex An extended index type in SQLAlchemy for ObVecClient
  * FtsIndex Full Text Search Index
  * FieldSchema Clas to define field schema in collection for MilvusLikeClient
@@ -43,6 +44,7 @@ from .client import *
  from .schema import (
      ARRAY,
      VECTOR,
+     SPARSE_VECTOR,
      POINT,
      VectorIndex,
      OceanBaseDialect,
@@ -70,6 +72,7 @@ __all__ = [
      "DataType",
      "ARRAY",
      "VECTOR",
+     "SPARSE_VECTOR",
      "POINT",
      "VectorIndex",
      "FtsIndex",
{pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/collection_schema.py

@@ -79,14 +79,14 @@ class FieldSchema:
              if "max_length" not in self.kwargs:
                  raise VarcharFieldParamException(
                      code=ErrorCode.INVALID_ARGUMENT,
-                     message=ExceptionsMessage.VarcharFieldMissinglengthParam,
+                     message=ExceptionsMessage.VarcharFieldMissingLengthParam,
                  )
              self.type_params["length"] = self.kwargs["max_length"]
          elif self.dtype == DataType.ARRAY:
              if "element_type" not in self.kwargs:
                  raise ArrayFieldParamException(
                      code=ErrorCode.INVALID_ARGUMENT,
-                     message=ExceptionsMessage.ArrayFiledMissingElementType,
+                     message=ExceptionsMessage.ArrayFieldMissingElementType,
                  )
              if self.kwargs["element_type"] in (
                  DataType.ARRAY,
@@ -95,7 +95,7 @@ class FieldSchema:
              ):
                  raise ArrayFieldParamException(
                      code=ErrorCode.INVALID_ARGUMENT,
-                     message=ExceptionsMessage.ArrayFiledInvalidElementType,
+                     message=ExceptionsMessage.ArrayFieldInvalidElementType,
                  )

              self.type_params["item_type"] = convert_datatype_to_sqltype(
@@ -147,9 +147,9 @@ class CollectionSchema:
          """Add field to collection.

          Args:
-             :param field_name (string) : new field name
-             :param datatype (DataType) : field data type
-             :param kwargs : parameters for data type
+             field_name (string): new field name
+             datatype (DataType): field data type
+             **kwargs: parameters for data type
          """
          field = FieldSchema(field_name, datatype, **kwargs)
          cur_idx = len(self.fields)
{pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/exceptions.py

@@ -101,9 +101,9 @@ class ExceptionsMessage:
      )
      PrimaryFieldType = "Param primary_field must be int or str type."
      VectorFieldMissingDimParam = "Param 'dim' must be set for vector field."
-     VarcharFieldMissinglengthParam = "Param 'max_length' must be set for varchar field."
-     ArrayFiledMissingElementType = "Param 'element_type' must be set for array field."
-     ArrayFiledInvalidElementType = (
+     VarcharFieldMissingLengthParam = "Param 'max_length' must be set for varchar field."
+     ArrayFieldMissingElementType = "Param 'element_type' must be set for array field."
+     ArrayFieldInvalidElementType = (
          "Param 'element_type' can not be array/vector/varchar."
      )
      CollectionNotExists = "Collection does not exist."
@@ -111,5 +111,5 @@ class ExceptionsMessage:
      MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
      UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
      ClusterVersionIsLow = (
-         "OceanBase Vector Store is not supported because cluster version is below 4.3.3.0."
+         "OceanBase %s feature is not supported because cluster version is below %s."
      )
{pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/fts_index_param.py

@@ -18,13 +18,12 @@ class FtsIndexParam:
          self.field_names = field_names
          self.parser_type = parser_type

-     def param_str(self) -> str:
-         if self.parser_type is None:
-             return None
+     def param_str(self) -> str | None:
          if self.parser_type == FtsParser.IK:
              return "ik"
          if self.parser_type == FtsParser.NGRAM:
              return "ngram"
+         return None

      def __iter__(self):
          yield "index_name", self.index_name
pyobvector-0.2.18/pyobvector/client/hybrid_search.py

@@ -0,0 +1,81 @@
+ """OceanBase Hybrid Search Client."""
+ import json
+ import logging
+ from typing import Dict, Any
+
+ from sqlalchemy import text
+
+ from .exceptions import ClusterVersionException, ErrorCode, ExceptionsMessage
+ from .ob_vec_client import ObVecClient as Client
+ from ..util import ObVersion
+
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.DEBUG)
+
+
+ class HybridSearch(Client):
+     """The OceanBase Hybrid Search Client"""
+
+     def __init__(
+         self,
+         uri: str = "127.0.0.1:2881",
+         user: str = "root@test",
+         password: str = "",
+         db_name: str = "test",
+         **kwargs,
+     ):
+         super().__init__(uri, user, password, db_name, **kwargs)
+
+         if self.ob_version < ObVersion.from_db_version_nums(4, 4, 1, 0):
+             raise ClusterVersionException(
+                 code=ErrorCode.NOT_SUPPORTED,
+                 message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
+             )
+
+     def search(
+         self,
+         index: str,
+         body: Dict[str, Any],
+         **kwargs,
+     ):
+         """Execute hybrid search with parameter compatible with Elasticsearch.
+
+         Args:
+             index: The name of the table to search
+             body: The search query body
+             **kwargs: Additional search parameters
+
+         Returns:
+             Search results
+         """
+         body_str = json.dumps(body)
+
+         sql = text("SELECT DBMS_HYBRID_SEARCH.SEARCH(:index, :body_str)")
+
+         with self.engine.connect() as conn:
+             with conn.begin():
+                 res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
+                 return json.loads(res[0])
+
+     def get_sql(
+         self,
+         index: str,
+         body: Dict[str, Any],
+     ) -> str:
+         """Get the SQL actually to be executed in hybrid search.
+
+         Args:
+             index: The name of the table to search
+             body: The hybrid search query body
+
+         Returns:
+             The SQL actually to be executed
+         """
+         body_str = json.dumps(body)
+
+         sql = text("SELECT DBMS_HYBRID_SEARCH.GET_SQL(:index, :body_str)")
+
+         with self.engine.connect() as conn:
+             with conn.begin():
+                 res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
+                 return res[0]
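
A hedged usage sketch for the new `HybridSearch` client: only the constructor defaults, the 4.4.1.0 version gate, and the `search()`/`get_sql()` signatures come from the code above; the table name and the Elasticsearch-style query body below are illustrative assumptions.

```python
from pyobvector.client.hybrid_search import HybridSearch

# Constructor defaults mirror the class above; the cluster must be >= 4.4.1.0,
# otherwise __init__ raises ClusterVersionException.
client = HybridSearch(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")

# Hypothetical Elasticsearch-style body; the exact query shape accepted by
# DBMS_HYBRID_SEARCH is not documented in this diff.
body = {"query": {"match": {"title": "oceanbase"}}, "size": 5}

print(client.get_sql("my_docs", body))    # inspect the SQL the server would execute
results = client.search("my_docs", body)  # parsed JSON returned by DBMS_HYBRID_SEARCH.SEARCH
```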
{pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/index_param.py

@@ -9,7 +9,7 @@ class VecIndexType(Enum):
      IVFFLAT = 2
      IVFSQ = 3
      IVFPQ = 4
-
+     DAAT = 5

  class IndexParam:
      """Vector index parameters.
@@ -31,6 +31,7 @@ class IndexParam:
      IVFFLAT_ALGO_NAME = "ivf_flat"
      IVFSQ_ALGO_NAME = "ivf_sq8"
      IVFPQ_ALGO_NAME = "ivf_pq"
+     DAAT_ALGO_NAME = "daat"

      def __init__(
          self, index_name: str, field_name: str, index_type: Union[VecIndexType, str], **kwargs
@@ -57,6 +58,11 @@
          return self.index_type in [
              IndexParam.IVFPQ_ALGO_NAME,
          ]
+
+     def is_index_type_sparse_vector(self):
+         return self.index_type in [
+             IndexParam.DAAT_ALGO_NAME,
+         ]

      def _get_vector_index_type_str(self):
          """Parse vector index type to string."""
@@ -71,6 +77,8 @@
              return IndexParam.IVFSQ_ALGO_NAME
          elif self.index_type == VecIndexType.IVFPQ:
              return IndexParam.IVFPQ_ALGO_NAME
+         elif self.index_type == VecIndexType.DAAT:
+             return IndexParam.DAAT_ALGO_NAME
          raise ValueError(f"unsupported vector index type: {self.index_type}")
      assert isinstance(self.index_type, str)
      index_type = self.index_type.lower()
@@ -80,6 +88,7 @@
          IndexParam.IVFFLAT_ALGO_NAME,
          IndexParam.IVFSQ_ALGO_NAME,
          IndexParam.IVFPQ_ALGO_NAME,
+         IndexParam.DAAT_ALGO_NAME,
      ]:
          raise ValueError(f"unsupported vector index type: {self.index_type}")
      return index_type
@@ -124,15 +133,19 @@
              ob_params['ef_construction'] = params['efConstruction']
          if 'efSearch' in params:
              ob_params['ef_search'] = params['efSearch']
+
+         if self.is_index_type_sparse_vector() and ob_params['distance'] != 'inner_product':
+             raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
          return ob_params

      def param_str(self):
          """Parse vector index parameters to string."""
          ob_param = self._parse_kwargs()
          partial_str = ",".join([f"{k}={v}" for k, v in ob_param.items()])
-         if len(partial_str) > 0:
-             partial_str += ","
-         partial_str += f"type={self.index_type}"
+         if not self.is_index_type_sparse_vector():
+             if len(partial_str) > 0:
+                 partial_str += ","
+             partial_str += f"type={self.index_type}"
          return partial_str

      def __iter__(self):
@@ -165,10 +178,10 @@ class IndexParams:
          """Add `IndexParam` to `IndexParams`

          Args:
-             :param field_name (string) : vector index built on which field
-             :param index_type (VecIndexType) :
-                 vector index algorithms (Only HNSW supported)
-             :param index_name (string) : vector index name
+             field_name (string): vector index built on which field
+             index_type (VecIndexType): vector index algorithms (Only HNSW supported)
+             index_name (string): vector index name
+             **kwargs: additional parameters for different index types
          """
          index_param = IndexParam(index_name, field_name, index_type, **kwargs)
          pair_key = (field_name, index_name)
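
A minimal sketch of registering the new `DAAT` sparse-vector index type through `IndexParams.add_index`; the column and index names are hypothetical, and the metric keyword is left as a comment because the code above only shows that the parsed `distance` must resolve to `inner_product` for sparse-vector indexes.

```python
from pyobvector.client.index_param import IndexParams, VecIndexType

index_params = IndexParams()
index_params.add_index(
    field_name="sparse_embedding",  # hypothetical sparse-vector column
    index_type=VecIndexType.DAAT,   # new in 0.2.18; maps to the "daat" algorithm name
    index_name="sparse_idx",
    # Pass your metric kwarg here; whatever _parse_kwargs() maps to 'distance'
    # must equal 'inner_product', or IndexParam raises ValueError for DAAT.
)
```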