pyobvector 0.2.15__tar.gz → 0.2.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.15 → pyobvector-0.2.17}/PKG-INFO +65 -7
- {pyobvector-0.2.15 → pyobvector-0.2.17}/README.md +59 -3
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/__init__.py +3 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/collection_schema.py +6 -6
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/exceptions.py +4 -4
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/fts_index_param.py +2 -3
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/index_param.py +21 -8
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/milvus_like_client.py +126 -90
- pyobvector-0.2.17/pyobvector/client/ob_client.py +459 -0
- pyobvector-0.2.17/pyobvector/client/ob_vec_client.py +522 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/schema_type.py +4 -2
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/__init__.py +3 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/dialect.py +3 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/reflection.py +1 -1
- pyobvector-0.2.17/pyobvector/schema/sparse_vector.py +35 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/vector_index.py +1 -1
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/util/__init__.py +3 -1
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/util/ob_version.py +1 -1
- pyobvector-0.2.17/pyobvector/util/sparse_vector.py +48 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/util/vector.py +10 -4
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyproject.toml +3 -3
- pyobvector-0.2.15/pyobvector/client/ob_vec_client.py +0 -862
- {pyobvector-0.2.15 → pyobvector-0.2.17}/LICENSE +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/__init__.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/enum.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/ob_vec_json_table_client.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/client/partitions.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/json_table/__init__.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/json_table/json_value_returning_func.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/json_table/oceanbase_dialect.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/json_table/virtual_data_type.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/array.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/full_text_index.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/geo_srid_point.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/gis_func.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/match_against_func.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/ob_table.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/replace_stmt.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/vec_dist_func.py +0 -0
- {pyobvector-0.2.15 → pyobvector-0.2.17}/pyobvector/schema/vector.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobvector
|
|
3
|
-
Version: 0.2.15
|
|
3
|
+
Version: 0.2.17
|
|
4
4
|
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
|
+
License-File: LICENSE
|
|
5
6
|
Author: shanhaikang.shk
|
|
6
7
|
Author-email: shanhaikang.shk@oceanbase.com
|
|
7
8
|
Requires-Python: >=3.9,<4.0
|
|
@@ -11,12 +12,13 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
-
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
|
|
15
17
|
Requires-Dist: numpy (>=1.17.0,<2.0.0)
|
|
16
18
|
Requires-Dist: pydantic (>=2.7.0,<3)
|
|
17
19
|
Requires-Dist: pymysql (>=1.1.1,<2.0.0)
|
|
18
20
|
Requires-Dist: sqlalchemy (>=1.4,<=3)
|
|
19
|
-
Requires-Dist: sqlglot (>=26.0.1
|
|
21
|
+
Requires-Dist: sqlglot (>=26.0.1)
|
|
20
22
|
Description-Content-Type: text/markdown
|
|
21
23
|
|
|
22
24
|
# pyobvector
|
|
@@ -36,7 +38,7 @@ poetry install
|
|
|
36
38
|
- install with pip:
|
|
37
39
|
|
|
38
40
|
```shell
|
|
39
|
-
pip install pyobvector==0.2.
|
|
41
|
+
pip install pyobvector==0.2.17
|
|
40
42
|
```
|
|
41
43
|
|
|
42
44
|
## Build Doc
|
|
@@ -174,19 +176,75 @@ client.insert(test_collection_name, data=data1)
|
|
|
174
176
|
- do ann search:
|
|
175
177
|
|
|
176
178
|
```python
|
|
177
|
-
# perform ann search
|
|
179
|
+
# perform ann search with basic column selection
|
|
178
180
|
res = self.client.ann_search(
|
|
179
181
|
test_collection_name,
|
|
180
182
|
vec_data=[0,0,0],
|
|
181
183
|
vec_column_name='embedding',
|
|
182
184
|
distance_func=l2_distance,
|
|
183
185
|
topk=5,
|
|
184
|
-
output_column_names=['id']
|
|
186
|
+
output_column_names=['id'] # Legacy parameter
|
|
185
187
|
)
|
|
186
188
|
# For example, the result will be:
|
|
187
189
|
# [(112,), (111,), (10,), (11,), (12,)]
|
|
190
|
+
|
|
191
|
+
# perform ann search with SQLAlchemy expressions (recommended)
|
|
192
|
+
from sqlalchemy import Table, text, func
|
|
193
|
+
|
|
194
|
+
table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
|
|
195
|
+
res = self.client.ann_search(
|
|
196
|
+
test_collection_name,
|
|
197
|
+
vec_data=[0,0,0],
|
|
198
|
+
vec_column_name='embedding',
|
|
199
|
+
distance_func=l2_distance,
|
|
200
|
+
topk=5,
|
|
201
|
+
output_columns=[
|
|
202
|
+
table.c.id,
|
|
203
|
+
table.c.meta,
|
|
204
|
+
(table.c.id + 1000).label('id_plus_1000'),
|
|
205
|
+
text("JSON_EXTRACT(meta, '$.key') as extracted_key")
|
|
206
|
+
]
|
|
207
|
+
)
|
|
208
|
+
# For example, the result will be:
|
|
209
|
+
# [(112, '{"key": "value"}', 1112, 'value'), ...]
|
|
210
|
+
|
|
211
|
+
# perform ann search with distance threshold (filter results by distance)
|
|
212
|
+
res = self.client.ann_search(
|
|
213
|
+
test_collection_name,
|
|
214
|
+
vec_data=[0,0,0],
|
|
215
|
+
vec_column_name='embedding',
|
|
216
|
+
distance_func=l2_distance,
|
|
217
|
+
with_dist=True,
|
|
218
|
+
topk=10,
|
|
219
|
+
output_column_names=['id'],
|
|
220
|
+
distance_threshold=0.5 # Only return results where distance <= 0.5
|
|
221
|
+
)
|
|
222
|
+
# Only returns results with distance <= 0.5
|
|
223
|
+
# For example, the result will be:
|
|
224
|
+
# [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
|
|
188
225
|
```
|
|
189
226
|
|
|
227
|
+
#### ann_search Parameters
|
|
228
|
+
|
|
229
|
+
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
230
|
+
|
|
231
|
+
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
232
|
+
- Column objects: `table.c.id`, `table.c.name`
|
|
233
|
+
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
234
|
+
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
235
|
+
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
236
|
+
|
|
237
|
+
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
238
|
+
- Example: `['id', 'name', 'meta']`
|
|
239
|
+
|
|
240
|
+
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
241
|
+
|
|
242
|
+
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
243
|
+
- Type: `Optional[float]`
|
|
244
|
+
- Only returns results where `distance <= threshold`
|
|
245
|
+
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
246
|
+
- Use case: Quality control for similarity search, only return highly similar results
|
|
247
|
+
|
|
190
248
|
- If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get a `SQLAlchemy` engine via `client.engine`. The engine can also be created as follows:
|
|
191
249
|
|
|
192
250
|
```python
|
|
@@ -15,7 +15,7 @@ poetry install
|
|
|
15
15
|
- install with pip:
|
|
16
16
|
|
|
17
17
|
```shell
|
|
18
|
-
pip install pyobvector==0.2.
|
|
18
|
+
pip install pyobvector==0.2.17
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
## Build Doc
|
|
@@ -153,19 +153,75 @@ client.insert(test_collection_name, data=data1)
|
|
|
153
153
|
- do ann search:
|
|
154
154
|
|
|
155
155
|
```python
|
|
156
|
-
# perform ann search
|
|
156
|
+
# perform ann search with basic column selection
|
|
157
157
|
res = self.client.ann_search(
|
|
158
158
|
test_collection_name,
|
|
159
159
|
vec_data=[0,0,0],
|
|
160
160
|
vec_column_name='embedding',
|
|
161
161
|
distance_func=l2_distance,
|
|
162
162
|
topk=5,
|
|
163
|
-
output_column_names=['id']
|
|
163
|
+
output_column_names=['id'] # Legacy parameter
|
|
164
164
|
)
|
|
165
165
|
# For example, the result will be:
|
|
166
166
|
# [(112,), (111,), (10,), (11,), (12,)]
|
|
167
|
+
|
|
168
|
+
# perform ann search with SQLAlchemy expressions (recommended)
|
|
169
|
+
from sqlalchemy import Table, text, func
|
|
170
|
+
|
|
171
|
+
table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
|
|
172
|
+
res = self.client.ann_search(
|
|
173
|
+
test_collection_name,
|
|
174
|
+
vec_data=[0,0,0],
|
|
175
|
+
vec_column_name='embedding',
|
|
176
|
+
distance_func=l2_distance,
|
|
177
|
+
topk=5,
|
|
178
|
+
output_columns=[
|
|
179
|
+
table.c.id,
|
|
180
|
+
table.c.meta,
|
|
181
|
+
(table.c.id + 1000).label('id_plus_1000'),
|
|
182
|
+
text("JSON_EXTRACT(meta, '$.key') as extracted_key")
|
|
183
|
+
]
|
|
184
|
+
)
|
|
185
|
+
# For example, the result will be:
|
|
186
|
+
# [(112, '{"key": "value"}', 1112, 'value'), ...]
|
|
187
|
+
|
|
188
|
+
# perform ann search with distance threshold (filter results by distance)
|
|
189
|
+
res = self.client.ann_search(
|
|
190
|
+
test_collection_name,
|
|
191
|
+
vec_data=[0,0,0],
|
|
192
|
+
vec_column_name='embedding',
|
|
193
|
+
distance_func=l2_distance,
|
|
194
|
+
with_dist=True,
|
|
195
|
+
topk=10,
|
|
196
|
+
output_column_names=['id'],
|
|
197
|
+
distance_threshold=0.5 # Only return results where distance <= 0.5
|
|
198
|
+
)
|
|
199
|
+
# Only returns results with distance <= 0.5
|
|
200
|
+
# For example, the result will be:
|
|
201
|
+
# [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
|
|
167
202
|
```
|
|
168
203
|
|
|
204
|
+
#### ann_search Parameters
|
|
205
|
+
|
|
206
|
+
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
207
|
+
|
|
208
|
+
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
209
|
+
- Column objects: `table.c.id`, `table.c.name`
|
|
210
|
+
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
211
|
+
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
212
|
+
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
213
|
+
|
|
214
|
+
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
215
|
+
- Example: `['id', 'name', 'meta']`
|
|
216
|
+
|
|
217
|
+
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
218
|
+
|
|
219
|
+
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
220
|
+
- Type: `Optional[float]`
|
|
221
|
+
- Only returns results where `distance <= threshold`
|
|
222
|
+
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
223
|
+
- Use case: Quality control for similarity search, only return highly similar results
|
|
224
|
+
|
|
169
225
|
- If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get a `SQLAlchemy` engine via `client.engine`. The engine can also be created as follows:
|
|
170
226
|
|
|
171
227
|
```python
|
|
@@ -14,6 +14,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
|
14
14
|
* IndexParams A list of IndexParam to create vector index in batch
|
|
15
15
|
* DataType Specify field type in collection schema for MilvusLikeClient
|
|
16
16
|
* VECTOR An extended data type in SQLAlchemy for ObVecClient
|
|
17
|
+
* SPARSE_VECTOR An extended data type in SQLAlchemy for ObVecClient
|
|
17
18
|
* VectorIndex An extended index type in SQLAlchemy for ObVecClient
|
|
18
19
|
* FtsIndex Full Text Search Index
|
|
19
20
|
* FieldSchema Class to define field schema in collection for MilvusLikeClient
|
|
@@ -43,6 +44,7 @@ from .client import *
|
|
|
43
44
|
from .schema import (
|
|
44
45
|
ARRAY,
|
|
45
46
|
VECTOR,
|
|
47
|
+
SPARSE_VECTOR,
|
|
46
48
|
POINT,
|
|
47
49
|
VectorIndex,
|
|
48
50
|
OceanBaseDialect,
|
|
@@ -70,6 +72,7 @@ __all__ = [
|
|
|
70
72
|
"DataType",
|
|
71
73
|
"ARRAY",
|
|
72
74
|
"VECTOR",
|
|
75
|
+
"SPARSE_VECTOR",
|
|
73
76
|
"POINT",
|
|
74
77
|
"VectorIndex",
|
|
75
78
|
"FtsIndex",
|
|
@@ -79,14 +79,14 @@ class FieldSchema:
|
|
|
79
79
|
if "max_length" not in self.kwargs:
|
|
80
80
|
raise VarcharFieldParamException(
|
|
81
81
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
82
|
-
message=ExceptionsMessage.
|
|
82
|
+
message=ExceptionsMessage.VarcharFieldMissingLengthParam,
|
|
83
83
|
)
|
|
84
84
|
self.type_params["length"] = self.kwargs["max_length"]
|
|
85
85
|
elif self.dtype == DataType.ARRAY:
|
|
86
86
|
if "element_type" not in self.kwargs:
|
|
87
87
|
raise ArrayFieldParamException(
|
|
88
88
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
89
|
-
message=ExceptionsMessage.
|
|
89
|
+
message=ExceptionsMessage.ArrayFieldMissingElementType,
|
|
90
90
|
)
|
|
91
91
|
if self.kwargs["element_type"] in (
|
|
92
92
|
DataType.ARRAY,
|
|
@@ -95,7 +95,7 @@ class FieldSchema:
|
|
|
95
95
|
):
|
|
96
96
|
raise ArrayFieldParamException(
|
|
97
97
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
98
|
-
message=ExceptionsMessage.
|
|
98
|
+
message=ExceptionsMessage.ArrayFieldInvalidElementType,
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
self.type_params["item_type"] = convert_datatype_to_sqltype(
|
|
@@ -147,9 +147,9 @@ class CollectionSchema:
|
|
|
147
147
|
"""Add field to collection.
|
|
148
148
|
|
|
149
149
|
Args:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
150
|
+
field_name (string): new field name
|
|
151
|
+
datatype (DataType): field data type
|
|
152
|
+
**kwargs: parameters for data type
|
|
153
153
|
"""
|
|
154
154
|
field = FieldSchema(field_name, datatype, **kwargs)
|
|
155
155
|
cur_idx = len(self.fields)
|
|
@@ -101,14 +101,14 @@ class ExceptionsMessage:
|
|
|
101
101
|
)
|
|
102
102
|
PrimaryFieldType = "Param primary_field must be int or str type."
|
|
103
103
|
VectorFieldMissingDimParam = "Param 'dim' must be set for vector field."
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
VarcharFieldMissingLengthParam = "Param 'max_length' must be set for varchar field."
|
|
105
|
+
ArrayFieldMissingElementType = "Param 'element_type' must be set for array field."
|
|
106
|
+
ArrayFieldInvalidElementType = (
|
|
107
107
|
"Param 'element_type' can not be array/vector/varchar."
|
|
108
108
|
)
|
|
109
109
|
CollectionNotExists = "Collection does not exist."
|
|
110
110
|
MetricTypeParamTypeInvalid = "MetricType param type should be string."
|
|
111
|
-
MetricTypeValueInvalid = "MetricType should be 'l2'/'ip' in ann search."
|
|
111
|
+
MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
|
|
112
112
|
UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
|
|
113
113
|
ClusterVersionIsLow = (
|
|
114
114
|
"OceanBase Vector Store is not supported because cluster version is below 4.3.3.0."
|
|
@@ -18,13 +18,12 @@ class FtsIndexParam:
|
|
|
18
18
|
self.field_names = field_names
|
|
19
19
|
self.parser_type = parser_type
|
|
20
20
|
|
|
21
|
-
def param_str(self) -> str:
|
|
22
|
-
if self.parser_type is None:
|
|
23
|
-
return None
|
|
21
|
+
def param_str(self) -> str | None:
|
|
24
22
|
if self.parser_type == FtsParser.IK:
|
|
25
23
|
return "ik"
|
|
26
24
|
if self.parser_type == FtsParser.NGRAM:
|
|
27
25
|
return "ngram"
|
|
26
|
+
return None
|
|
28
27
|
|
|
29
28
|
def __iter__(self):
|
|
30
29
|
yield "index_name", self.index_name
|
|
@@ -9,7 +9,7 @@ class VecIndexType(Enum):
|
|
|
9
9
|
IVFFLAT = 2
|
|
10
10
|
IVFSQ = 3
|
|
11
11
|
IVFPQ = 4
|
|
12
|
-
|
|
12
|
+
DAAT = 5
|
|
13
13
|
|
|
14
14
|
class IndexParam:
|
|
15
15
|
"""Vector index parameters.
|
|
@@ -31,6 +31,7 @@ class IndexParam:
|
|
|
31
31
|
IVFFLAT_ALGO_NAME = "ivf_flat"
|
|
32
32
|
IVFSQ_ALGO_NAME = "ivf_sq8"
|
|
33
33
|
IVFPQ_ALGO_NAME = "ivf_pq"
|
|
34
|
+
DAAT_ALGO_NAME = "daat"
|
|
34
35
|
|
|
35
36
|
def __init__(
|
|
36
37
|
self, index_name: str, field_name: str, index_type: Union[VecIndexType, str], **kwargs
|
|
@@ -57,6 +58,11 @@ class IndexParam:
|
|
|
57
58
|
return self.index_type in [
|
|
58
59
|
IndexParam.IVFPQ_ALGO_NAME,
|
|
59
60
|
]
|
|
61
|
+
|
|
62
|
+
def is_index_type_sparse_vector(self):
|
|
63
|
+
return self.index_type in [
|
|
64
|
+
IndexParam.DAAT_ALGO_NAME,
|
|
65
|
+
]
|
|
60
66
|
|
|
61
67
|
def _get_vector_index_type_str(self):
|
|
62
68
|
"""Parse vector index type to string."""
|
|
@@ -71,6 +77,8 @@ class IndexParam:
|
|
|
71
77
|
return IndexParam.IVFSQ_ALGO_NAME
|
|
72
78
|
elif self.index_type == VecIndexType.IVFPQ:
|
|
73
79
|
return IndexParam.IVFPQ_ALGO_NAME
|
|
80
|
+
elif self.index_type == VecIndexType.DAAT:
|
|
81
|
+
return IndexParam.DAAT_ALGO_NAME
|
|
74
82
|
raise ValueError(f"unsupported vector index type: {self.index_type}")
|
|
75
83
|
assert isinstance(self.index_type, str)
|
|
76
84
|
index_type = self.index_type.lower()
|
|
@@ -80,6 +88,7 @@ class IndexParam:
|
|
|
80
88
|
IndexParam.IVFFLAT_ALGO_NAME,
|
|
81
89
|
IndexParam.IVFSQ_ALGO_NAME,
|
|
82
90
|
IndexParam.IVFPQ_ALGO_NAME,
|
|
91
|
+
IndexParam.DAAT_ALGO_NAME,
|
|
83
92
|
]:
|
|
84
93
|
raise ValueError(f"unsupported vector index type: {self.index_type}")
|
|
85
94
|
return index_type
|
|
@@ -124,15 +133,19 @@ class IndexParam:
|
|
|
124
133
|
ob_params['ef_construction'] = params['efConstruction']
|
|
125
134
|
if 'efSearch' in params:
|
|
126
135
|
ob_params['ef_search'] = params['efSearch']
|
|
136
|
+
|
|
137
|
+
if self.is_index_type_sparse_vector() and ob_params['distance'] != 'inner_product':
|
|
138
|
+
raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
|
|
127
139
|
return ob_params
|
|
128
140
|
|
|
129
141
|
def param_str(self):
|
|
130
142
|
"""Parse vector index parameters to string."""
|
|
131
143
|
ob_param = self._parse_kwargs()
|
|
132
144
|
partial_str = ",".join([f"{k}={v}" for k, v in ob_param.items()])
|
|
133
|
-
if
|
|
134
|
-
partial_str
|
|
135
|
-
|
|
145
|
+
if not self.is_index_type_sparse_vector():
|
|
146
|
+
if len(partial_str) > 0:
|
|
147
|
+
partial_str += ","
|
|
148
|
+
partial_str += f"type={self.index_type}"
|
|
136
149
|
return partial_str
|
|
137
150
|
|
|
138
151
|
def __iter__(self):
|
|
@@ -165,10 +178,10 @@ class IndexParams:
|
|
|
165
178
|
"""Add `IndexParam` to `IndexParams`
|
|
166
179
|
|
|
167
180
|
Args:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
181
|
+
field_name (string): vector index built on which field
|
|
182
|
+
index_type (VecIndexType or str): vector index algorithm, e.g. HNSW, IVFFLAT, IVFSQ, IVFPQ or DAAT
|
|
183
|
+
index_name (string): vector index name
|
|
184
|
+
**kwargs: additional parameters for different index types
|
|
172
185
|
"""
|
|
173
186
|
index_param = IndexParam(index_name, field_name, index_type, **kwargs)
|
|
174
187
|
pair_key = (field_name, index_name)
|