pyobvector 0.2.16__tar.gz → 0.2.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.16 → pyobvector-0.2.18}/PKG-INFO +69 -7
- {pyobvector-0.2.16 → pyobvector-0.2.18}/README.md +63 -3
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/__init__.py +3 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/collection_schema.py +6 -6
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/exceptions.py +4 -4
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/fts_index_param.py +2 -3
- pyobvector-0.2.18/pyobvector/client/hybrid_search.py +81 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/index_param.py +21 -8
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/milvus_like_client.py +124 -88
- pyobvector-0.2.18/pyobvector/client/ob_client.py +459 -0
- pyobvector-0.2.18/pyobvector/client/ob_vec_client.py +522 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/schema_type.py +4 -2
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/__init__.py +3 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/dialect.py +3 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/reflection.py +1 -1
- pyobvector-0.2.18/pyobvector/schema/sparse_vector.py +35 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vector_index.py +1 -1
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/__init__.py +3 -1
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/ob_version.py +1 -1
- pyobvector-0.2.18/pyobvector/util/sparse_vector.py +48 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/util/vector.py +10 -4
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyproject.toml +3 -3
- pyobvector-0.2.16/pyobvector/client/ob_vec_client.py +0 -862
- {pyobvector-0.2.16 → pyobvector-0.2.18}/LICENSE +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/__init__.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/enum.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/ob_vec_json_table_client.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/client/partitions.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/__init__.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/json_value_returning_func.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/oceanbase_dialect.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/json_table/virtual_data_type.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/array.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/full_text_index.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/geo_srid_point.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/gis_func.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/match_against_func.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/ob_table.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/replace_stmt.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vec_dist_func.py +0 -0
- {pyobvector-0.2.16 → pyobvector-0.2.18}/pyobvector/schema/vector.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobvector
|
|
3
|
-
Version: 0.2.16
|
|
3
|
+
Version: 0.2.18
|
|
4
4
|
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
|
+
License-File: LICENSE
|
|
5
6
|
Author: shanhaikang.shk
|
|
6
7
|
Author-email: shanhaikang.shk@oceanbase.com
|
|
7
8
|
Requires-Python: >=3.9,<4.0
|
|
@@ -11,12 +12,13 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
-
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
|
|
15
17
|
Requires-Dist: numpy (>=1.17.0,<2.0.0)
|
|
16
18
|
Requires-Dist: pydantic (>=2.7.0,<3)
|
|
17
19
|
Requires-Dist: pymysql (>=1.1.1,<2.0.0)
|
|
18
20
|
Requires-Dist: sqlalchemy (>=1.4,<=3)
|
|
19
|
-
Requires-Dist: sqlglot (>=26.0.1
|
|
21
|
+
Requires-Dist: sqlglot (>=26.0.1)
|
|
20
22
|
Description-Content-Type: text/markdown
|
|
21
23
|
|
|
22
24
|
# pyobvector
|
|
@@ -36,7 +38,7 @@ poetry install
|
|
|
36
38
|
- install with pip:
|
|
37
39
|
|
|
38
40
|
```shell
|
|
39
|
-
pip install pyobvector==0.2.16
|
|
41
|
+
pip install pyobvector==0.2.18
|
|
40
42
|
```
|
|
41
43
|
|
|
42
44
|
## Build Doc
|
|
@@ -48,6 +50,10 @@ mkdir build
|
|
|
48
50
|
make html
|
|
49
51
|
```
|
|
50
52
|
|
|
53
|
+
## Release Notes
|
|
54
|
+
|
|
55
|
+
For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
|
|
56
|
+
|
|
51
57
|
## Usage
|
|
52
58
|
|
|
53
59
|
`pyobvector` supports two modes:
|
|
@@ -174,19 +180,75 @@ client.insert(test_collection_name, data=data1)
|
|
|
174
180
|
- do ann search:
|
|
175
181
|
|
|
176
182
|
```python
|
|
177
|
-
# perform ann search
|
|
183
|
+
# perform ann search with basic column selection
|
|
178
184
|
res = self.client.ann_search(
|
|
179
185
|
test_collection_name,
|
|
180
186
|
vec_data=[0,0,0],
|
|
181
187
|
vec_column_name='embedding',
|
|
182
188
|
distance_func=l2_distance,
|
|
183
189
|
topk=5,
|
|
184
|
-
output_column_names=['id']
|
|
190
|
+
output_column_names=['id'] # Legacy parameter
|
|
185
191
|
)
|
|
186
192
|
# For example, the result will be:
|
|
187
193
|
# [(112,), (111,), (10,), (11,), (12,)]
|
|
194
|
+
|
|
195
|
+
# perform ann search with SQLAlchemy expressions (recommended)
|
|
196
|
+
from sqlalchemy import Table, text, func
|
|
197
|
+
|
|
198
|
+
table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
|
|
199
|
+
res = self.client.ann_search(
|
|
200
|
+
test_collection_name,
|
|
201
|
+
vec_data=[0,0,0],
|
|
202
|
+
vec_column_name='embedding',
|
|
203
|
+
distance_func=l2_distance,
|
|
204
|
+
topk=5,
|
|
205
|
+
output_columns=[
|
|
206
|
+
table.c.id,
|
|
207
|
+
table.c.meta,
|
|
208
|
+
(table.c.id + 1000).label('id_plus_1000'),
|
|
209
|
+
text("JSON_EXTRACT(meta, '$.key') as extracted_key")
|
|
210
|
+
]
|
|
211
|
+
)
|
|
212
|
+
# For example, the result will be:
|
|
213
|
+
# [(112, '{"key": "value"}', 1112, 'value'), ...]
|
|
214
|
+
|
|
215
|
+
# perform ann search with distance threshold (filter results by distance)
|
|
216
|
+
res = self.client.ann_search(
|
|
217
|
+
test_collection_name,
|
|
218
|
+
vec_data=[0,0,0],
|
|
219
|
+
vec_column_name='embedding',
|
|
220
|
+
distance_func=l2_distance,
|
|
221
|
+
with_dist=True,
|
|
222
|
+
topk=10,
|
|
223
|
+
output_column_names=['id'],
|
|
224
|
+
distance_threshold=0.5 # Only return results where distance <= 0.5
|
|
225
|
+
)
|
|
226
|
+
# Only returns results with distance <= 0.5
|
|
227
|
+
# For example, the result will be:
|
|
228
|
+
# [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
|
|
188
229
|
```
|
|
189
230
|
|
|
231
|
+
#### ann_search Parameters
|
|
232
|
+
|
|
233
|
+
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
234
|
+
|
|
235
|
+
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
236
|
+
- Column objects: `table.c.id`, `table.c.name`
|
|
237
|
+
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
238
|
+
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
239
|
+
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
240
|
+
|
|
241
|
+
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
242
|
+
- Example: `['id', 'name', 'meta']`
|
|
243
|
+
|
|
244
|
+
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
245
|
+
|
|
246
|
+
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
247
|
+
- Type: `Optional[float]`
|
|
248
|
+
- Only returns results where `distance <= threshold`
|
|
249
|
+
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
250
|
+
- Use case: Quality control for similarity search, only return highly similar results
|
|
251
|
+
|
|
190
252
|
- If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as following:
|
|
191
253
|
|
|
192
254
|
```python
|
|
@@ -15,7 +15,7 @@ poetry install
|
|
|
15
15
|
- install with pip:
|
|
16
16
|
|
|
17
17
|
```shell
|
|
18
|
-
pip install pyobvector==0.2.16
|
|
18
|
+
pip install pyobvector==0.2.18
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
## Build Doc
|
|
@@ -27,6 +27,10 @@ mkdir build
|
|
|
27
27
|
make html
|
|
28
28
|
```
|
|
29
29
|
|
|
30
|
+
## Release Notes
|
|
31
|
+
|
|
32
|
+
For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
|
|
33
|
+
|
|
30
34
|
## Usage
|
|
31
35
|
|
|
32
36
|
`pyobvector` supports two modes:
|
|
@@ -153,19 +157,75 @@ client.insert(test_collection_name, data=data1)
|
|
|
153
157
|
- do ann search:
|
|
154
158
|
|
|
155
159
|
```python
|
|
156
|
-
# perform ann search
|
|
160
|
+
# perform ann search with basic column selection
|
|
157
161
|
res = self.client.ann_search(
|
|
158
162
|
test_collection_name,
|
|
159
163
|
vec_data=[0,0,0],
|
|
160
164
|
vec_column_name='embedding',
|
|
161
165
|
distance_func=l2_distance,
|
|
162
166
|
topk=5,
|
|
163
|
-
output_column_names=['id']
|
|
167
|
+
output_column_names=['id'] # Legacy parameter
|
|
164
168
|
)
|
|
165
169
|
# For example, the result will be:
|
|
166
170
|
# [(112,), (111,), (10,), (11,), (12,)]
|
|
171
|
+
|
|
172
|
+
# perform ann search with SQLAlchemy expressions (recommended)
|
|
173
|
+
from sqlalchemy import Table, text, func
|
|
174
|
+
|
|
175
|
+
table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
|
|
176
|
+
res = self.client.ann_search(
|
|
177
|
+
test_collection_name,
|
|
178
|
+
vec_data=[0,0,0],
|
|
179
|
+
vec_column_name='embedding',
|
|
180
|
+
distance_func=l2_distance,
|
|
181
|
+
topk=5,
|
|
182
|
+
output_columns=[
|
|
183
|
+
table.c.id,
|
|
184
|
+
table.c.meta,
|
|
185
|
+
(table.c.id + 1000).label('id_plus_1000'),
|
|
186
|
+
text("JSON_EXTRACT(meta, '$.key') as extracted_key")
|
|
187
|
+
]
|
|
188
|
+
)
|
|
189
|
+
# For example, the result will be:
|
|
190
|
+
# [(112, '{"key": "value"}', 1112, 'value'), ...]
|
|
191
|
+
|
|
192
|
+
# perform ann search with distance threshold (filter results by distance)
|
|
193
|
+
res = self.client.ann_search(
|
|
194
|
+
test_collection_name,
|
|
195
|
+
vec_data=[0,0,0],
|
|
196
|
+
vec_column_name='embedding',
|
|
197
|
+
distance_func=l2_distance,
|
|
198
|
+
with_dist=True,
|
|
199
|
+
topk=10,
|
|
200
|
+
output_column_names=['id'],
|
|
201
|
+
distance_threshold=0.5 # Only return results where distance <= 0.5
|
|
202
|
+
)
|
|
203
|
+
# Only returns results with distance <= 0.5
|
|
204
|
+
# For example, the result will be:
|
|
205
|
+
# [(10, 0.0), (11, 0.0), ...] # Only includes results with distance <= 0.5
|
|
167
206
|
```
|
|
168
207
|
|
|
208
|
+
#### ann_search Parameters
|
|
209
|
+
|
|
210
|
+
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
211
|
+
|
|
212
|
+
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
213
|
+
- Column objects: `table.c.id`, `table.c.name`
|
|
214
|
+
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
215
|
+
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
216
|
+
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
217
|
+
|
|
218
|
+
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
219
|
+
- Example: `['id', 'name', 'meta']`
|
|
220
|
+
|
|
221
|
+
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
222
|
+
|
|
223
|
+
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
224
|
+
- Type: `Optional[float]`
|
|
225
|
+
- Only returns results where `distance <= threshold`
|
|
226
|
+
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
227
|
+
- Use case: Quality control for similarity search, only return highly similar results
|
|
228
|
+
|
|
169
229
|
- If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as following:
|
|
170
230
|
|
|
171
231
|
```python
|
|
@@ -14,6 +14,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
|
14
14
|
* IndexParams A list of IndexParam to create vector index in batch
|
|
15
15
|
* DataType Specify field type in collection schema for MilvusLikeClient
|
|
16
16
|
* VECTOR An extended data type in SQLAlchemy for ObVecClient
|
|
17
|
+
* SPARSE_VECTOR An extended data type in SQLAlchemy for ObVecClient
|
|
17
18
|
* VectorIndex An extended index type in SQLAlchemy for ObVecClient
|
|
18
19
|
* FtsIndex Full Text Search Index
|
|
19
20
|
* FieldSchema Clas to define field schema in collection for MilvusLikeClient
|
|
@@ -43,6 +44,7 @@ from .client import *
|
|
|
43
44
|
from .schema import (
|
|
44
45
|
ARRAY,
|
|
45
46
|
VECTOR,
|
|
47
|
+
SPARSE_VECTOR,
|
|
46
48
|
POINT,
|
|
47
49
|
VectorIndex,
|
|
48
50
|
OceanBaseDialect,
|
|
@@ -70,6 +72,7 @@ __all__ = [
|
|
|
70
72
|
"DataType",
|
|
71
73
|
"ARRAY",
|
|
72
74
|
"VECTOR",
|
|
75
|
+
"SPARSE_VECTOR",
|
|
73
76
|
"POINT",
|
|
74
77
|
"VectorIndex",
|
|
75
78
|
"FtsIndex",
|
|
@@ -79,14 +79,14 @@ class FieldSchema:
|
|
|
79
79
|
if "max_length" not in self.kwargs:
|
|
80
80
|
raise VarcharFieldParamException(
|
|
81
81
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
82
|
-
message=ExceptionsMessage.
|
|
82
|
+
message=ExceptionsMessage.VarcharFieldMissingLengthParam,
|
|
83
83
|
)
|
|
84
84
|
self.type_params["length"] = self.kwargs["max_length"]
|
|
85
85
|
elif self.dtype == DataType.ARRAY:
|
|
86
86
|
if "element_type" not in self.kwargs:
|
|
87
87
|
raise ArrayFieldParamException(
|
|
88
88
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
89
|
-
message=ExceptionsMessage.
|
|
89
|
+
message=ExceptionsMessage.ArrayFieldMissingElementType,
|
|
90
90
|
)
|
|
91
91
|
if self.kwargs["element_type"] in (
|
|
92
92
|
DataType.ARRAY,
|
|
@@ -95,7 +95,7 @@ class FieldSchema:
|
|
|
95
95
|
):
|
|
96
96
|
raise ArrayFieldParamException(
|
|
97
97
|
code=ErrorCode.INVALID_ARGUMENT,
|
|
98
|
-
message=ExceptionsMessage.
|
|
98
|
+
message=ExceptionsMessage.ArrayFieldInvalidElementType,
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
self.type_params["item_type"] = convert_datatype_to_sqltype(
|
|
@@ -147,9 +147,9 @@ class CollectionSchema:
|
|
|
147
147
|
"""Add field to collection.
|
|
148
148
|
|
|
149
149
|
Args:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
150
|
+
field_name (string): new field name
|
|
151
|
+
datatype (DataType): field data type
|
|
152
|
+
**kwargs: parameters for data type
|
|
153
153
|
"""
|
|
154
154
|
field = FieldSchema(field_name, datatype, **kwargs)
|
|
155
155
|
cur_idx = len(self.fields)
|
|
@@ -101,9 +101,9 @@ class ExceptionsMessage:
|
|
|
101
101
|
)
|
|
102
102
|
PrimaryFieldType = "Param primary_field must be int or str type."
|
|
103
103
|
VectorFieldMissingDimParam = "Param 'dim' must be set for vector field."
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
VarcharFieldMissingLengthParam = "Param 'max_length' must be set for varchar field."
|
|
105
|
+
ArrayFieldMissingElementType = "Param 'element_type' must be set for array field."
|
|
106
|
+
ArrayFieldInvalidElementType = (
|
|
107
107
|
"Param 'element_type' can not be array/vector/varchar."
|
|
108
108
|
)
|
|
109
109
|
CollectionNotExists = "Collection does not exist."
|
|
@@ -111,5 +111,5 @@ class ExceptionsMessage:
|
|
|
111
111
|
MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
|
|
112
112
|
UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
|
|
113
113
|
ClusterVersionIsLow = (
|
|
114
|
-
"OceanBase
|
|
114
|
+
"OceanBase %s feature is not supported because cluster version is below %s."
|
|
115
115
|
)
|
|
@@ -18,13 +18,12 @@ class FtsIndexParam:
|
|
|
18
18
|
self.field_names = field_names
|
|
19
19
|
self.parser_type = parser_type
|
|
20
20
|
|
|
21
|
-
def param_str(self) -> str:
|
|
22
|
-
if self.parser_type is None:
|
|
23
|
-
return None
|
|
21
|
+
def param_str(self) -> str | None:
|
|
24
22
|
if self.parser_type == FtsParser.IK:
|
|
25
23
|
return "ik"
|
|
26
24
|
if self.parser_type == FtsParser.NGRAM:
|
|
27
25
|
return "ngram"
|
|
26
|
+
return None
|
|
28
27
|
|
|
29
28
|
def __iter__(self):
|
|
30
29
|
yield "index_name", self.index_name
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""OceanBase Hybrid Search Client."""
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import text
|
|
7
|
+
|
|
8
|
+
from .exceptions import ClusterVersionException, ErrorCode, ExceptionsMessage
|
|
9
|
+
from .ob_vec_client import ObVecClient as Client
|
|
10
|
+
from ..util import ObVersion
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
logger.setLevel(logging.DEBUG)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HybridSearch(Client):
|
|
17
|
+
"""The OceanBase Hybrid Search Client"""
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
uri: str = "127.0.0.1:2881",
|
|
22
|
+
user: str = "root@test",
|
|
23
|
+
password: str = "",
|
|
24
|
+
db_name: str = "test",
|
|
25
|
+
**kwargs,
|
|
26
|
+
):
|
|
27
|
+
super().__init__(uri, user, password, db_name, **kwargs)
|
|
28
|
+
|
|
29
|
+
if self.ob_version < ObVersion.from_db_version_nums(4, 4, 1, 0):
|
|
30
|
+
raise ClusterVersionException(
|
|
31
|
+
code=ErrorCode.NOT_SUPPORTED,
|
|
32
|
+
message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def search(
|
|
36
|
+
self,
|
|
37
|
+
index: str,
|
|
38
|
+
body: Dict[str, Any],
|
|
39
|
+
**kwargs,
|
|
40
|
+
):
|
|
41
|
+
"""Execute hybrid search with parameter compatible with Elasticsearch.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
index: The name of the table to search
|
|
45
|
+
body: The search query body
|
|
46
|
+
**kwargs: Additional search parameters
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Search results
|
|
50
|
+
"""
|
|
51
|
+
body_str = json.dumps(body)
|
|
52
|
+
|
|
53
|
+
sql = text("SELECT DBMS_HYBRID_SEARCH.SEARCH(:index, :body_str)")
|
|
54
|
+
|
|
55
|
+
with self.engine.connect() as conn:
|
|
56
|
+
with conn.begin():
|
|
57
|
+
res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
|
|
58
|
+
return json.loads(res[0])
|
|
59
|
+
|
|
60
|
+
def get_sql(
|
|
61
|
+
self,
|
|
62
|
+
index: str,
|
|
63
|
+
body: Dict[str, Any],
|
|
64
|
+
) -> str:
|
|
65
|
+
"""Get the SQL actually to be executed in hybrid search.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
index: The name of the table to search
|
|
69
|
+
body: The hybrid search query body
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
The SQL actually to be executed
|
|
73
|
+
"""
|
|
74
|
+
body_str = json.dumps(body)
|
|
75
|
+
|
|
76
|
+
sql = text("SELECT DBMS_HYBRID_SEARCH.GET_SQL(:index, :body_str)")
|
|
77
|
+
|
|
78
|
+
with self.engine.connect() as conn:
|
|
79
|
+
with conn.begin():
|
|
80
|
+
res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
|
|
81
|
+
return res[0]
|
|
@@ -9,7 +9,7 @@ class VecIndexType(Enum):
|
|
|
9
9
|
IVFFLAT = 2
|
|
10
10
|
IVFSQ = 3
|
|
11
11
|
IVFPQ = 4
|
|
12
|
-
|
|
12
|
+
DAAT = 5
|
|
13
13
|
|
|
14
14
|
class IndexParam:
|
|
15
15
|
"""Vector index parameters.
|
|
@@ -31,6 +31,7 @@ class IndexParam:
|
|
|
31
31
|
IVFFLAT_ALGO_NAME = "ivf_flat"
|
|
32
32
|
IVFSQ_ALGO_NAME = "ivf_sq8"
|
|
33
33
|
IVFPQ_ALGO_NAME = "ivf_pq"
|
|
34
|
+
DAAT_ALGO_NAME = "daat"
|
|
34
35
|
|
|
35
36
|
def __init__(
|
|
36
37
|
self, index_name: str, field_name: str, index_type: Union[VecIndexType, str], **kwargs
|
|
@@ -57,6 +58,11 @@ class IndexParam:
|
|
|
57
58
|
return self.index_type in [
|
|
58
59
|
IndexParam.IVFPQ_ALGO_NAME,
|
|
59
60
|
]
|
|
61
|
+
|
|
62
|
+
def is_index_type_sparse_vector(self):
|
|
63
|
+
return self.index_type in [
|
|
64
|
+
IndexParam.DAAT_ALGO_NAME,
|
|
65
|
+
]
|
|
60
66
|
|
|
61
67
|
def _get_vector_index_type_str(self):
|
|
62
68
|
"""Parse vector index type to string."""
|
|
@@ -71,6 +77,8 @@ class IndexParam:
|
|
|
71
77
|
return IndexParam.IVFSQ_ALGO_NAME
|
|
72
78
|
elif self.index_type == VecIndexType.IVFPQ:
|
|
73
79
|
return IndexParam.IVFPQ_ALGO_NAME
|
|
80
|
+
elif self.index_type == VecIndexType.DAAT:
|
|
81
|
+
return IndexParam.DAAT_ALGO_NAME
|
|
74
82
|
raise ValueError(f"unsupported vector index type: {self.index_type}")
|
|
75
83
|
assert isinstance(self.index_type, str)
|
|
76
84
|
index_type = self.index_type.lower()
|
|
@@ -80,6 +88,7 @@ class IndexParam:
|
|
|
80
88
|
IndexParam.IVFFLAT_ALGO_NAME,
|
|
81
89
|
IndexParam.IVFSQ_ALGO_NAME,
|
|
82
90
|
IndexParam.IVFPQ_ALGO_NAME,
|
|
91
|
+
IndexParam.DAAT_ALGO_NAME,
|
|
83
92
|
]:
|
|
84
93
|
raise ValueError(f"unsupported vector index type: {self.index_type}")
|
|
85
94
|
return index_type
|
|
@@ -124,15 +133,19 @@ class IndexParam:
|
|
|
124
133
|
ob_params['ef_construction'] = params['efConstruction']
|
|
125
134
|
if 'efSearch' in params:
|
|
126
135
|
ob_params['ef_search'] = params['efSearch']
|
|
136
|
+
|
|
137
|
+
if self.is_index_type_sparse_vector() and ob_params['distance'] != 'inner_product':
|
|
138
|
+
raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
|
|
127
139
|
return ob_params
|
|
128
140
|
|
|
129
141
|
def param_str(self):
|
|
130
142
|
"""Parse vector index parameters to string."""
|
|
131
143
|
ob_param = self._parse_kwargs()
|
|
132
144
|
partial_str = ",".join([f"{k}={v}" for k, v in ob_param.items()])
|
|
133
|
-
if
|
|
134
|
-
partial_str
|
|
135
|
-
|
|
145
|
+
if not self.is_index_type_sparse_vector():
|
|
146
|
+
if len(partial_str) > 0:
|
|
147
|
+
partial_str += ","
|
|
148
|
+
partial_str += f"type={self.index_type}"
|
|
136
149
|
return partial_str
|
|
137
150
|
|
|
138
151
|
def __iter__(self):
|
|
@@ -165,10 +178,10 @@ class IndexParams:
|
|
|
165
178
|
"""Add `IndexParam` to `IndexParams`
|
|
166
179
|
|
|
167
180
|
Args:
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
181
|
+
field_name (string): vector index built on which field
|
|
182
|
+
index_type (VecIndexType): vector index algorithms (Only HNSW supported)
|
|
183
|
+
index_name (string): vector index name
|
|
184
|
+
**kwargs: additional parameters for different index types
|
|
172
185
|
"""
|
|
173
186
|
index_param = IndexParam(index_name, field_name, index_type, **kwargs)
|
|
174
187
|
pair_key = (field_name, index_name)
|