pyobvector 0.2.18__tar.gz → 0.2.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.18 → pyobvector-0.2.20}/PKG-INFO +176 -8
- {pyobvector-0.2.18 → pyobvector-0.2.20}/README.md +174 -6
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/hybrid_search.py +7 -1
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/index_param.py +5 -2
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_client.py +20 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_vec_client.py +5 -1
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_vec_json_table_client.py +3 -2
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyproject.toml +2 -2
- {pyobvector-0.2.18 → pyobvector-0.2.20}/LICENSE +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/__init__.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/__init__.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/collection_schema.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/enum.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/exceptions.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/fts_index_param.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/milvus_like_client.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/partitions.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/schema_type.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/__init__.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/json_value_returning_func.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/oceanbase_dialect.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/virtual_data_type.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/__init__.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/array.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/dialect.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/full_text_index.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/geo_srid_point.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/gis_func.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/match_against_func.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/ob_table.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/reflection.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/replace_stmt.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/sparse_vector.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vec_dist_func.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vector.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vector_index.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/__init__.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/ob_version.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/sparse_vector.py +0 -0
- {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/vector.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobvector
|
|
3
|
-
Version: 0.2.18
|
|
3
|
+
Version: 0.2.20
|
|
4
4
|
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Author: shanhaikang.shk
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.14
|
|
16
16
|
Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
|
|
17
|
-
Requires-Dist: numpy (>=1.17.0
|
|
17
|
+
Requires-Dist: numpy (>=1.17.0)
|
|
18
18
|
Requires-Dist: pydantic (>=2.7.0,<3)
|
|
19
19
|
Requires-Dist: pymysql (>=1.1.1,<2.0.0)
|
|
20
20
|
Requires-Dist: sqlalchemy (>=1.4,<=3)
|
|
@@ -38,7 +38,7 @@ poetry install
|
|
|
38
38
|
- install with pip:
|
|
39
39
|
|
|
40
40
|
```shell
|
|
41
|
-
pip install pyobvector==0.2.18
|
|
41
|
+
pip install pyobvector==0.2.20
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
## Build Doc
|
|
@@ -56,10 +56,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
|
|
|
56
56
|
|
|
57
57
|
## Usage
|
|
58
58
|
|
|
59
|
-
`pyobvector` supports two modes:
|
|
59
|
+
`pyobvector` supports three modes:
|
|
60
60
|
|
|
61
61
|
- `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
|
|
62
62
|
- `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
63
|
+
- `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
|
|
63
64
|
|
|
64
65
|
### Milvus compatible mode
|
|
65
66
|
|
|
@@ -233,22 +234,21 @@ res = self.client.ann_search(
|
|
|
233
234
|
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
234
235
|
|
|
235
236
|
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
237
|
+
|
|
236
238
|
- Column objects: `table.c.id`, `table.c.name`
|
|
237
239
|
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
238
240
|
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
239
241
|
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
240
|
-
|
|
241
242
|
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
243
|
+
|
|
242
244
|
- Example: `['id', 'name', 'meta']`
|
|
243
|
-
|
|
244
245
|
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
245
|
-
|
|
246
246
|
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
247
|
+
|
|
247
248
|
- Type: `Optional[float]`
|
|
248
249
|
- Only returns results where `distance <= threshold`
|
|
249
250
|
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
250
251
|
- Use case: Quality control for similarity search, only return highly similar results
|
|
251
|
-
|
|
252
252
|
- If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
|
|
253
253
|
|
|
254
254
|
```python
|
|
@@ -287,3 +287,171 @@ engine = create_async_engine(connection_str)
|
|
|
287
287
|
|
|
288
288
|
- For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
|
|
289
289
|
|
|
290
|
+
### Hybrid Search Mode
|
|
291
|
+
|
|
292
|
+
`pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
|
|
293
|
+
|
|
294
|
+
- set up a client:
|
|
295
|
+
|
|
296
|
+
```python
|
|
297
|
+
from pyobvector import *
|
|
298
|
+
from pyobvector.client.hybrid_search import HybridSearch
|
|
299
|
+
from sqlalchemy import Column, Integer, VARCHAR
|
|
300
|
+
|
|
301
|
+
client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
**Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
|
|
305
|
+
|
|
306
|
+
- create a table with both vector index and full-text index:
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
test_table_name = "hybrid_search_test"
|
|
310
|
+
|
|
311
|
+
# create table with vector and text columns
|
|
312
|
+
client.create_table(
|
|
313
|
+
table_name=test_table_name,
|
|
314
|
+
columns=[
|
|
315
|
+
Column("id", Integer, primary_key=True, autoincrement=False),
|
|
316
|
+
Column("source_id", VARCHAR(32)),
|
|
317
|
+
Column("enabled", Integer),
|
|
318
|
+
Column("vector", VECTOR(3)), # vector column
|
|
319
|
+
Column("title", VARCHAR(255)), # text column for full-text search
|
|
320
|
+
Column("content", VARCHAR(255)), # text column for full-text search
|
|
321
|
+
],
|
|
322
|
+
indexes=[
|
|
323
|
+
VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
|
|
324
|
+
],
|
|
325
|
+
mysql_charset='utf8mb4',
|
|
326
|
+
mysql_collate='utf8mb4_unicode_ci',
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
# create full-text indexes for text columns
|
|
330
|
+
from pyobvector import FtsIndexParam, FtsParser
|
|
331
|
+
|
|
332
|
+
for col in ["title", "content"]:
|
|
333
|
+
client.create_fts_idx_with_fts_index_param(
|
|
334
|
+
table_name=test_table_name,
|
|
335
|
+
fts_idx_param=FtsIndexParam(
|
|
336
|
+
index_name=f"fts_idx_{col}",
|
|
337
|
+
field_names=[col],
|
|
338
|
+
parser_type=FtsParser.IK, # or other parser types
|
|
339
|
+
),
|
|
340
|
+
)
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
- insert data:
|
|
344
|
+
|
|
345
|
+
```python
|
|
346
|
+
client.insert(
|
|
347
|
+
table_name=test_table_name,
|
|
348
|
+
data=[
|
|
349
|
+
{
|
|
350
|
+
"id": 1,
|
|
351
|
+
"source_id": "3b767712b57211f09c170242ac130008",
|
|
352
|
+
"enabled": 1,
|
|
353
|
+
"vector": [1, 1, 1],
|
|
354
|
+
"title": "企业版和社区版的功能差异",
|
|
355
|
+
"content": "OceanBase 数据库提供企业版和社区版两种形态。",
|
|
356
|
+
},
|
|
357
|
+
{
|
|
358
|
+
"id": 2,
|
|
359
|
+
"vector": [1, 2, 3],
|
|
360
|
+
"enabled": 1,
|
|
361
|
+
"source_id": "3b791472b57211f09c170242ac130008",
|
|
362
|
+
"title": "快速体验 OceanBase 社区版",
|
|
363
|
+
"content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
|
|
364
|
+
},
|
|
365
|
+
# ... more data
|
|
366
|
+
]
|
|
367
|
+
)
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
- perform hybrid search with Elasticsearch-compatible query syntax:
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
# build query body (compatible with Elasticsearch syntax)
|
|
374
|
+
query = {
|
|
375
|
+
"bool": {
|
|
376
|
+
"must": [
|
|
377
|
+
{
|
|
378
|
+
"query_string": {
|
|
379
|
+
"fields": ["title^10", "content"], # field weights
|
|
380
|
+
"type": "best_fields",
|
|
381
|
+
"query": "oceanbase 数据 迁移",
|
|
382
|
+
"minimum_should_match": "30%",
|
|
383
|
+
"boost": 1
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
],
|
|
387
|
+
"filter": [
|
|
388
|
+
{
|
|
389
|
+
"terms": {
|
|
390
|
+
"source_id": [
|
|
391
|
+
"3b791472b57211f09c170242ac130008",
|
|
392
|
+
"3b7af31eb57211f09c170242ac130008"
|
|
393
|
+
]
|
|
394
|
+
}
|
|
395
|
+
},
|
|
396
|
+
{
|
|
397
|
+
"bool": {
|
|
398
|
+
"must_not": [
|
|
399
|
+
{
|
|
400
|
+
"range": {
|
|
401
|
+
"enabled": {"lt": 1}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
]
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
],
|
|
408
|
+
"boost": 0.7
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
body = {
|
|
413
|
+
"query": query,
|
|
414
|
+
"knn": { # vector similarity search
|
|
415
|
+
"field": "vector",
|
|
416
|
+
"k": 1024,
|
|
417
|
+
"num_candidates": 1024,
|
|
418
|
+
"query_vector": [1, 2, 3],
|
|
419
|
+
"filter": query, # optional: apply same filter to KNN
|
|
420
|
+
"similarity": 0.2 # similarity threshold
|
|
421
|
+
},
|
|
422
|
+
"from": 0, # pagination offset
|
|
423
|
+
"size": 60 # pagination size
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
# execute hybrid search
|
|
427
|
+
results = client.search(index=test_table_name, body=body)
|
|
428
|
+
# results is a list of matching documents
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
#### Supported Query Types
|
|
432
|
+
|
|
433
|
+
The hybrid search supports Elasticsearch-compatible query syntax:
|
|
434
|
+
|
|
435
|
+
- **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
|
|
436
|
+
- **`query_string`**: Full-text search with field weights, boost, and matching options
|
|
437
|
+
- **`terms`**: Exact match filtering for multiple values
|
|
438
|
+
- **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
|
|
439
|
+
- **`knn`**: Vector similarity search (KNN) with:
|
|
440
|
+
- `field`: Vector field name
|
|
441
|
+
- `query_vector`: Query vector
|
|
442
|
+
- `k`: Number of results to return
|
|
443
|
+
- `num_candidates`: Number of candidates to consider
|
|
444
|
+
- `filter`: Optional filter to apply to KNN search
|
|
445
|
+
- `similarity`: Similarity threshold
|
|
446
|
+
- **Pagination**: `from` and `size` parameters
|
|
447
|
+
|
|
448
|
+
#### Get SQL Query
|
|
449
|
+
|
|
450
|
+
You can also get the actual SQL that will be executed:
|
|
451
|
+
|
|
452
|
+
```python
|
|
453
|
+
sql = client.get_sql(index=test_table_name, body=body)
|
|
454
|
+
print(sql) # prints the SQL query
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
|
|
@@ -15,7 +15,7 @@ poetry install
|
|
|
15
15
|
- install with pip:
|
|
16
16
|
|
|
17
17
|
```shell
|
|
18
|
-
pip install pyobvector==0.2.18
|
|
18
|
+
pip install pyobvector==0.2.20
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
## Build Doc
|
|
@@ -33,10 +33,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
|
|
|
33
33
|
|
|
34
34
|
## Usage
|
|
35
35
|
|
|
36
|
-
`pyobvector` supports two modes:
|
|
36
|
+
`pyobvector` supports three modes:
|
|
37
37
|
|
|
38
38
|
- `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
|
|
39
39
|
- `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
40
|
+
- `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
|
|
40
41
|
|
|
41
42
|
### Milvus compatible mode
|
|
42
43
|
|
|
@@ -210,22 +211,21 @@ res = self.client.ann_search(
|
|
|
210
211
|
The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
|
|
211
212
|
|
|
212
213
|
- **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
|
|
214
|
+
|
|
213
215
|
- Column objects: `table.c.id`, `table.c.name`
|
|
214
216
|
- Expressions: `(table.c.age + 10).label('age_plus_10')`
|
|
215
217
|
- JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
|
|
216
218
|
- String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
|
|
217
|
-
|
|
218
219
|
- **`output_column_names`** (legacy): Accepts list of column name strings
|
|
220
|
+
|
|
219
221
|
- Example: `['id', 'name', 'meta']`
|
|
220
|
-
|
|
221
222
|
- **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
|
|
222
|
-
|
|
223
223
|
- **`distance_threshold`** (optional): Filter results by distance threshold
|
|
224
|
+
|
|
224
225
|
- Type: `Optional[float]`
|
|
225
226
|
- Only returns results where `distance <= threshold`
|
|
226
227
|
- Example: `distance_threshold=0.5` returns only results with distance <= 0.5
|
|
227
228
|
- Use case: Quality control for similarity search, only return highly similar results
|
|
228
|
-
|
|
229
229
|
- If you want to use pure `SQLAlchemy` API with `OceanBase` dialect, you can just get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
|
|
230
230
|
|
|
231
231
|
```python
|
|
@@ -263,3 +263,171 @@ engine = create_async_engine(connection_str)
|
|
|
263
263
|
```
|
|
264
264
|
|
|
265
265
|
- For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
|
|
266
|
+
|
|
267
|
+
### Hybrid Search Mode
|
|
268
|
+
|
|
269
|
+
`pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
|
|
270
|
+
|
|
271
|
+
- set up a client:
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
from pyobvector import *
|
|
275
|
+
from pyobvector.client.hybrid_search import HybridSearch
|
|
276
|
+
from sqlalchemy import Column, Integer, VARCHAR
|
|
277
|
+
|
|
278
|
+
client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
|
|
282
|
+
|
|
283
|
+
- create a table with both vector index and full-text index:
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
test_table_name = "hybrid_search_test"
|
|
287
|
+
|
|
288
|
+
# create table with vector and text columns
|
|
289
|
+
client.create_table(
|
|
290
|
+
table_name=test_table_name,
|
|
291
|
+
columns=[
|
|
292
|
+
Column("id", Integer, primary_key=True, autoincrement=False),
|
|
293
|
+
Column("source_id", VARCHAR(32)),
|
|
294
|
+
Column("enabled", Integer),
|
|
295
|
+
Column("vector", VECTOR(3)), # vector column
|
|
296
|
+
Column("title", VARCHAR(255)), # text column for full-text search
|
|
297
|
+
Column("content", VARCHAR(255)), # text column for full-text search
|
|
298
|
+
],
|
|
299
|
+
indexes=[
|
|
300
|
+
VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
|
|
301
|
+
],
|
|
302
|
+
mysql_charset='utf8mb4',
|
|
303
|
+
mysql_collate='utf8mb4_unicode_ci',
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
# create full-text indexes for text columns
|
|
307
|
+
from pyobvector import FtsIndexParam, FtsParser
|
|
308
|
+
|
|
309
|
+
for col in ["title", "content"]:
|
|
310
|
+
client.create_fts_idx_with_fts_index_param(
|
|
311
|
+
table_name=test_table_name,
|
|
312
|
+
fts_idx_param=FtsIndexParam(
|
|
313
|
+
index_name=f"fts_idx_{col}",
|
|
314
|
+
field_names=[col],
|
|
315
|
+
parser_type=FtsParser.IK, # or other parser types
|
|
316
|
+
),
|
|
317
|
+
)
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
- insert data:
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
client.insert(
|
|
324
|
+
table_name=test_table_name,
|
|
325
|
+
data=[
|
|
326
|
+
{
|
|
327
|
+
"id": 1,
|
|
328
|
+
"source_id": "3b767712b57211f09c170242ac130008",
|
|
329
|
+
"enabled": 1,
|
|
330
|
+
"vector": [1, 1, 1],
|
|
331
|
+
"title": "企业版和社区版的功能差异",
|
|
332
|
+
"content": "OceanBase 数据库提供企业版和社区版两种形态。",
|
|
333
|
+
},
|
|
334
|
+
{
|
|
335
|
+
"id": 2,
|
|
336
|
+
"vector": [1, 2, 3],
|
|
337
|
+
"enabled": 1,
|
|
338
|
+
"source_id": "3b791472b57211f09c170242ac130008",
|
|
339
|
+
"title": "快速体验 OceanBase 社区版",
|
|
340
|
+
"content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
|
|
341
|
+
},
|
|
342
|
+
# ... more data
|
|
343
|
+
]
|
|
344
|
+
)
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
- perform hybrid search with Elasticsearch-compatible query syntax:
|
|
348
|
+
|
|
349
|
+
```python
|
|
350
|
+
# build query body (compatible with Elasticsearch syntax)
|
|
351
|
+
query = {
|
|
352
|
+
"bool": {
|
|
353
|
+
"must": [
|
|
354
|
+
{
|
|
355
|
+
"query_string": {
|
|
356
|
+
"fields": ["title^10", "content"], # field weights
|
|
357
|
+
"type": "best_fields",
|
|
358
|
+
"query": "oceanbase 数据 迁移",
|
|
359
|
+
"minimum_should_match": "30%",
|
|
360
|
+
"boost": 1
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
],
|
|
364
|
+
"filter": [
|
|
365
|
+
{
|
|
366
|
+
"terms": {
|
|
367
|
+
"source_id": [
|
|
368
|
+
"3b791472b57211f09c170242ac130008",
|
|
369
|
+
"3b7af31eb57211f09c170242ac130008"
|
|
370
|
+
]
|
|
371
|
+
}
|
|
372
|
+
},
|
|
373
|
+
{
|
|
374
|
+
"bool": {
|
|
375
|
+
"must_not": [
|
|
376
|
+
{
|
|
377
|
+
"range": {
|
|
378
|
+
"enabled": {"lt": 1}
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
]
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
],
|
|
385
|
+
"boost": 0.7
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
body = {
|
|
390
|
+
"query": query,
|
|
391
|
+
"knn": { # vector similarity search
|
|
392
|
+
"field": "vector",
|
|
393
|
+
"k": 1024,
|
|
394
|
+
"num_candidates": 1024,
|
|
395
|
+
"query_vector": [1, 2, 3],
|
|
396
|
+
"filter": query, # optional: apply same filter to KNN
|
|
397
|
+
"similarity": 0.2 # similarity threshold
|
|
398
|
+
},
|
|
399
|
+
"from": 0, # pagination offset
|
|
400
|
+
"size": 60 # pagination size
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
# execute hybrid search
|
|
404
|
+
results = client.search(index=test_table_name, body=body)
|
|
405
|
+
# results is a list of matching documents
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
#### Supported Query Types
|
|
409
|
+
|
|
410
|
+
The hybrid search supports Elasticsearch-compatible query syntax:
|
|
411
|
+
|
|
412
|
+
- **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
|
|
413
|
+
- **`query_string`**: Full-text search with field weights, boost, and matching options
|
|
414
|
+
- **`terms`**: Exact match filtering for multiple values
|
|
415
|
+
- **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
|
|
416
|
+
- **`knn`**: Vector similarity search (KNN) with:
|
|
417
|
+
- `field`: Vector field name
|
|
418
|
+
- `query_vector`: Query vector
|
|
419
|
+
- `k`: Number of results to return
|
|
420
|
+
- `num_candidates`: Number of candidates to consider
|
|
421
|
+
- `filter`: Optional filter to apply to KNN search
|
|
422
|
+
- `similarity`: Similarity threshold
|
|
423
|
+
- **Pagination**: `from` and `size` parameters
|
|
424
|
+
|
|
425
|
+
#### Get SQL Query
|
|
426
|
+
|
|
427
|
+
You can also get the actual SQL that will be executed:
|
|
428
|
+
|
|
429
|
+
```python
|
|
430
|
+
sql = client.get_sql(index=test_table_name, body=body)
|
|
431
|
+
print(sql) # prints the SQL query
|
|
432
|
+
```
|
|
433
|
+
|
|
@@ -26,7 +26,13 @@ class HybridSearch(Client):
|
|
|
26
26
|
):
|
|
27
27
|
super().__init__(uri, user, password, db_name, **kwargs)
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
min_required_version = ObVersion.from_db_version_nums(4, 4, 1, 0)
|
|
30
|
+
|
|
31
|
+
if self.ob_version < min_required_version:
|
|
32
|
+
# For versions < 4.4.1.0, check if it's SeekDB
|
|
33
|
+
if self._is_seekdb():
|
|
34
|
+
logger.info("SeekDB detected, allowing hybrid search")
|
|
35
|
+
return
|
|
30
36
|
raise ClusterVersionException(
|
|
31
37
|
code=ErrorCode.NOT_SUPPORTED,
|
|
32
38
|
message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
|
|
@@ -134,8 +134,11 @@ class IndexParam:
|
|
|
134
134
|
if 'efSearch' in params:
|
|
135
135
|
ob_params['ef_search'] = params['efSearch']
|
|
136
136
|
|
|
137
|
-
if self.is_index_type_sparse_vector()
|
|
138
|
-
|
|
137
|
+
if self.is_index_type_sparse_vector():
|
|
138
|
+
if ob_params['distance'] != 'inner_product':
|
|
139
|
+
raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
|
|
140
|
+
if 'sparse_index_type' in self.kwargs:
|
|
141
|
+
ob_params['type'] = self.kwargs['sparse_index_type']
|
|
139
142
|
return ob_params
|
|
140
143
|
|
|
141
144
|
def param_str(self):
|
|
@@ -93,6 +93,26 @@ class ObClient:
|
|
|
93
93
|
self.metadata_obj.clear()
|
|
94
94
|
self.metadata_obj.reflect(bind=self.engine, extend_existing=True)
|
|
95
95
|
|
|
96
|
+
def _is_seekdb(self) -> bool:
|
|
97
|
+
"""Check if the database is SeekDB by querying version.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
bool: True if database is SeekDB, False otherwise
|
|
101
|
+
"""
|
|
102
|
+
is_seekdb = False
|
|
103
|
+
try:
|
|
104
|
+
if hasattr(self, '_is_seekdb_cached'):
|
|
105
|
+
return self._is_seekdb_cached
|
|
106
|
+
with self.engine.connect() as conn:
|
|
107
|
+
result = conn.execute(text("SELECT VERSION()"))
|
|
108
|
+
version_str = [r[0] for r in result][0]
|
|
109
|
+
is_seekdb = "SeekDB" in version_str
|
|
110
|
+
self._is_seekdb_cached = is_seekdb
|
|
111
|
+
logger.debug(f"Version query result: {version_str}, is_seekdb: {is_seekdb}")
|
|
112
|
+
except Exception as e:
|
|
113
|
+
logger.warning(f"Failed to query version: {e}")
|
|
114
|
+
return is_seekdb
|
|
115
|
+
|
|
96
116
|
def _insert_partition_hint_for_query_sql(self, sql: str, partition_hint: str):
|
|
97
117
|
from_index = sql.find("FROM")
|
|
98
118
|
assert from_index != -1
|
|
@@ -99,7 +99,11 @@ class ObVecClient(ObClient):
|
|
|
99
99
|
create_table_sql = str(CreateTable(table).compile(self.engine))
|
|
100
100
|
new_sql = create_table_sql[:create_table_sql.rfind(')')]
|
|
101
101
|
for sparse_vidx in sparse_vidxs:
|
|
102
|
-
|
|
102
|
+
sparse_params = sparse_vidx._parse_kwargs()
|
|
103
|
+
if 'type' in sparse_params:
|
|
104
|
+
new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (type={sparse_params['type']}, distance=inner_product)"
|
|
105
|
+
else:
|
|
106
|
+
new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (distance=inner_product)"
|
|
103
107
|
new_sql += "\n)"
|
|
104
108
|
conn.execute(text(new_sql))
|
|
105
109
|
else:
|
|
@@ -817,11 +817,12 @@ class ObVecJsonTableClient(ObVecClient):
|
|
|
817
817
|
):
|
|
818
818
|
real_user_id = opt_user_id or self.user_id
|
|
819
819
|
|
|
820
|
-
|
|
820
|
+
from_key = 'from_' if 'from_' in ast.args else 'from'
|
|
821
|
+
table_name = ast.args[from_key].this.this.this
|
|
821
822
|
if not self._check_table_exists(table_name):
|
|
822
823
|
raise ValueError(f"Table {table_name} does not exists")
|
|
823
824
|
|
|
824
|
-
ast.args[
|
|
825
|
+
ast.args[from_key].args['this'].args['this'] = to_identifier(name=JSON_TABLE_DATA_TABLE_NAME, quoted=False)
|
|
825
826
|
|
|
826
827
|
col_meta = self.jmetadata.meta_cache[table_name]
|
|
827
828
|
json_table_meta_str = []
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "pyobvector"
|
|
3
|
-
version = "0.2.18"
|
|
3
|
+
version = "0.2.20"
|
|
4
4
|
description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
|
|
5
5
|
authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
|
|
8
8
|
[tool.poetry.dependencies]
|
|
9
9
|
python = ">=3.9,<4.0"
|
|
10
|
-
numpy = ">=1.17.0
|
|
10
|
+
numpy = ">=1.17.0"
|
|
11
11
|
sqlalchemy = ">=1.4,<=3"
|
|
12
12
|
pymysql = "^1.1.1"
|
|
13
13
|
aiomysql = "^0.3.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|