pyobvector 0.2.17__tar.gz → 0.2.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {pyobvector-0.2.17 → pyobvector-0.2.19}/PKG-INFO +179 -8
  2. {pyobvector-0.2.17 → pyobvector-0.2.19}/README.md +177 -6
  3. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/exceptions.py +1 -1
  4. pyobvector-0.2.19/pyobvector/client/hybrid_search.py +91 -0
  5. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/ob_vec_client.py +1 -1
  6. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyproject.toml +2 -2
  7. {pyobvector-0.2.17 → pyobvector-0.2.19}/LICENSE +0 -0
  8. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/__init__.py +0 -0
  9. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/__init__.py +0 -0
  10. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/collection_schema.py +0 -0
  11. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/enum.py +0 -0
  12. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/fts_index_param.py +0 -0
  13. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/index_param.py +0 -0
  14. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/milvus_like_client.py +0 -0
  15. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/ob_client.py +0 -0
  16. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/ob_vec_json_table_client.py +0 -0
  17. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/partitions.py +0 -0
  18. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/client/schema_type.py +0 -0
  19. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/json_table/__init__.py +0 -0
  20. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/json_table/json_value_returning_func.py +0 -0
  21. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/json_table/oceanbase_dialect.py +0 -0
  22. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/json_table/virtual_data_type.py +0 -0
  23. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/__init__.py +0 -0
  24. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/array.py +0 -0
  25. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/dialect.py +0 -0
  26. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/full_text_index.py +0 -0
  27. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/geo_srid_point.py +0 -0
  28. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/gis_func.py +0 -0
  29. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/match_against_func.py +0 -0
  30. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/ob_table.py +0 -0
  31. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/reflection.py +0 -0
  32. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/replace_stmt.py +0 -0
  33. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/sparse_vector.py +0 -0
  34. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/vec_dist_func.py +0 -0
  35. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/vector.py +0 -0
  36. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/schema/vector_index.py +0 -0
  37. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/util/__init__.py +0 -0
  38. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/util/ob_version.py +0 -0
  39. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/util/sparse_vector.py +0 -0
  40. {pyobvector-0.2.17 → pyobvector-0.2.19}/pyobvector/util/vector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyobvector
3
- Version: 0.2.17
3
+ Version: 0.2.19
4
4
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
5
5
  License-File: LICENSE
6
6
  Author: shanhaikang.shk
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Classifier: Programming Language :: Python :: 3.13
15
15
  Classifier: Programming Language :: Python :: 3.14
16
16
  Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
17
- Requires-Dist: numpy (>=1.17.0,<2.0.0)
17
+ Requires-Dist: numpy (>=1.17.0)
18
18
  Requires-Dist: pydantic (>=2.7.0,<3)
19
19
  Requires-Dist: pymysql (>=1.1.1,<2.0.0)
20
20
  Requires-Dist: sqlalchemy (>=1.4,<=3)
@@ -38,7 +38,7 @@ poetry install
38
38
  - install with pip:
39
39
 
40
40
  ```shell
41
- pip install pyobvector==0.2.17
41
+ pip install pyobvector==0.2.19
42
42
  ```
43
43
 
44
44
  ## Build Doc
@@ -50,12 +50,17 @@ mkdir build
50
50
  make html
51
51
  ```
52
52
 
53
+ ## Release Notes
54
+
55
+ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
56
+
53
57
  ## Usage
54
58
 
55
- `pyobvector` supports two modes:
59
+ `pyobvector` supports three modes:
56
60
 
57
61
  - `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
58
62
  - `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
63
+ - `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
59
64
 
60
65
  ### Milvus compatible mode
61
66
 
@@ -229,22 +234,21 @@ res = self.client.ann_search(
229
234
  The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
230
235
 
231
236
  - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
237
+
232
238
  - Column objects: `table.c.id`, `table.c.name`
233
239
  - Expressions: `(table.c.age + 10).label('age_plus_10')`
234
240
  - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
235
241
  - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
236
-
237
242
  - **`output_column_names`** (legacy): Accepts list of column name strings
238
- - Example: `['id', 'name', 'meta']`
239
243
 
244
+ - Example: `['id', 'name', 'meta']`
240
245
  - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
241
-
242
246
  - **`distance_threshold`** (optional): Filter results by distance threshold
247
+
243
248
  - Type: `Optional[float]`
244
249
  - Only returns results where `distance <= threshold`
245
250
  - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
246
251
  - Use case: Quality control for similarity search, only return highly similar results
247
-
248
252
  - If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
249
253
 
250
254
  ```python
@@ -283,3 +287,170 @@ engine = create_async_engine(connection_str)
283
287
 
284
288
  - For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
285
289
 
290
+ ### Hybrid Search Mode
291
+
292
+ `pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
293
+
294
+ - set up a client:
295
+
296
+ ```python
297
+ from pyobvector import *
298
+ from pyobvector.client.hybrid_search import HybridSearch
299
+ from sqlalchemy import Column, Integer, VARCHAR
300
+
301
+ client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
302
+ ```
303
+
304
+ **Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
305
+
306
+ - create a table with both vector index and full-text index:
307
+
308
+ ```python
309
+ test_table_name = "hybrid_search_test"
310
+
311
+ # create table with vector and text columns
312
+ client.create_table(
313
+ table_name=test_table_name,
314
+ columns=[
315
+ Column("id", Integer, primary_key=True, autoincrement=False),
316
+ Column("source_id", VARCHAR(32)),
317
+ Column("enabled", Integer),
318
+ Column("vector", VECTOR(3)), # vector column
319
+ Column("title", VARCHAR(255)), # text column for full-text search
320
+ Column("content", VARCHAR(255)), # text column for full-text search
321
+ ],
322
+ indexes=[
323
+ VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
324
+ ],
325
+ mysql_charset='utf8mb4',
326
+ mysql_collate='utf8mb4_unicode_ci',
327
+ )
328
+
329
+ # create full-text indexes for text columns
330
+ from pyobvector import FtsIndexParam, FtsParser
331
+
332
+ for col in ["title", "content"]:
333
+ client.create_fts_idx_with_fts_index_param(
334
+ table_name=test_table_name,
335
+ fts_idx_param=FtsIndexParam(
336
+ index_name=f"fts_idx_{col}",
337
+ field_names=[col],
338
+ parser_type=FtsParser.IK, # or other parser types
339
+ ),
340
+ )
341
+ ```
342
+
343
+ - insert data:
344
+
345
+ ```python
346
+ client.insert(
347
+ table_name=test_table_name,
348
+ data=[
349
+ {
350
+ "id": 1,
351
+ "source_id": "3b767712b57211f09c170242ac130008",
352
+ "enabled": 1,
353
+ "vector": [1, 1, 1],
354
+ "title": "企业版和社区版的功能差异",
355
+ "content": "OceanBase 数据库提供企业版和社区版两种形态。",
356
+ },
357
+ {
358
+ "id": 2,
359
+ "vector": [1, 2, 3],
360
+ "enabled": 1,
361
+ "source_id": "3b791472b57211f09c170242ac130008",
362
+ "title": "快速体验 OceanBase 社区版",
363
+ "content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
364
+ },
365
+ # ... more data
366
+ ]
367
+ )
368
+ ```
369
+
370
+ - perform hybrid search with Elasticsearch-compatible query syntax:
371
+
372
+ ```python
373
+ # build query body (compatible with Elasticsearch syntax)
374
+ query = {
375
+ "bool": {
376
+ "must": [
377
+ {
378
+ "query_string": {
379
+ "fields": ["title^10", "content"], # field weights
380
+ "type": "best_fields",
381
+ "query": "oceanbase 数据 迁移",
382
+ "minimum_should_match": "30%",
383
+ "boost": 1
384
+ }
385
+ }
386
+ ],
387
+ "filter": [
388
+ {
389
+ "terms": {
390
+ "source_id": [
391
+ "3b791472b57211f09c170242ac130008",
392
+ "3b7af31eb57211f09c170242ac130008"
393
+ ]
394
+ }
395
+ },
396
+ {
397
+ "bool": {
398
+ "must_not": [
399
+ {
400
+ "range": {
401
+ "enabled": {"lt": 1}
402
+ }
403
+ }
404
+ ]
405
+ }
406
+ }
407
+ ],
408
+ "boost": 0.7
409
+ }
410
+ }
411
+
412
+ body = {
413
+ "query": query,
414
+ "knn": { # vector similarity search
415
+ "field": "vector",
416
+ "k": 1024,
417
+ "num_candidates": 1024,
418
+ "query_vector": [1, 2, 3],
419
+ "filter": query, # optional: apply same filter to KNN
420
+ "similarity": 0.2 # similarity threshold
421
+ },
422
+ "from": 0, # pagination offset
423
+ "size": 60 # pagination size
424
+ }
425
+
426
+ # execute hybrid search
427
+ results = client.search(index=test_table_name, body=body)
428
+ # results is a list of matching documents
429
+ ```
430
+
431
+ #### Supported Query Types
432
+
433
+ The hybrid search supports Elasticsearch-compatible query syntax:
434
+
435
+ - **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
436
+ - **`query_string`**: Full-text search with field weights, boost, and matching options
437
+ - **`terms`**: Exact match filtering for multiple values
438
+ - **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
439
+ - **`knn`**: Vector similarity search (KNN) with:
440
+ - `field`: Vector field name
441
+ - `query_vector`: Query vector
442
+ - `k`: Number of results to return
443
+ - `num_candidates`: Number of candidates to consider
444
+ - `filter`: Optional filter to apply to KNN search
445
+ - `similarity`: Similarity threshold
446
+ - **Pagination**: `from` and `size` parameters
447
+
448
+ #### Get SQL Query
449
+
450
+ You can also get the actual SQL that will be executed:
451
+
452
+ ```python
453
+ sql = client.get_sql(index=test_table_name, body=body)
454
+ print(sql) # prints the SQL query
455
+ ```
456
+
@@ -15,7 +15,7 @@ poetry install
15
15
  - install with pip:
16
16
 
17
17
  ```shell
18
- pip install pyobvector==0.2.17
18
+ pip install pyobvector==0.2.19
19
19
  ```
20
20
 
21
21
  ## Build Doc
@@ -27,12 +27,17 @@ mkdir build
27
27
  make html
28
28
  ```
29
29
 
30
+ ## Release Notes
31
+
32
+ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.md).
33
+
30
34
  ## Usage
31
35
 
32
- `pyobvector` supports two modes:
36
+ `pyobvector` supports three modes:
33
37
 
34
38
  - `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
35
39
  - `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
40
+ - `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
36
41
 
37
42
  ### Milvus compatible mode
38
43
 
@@ -206,22 +211,21 @@ res = self.client.ann_search(
206
211
  The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
207
212
 
208
213
  - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
214
+
209
215
  - Column objects: `table.c.id`, `table.c.name`
210
216
  - Expressions: `(table.c.age + 10).label('age_plus_10')`
211
217
  - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
212
218
  - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
213
-
214
219
  - **`output_column_names`** (legacy): Accepts list of column name strings
215
- - Example: `['id', 'name', 'meta']`
216
220
 
221
+ - Example: `['id', 'name', 'meta']`
217
222
  - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
218
-
219
223
  - **`distance_threshold`** (optional): Filter results by distance threshold
224
+
220
225
  - Type: `Optional[float]`
221
226
  - Only returns results where `distance <= threshold`
222
227
  - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
223
228
  - Use case: Quality control for similarity search, only return highly similar results
224
-
225
229
  - If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
226
230
 
227
231
  ```python
@@ -259,3 +263,170 @@ engine = create_async_engine(connection_str)
259
263
  ```
260
264
 
261
265
  - For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
266
+
267
+ ### Hybrid Search Mode
268
+
269
+ `pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
270
+
271
+ - set up a client:
272
+
273
+ ```python
274
+ from pyobvector import *
275
+ from pyobvector.client.hybrid_search import HybridSearch
276
+ from sqlalchemy import Column, Integer, VARCHAR
277
+
278
+ client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
279
+ ```
280
+
281
+ **Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
282
+
283
+ - create a table with both vector index and full-text index:
284
+
285
+ ```python
286
+ test_table_name = "hybrid_search_test"
287
+
288
+ # create table with vector and text columns
289
+ client.create_table(
290
+ table_name=test_table_name,
291
+ columns=[
292
+ Column("id", Integer, primary_key=True, autoincrement=False),
293
+ Column("source_id", VARCHAR(32)),
294
+ Column("enabled", Integer),
295
+ Column("vector", VECTOR(3)), # vector column
296
+ Column("title", VARCHAR(255)), # text column for full-text search
297
+ Column("content", VARCHAR(255)), # text column for full-text search
298
+ ],
299
+ indexes=[
300
+ VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
301
+ ],
302
+ mysql_charset='utf8mb4',
303
+ mysql_collate='utf8mb4_unicode_ci',
304
+ )
305
+
306
+ # create full-text indexes for text columns
307
+ from pyobvector import FtsIndexParam, FtsParser
308
+
309
+ for col in ["title", "content"]:
310
+ client.create_fts_idx_with_fts_index_param(
311
+ table_name=test_table_name,
312
+ fts_idx_param=FtsIndexParam(
313
+ index_name=f"fts_idx_{col}",
314
+ field_names=[col],
315
+ parser_type=FtsParser.IK, # or other parser types
316
+ ),
317
+ )
318
+ ```
319
+
320
+ - insert data:
321
+
322
+ ```python
323
+ client.insert(
324
+ table_name=test_table_name,
325
+ data=[
326
+ {
327
+ "id": 1,
328
+ "source_id": "3b767712b57211f09c170242ac130008",
329
+ "enabled": 1,
330
+ "vector": [1, 1, 1],
331
+ "title": "企业版和社区版的功能差异",
332
+ "content": "OceanBase 数据库提供企业版和社区版两种形态。",
333
+ },
334
+ {
335
+ "id": 2,
336
+ "vector": [1, 2, 3],
337
+ "enabled": 1,
338
+ "source_id": "3b791472b57211f09c170242ac130008",
339
+ "title": "快速体验 OceanBase 社区版",
340
+ "content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
341
+ },
342
+ # ... more data
343
+ ]
344
+ )
345
+ ```
346
+
347
+ - perform hybrid search with Elasticsearch-compatible query syntax:
348
+
349
+ ```python
350
+ # build query body (compatible with Elasticsearch syntax)
351
+ query = {
352
+ "bool": {
353
+ "must": [
354
+ {
355
+ "query_string": {
356
+ "fields": ["title^10", "content"], # field weights
357
+ "type": "best_fields",
358
+ "query": "oceanbase 数据 迁移",
359
+ "minimum_should_match": "30%",
360
+ "boost": 1
361
+ }
362
+ }
363
+ ],
364
+ "filter": [
365
+ {
366
+ "terms": {
367
+ "source_id": [
368
+ "3b791472b57211f09c170242ac130008",
369
+ "3b7af31eb57211f09c170242ac130008"
370
+ ]
371
+ }
372
+ },
373
+ {
374
+ "bool": {
375
+ "must_not": [
376
+ {
377
+ "range": {
378
+ "enabled": {"lt": 1}
379
+ }
380
+ }
381
+ ]
382
+ }
383
+ }
384
+ ],
385
+ "boost": 0.7
386
+ }
387
+ }
388
+
389
+ body = {
390
+ "query": query,
391
+ "knn": { # vector similarity search
392
+ "field": "vector",
393
+ "k": 1024,
394
+ "num_candidates": 1024,
395
+ "query_vector": [1, 2, 3],
396
+ "filter": query, # optional: apply same filter to KNN
397
+ "similarity": 0.2 # similarity threshold
398
+ },
399
+ "from": 0, # pagination offset
400
+ "size": 60 # pagination size
401
+ }
402
+
403
+ # execute hybrid search
404
+ results = client.search(index=test_table_name, body=body)
405
+ # results is a list of matching documents
406
+ ```
407
+
408
+ #### Supported Query Types
409
+
410
+ The hybrid search supports Elasticsearch-compatible query syntax:
411
+
412
+ - **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
413
+ - **`query_string`**: Full-text search with field weights, boost, and matching options
414
+ - **`terms`**: Exact match filtering for multiple values
415
+ - **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
416
+ - **`knn`**: Vector similarity search (KNN) with:
417
+ - `field`: Vector field name
418
+ - `query_vector`: Query vector
419
+ - `k`: Number of results to return
420
+ - `num_candidates`: Number of candidates to consider
421
+ - `filter`: Optional filter to apply to KNN search
422
+ - `similarity`: Similarity threshold
423
+ - **Pagination**: `from` and `size` parameters
424
+
425
+ #### Get SQL Query
426
+
427
+ You can also get the actual SQL that will be executed:
428
+
429
+ ```python
430
+ sql = client.get_sql(index=test_table_name, body=body)
431
+ print(sql) # prints the SQL query
432
+ ```
@@ -111,5 +111,5 @@ class ExceptionsMessage:
111
111
  MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
112
112
  UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
113
113
  ClusterVersionIsLow = (
114
- "OceanBase Vector Store is not supported because cluster version is below 4.3.3.0."
114
+ "OceanBase %s feature is not supported because cluster version is below %s."
115
115
  )
@@ -0,0 +1,91 @@
1
+ """OceanBase Hybrid Search Client."""
2
+ import json
3
+ import logging
4
+ from typing import Dict, Any
5
+
6
+ from sqlalchemy import text
7
+
8
+ from .exceptions import ClusterVersionException, ErrorCode, ExceptionsMessage
9
+ from .ob_vec_client import ObVecClient as Client
10
+ from ..util import ObVersion
11
+
12
+ logger = logging.getLogger(__name__)
13
+ logger.setLevel(logging.DEBUG)
14
+
15
+
16
class HybridSearch(Client):
    """OceanBase client for hybrid (full-text + vector) search.

    Thin wrapper over the server-side ``DBMS_HYBRID_SEARCH`` routines; the
    query body is passed to the server as a JSON string.
    """

    # SQL statements are kept as constants so both public methods can share a
    # single execution helper; index and body are always sent as bound
    # parameters, never interpolated into the statement text.
    _HYBRID_SEARCH_STMT = "SELECT DBMS_HYBRID_SEARCH.SEARCH(:index, :body_str)"
    _HYBRID_GET_SQL_STMT = "SELECT DBMS_HYBRID_SEARCH.GET_SQL(:index, :body_str)"

    def __init__(
        self,
        uri: str = "127.0.0.1:2881",
        user: str = "root@test",
        password: str = "",
        db_name: str = "test",
        **kwargs,
    ):
        """Connect to the server and verify that hybrid search is available.

        Args:
            uri: Server address, forwarded to the parent client.
            user: User name, forwarded to the parent client.
            password: Password, forwarded to the parent client.
            db_name: Database name, forwarded to the parent client.
            **kwargs: Extra options forwarded to the parent client.

        Raises:
            ClusterVersionException: If the reported version is below
                4.4.1.0 and ``SELECT version()`` does not contain "SeekDB".
        """
        super().__init__(uri, user, password, db_name, **kwargs)

        min_required_version = ObVersion.from_db_version_nums(4, 4, 1, 0)

        if self.ob_version < min_required_version:
            # Below 4.4.1.0 the feature is only accepted when the server
            # identifies itself as SeekDB in its version string.
            with self.engine.connect() as conn:
                with conn.begin():
                    version_str = conn.execute(text("SELECT version()")).fetchone()[0]
                    if "SeekDB" in version_str:
                        # Lazy %-args: the message is only formatted when the
                        # record is actually emitted.
                        logger.info(
                            "SeekDB detected in version string: %s, allowing hybrid search",
                            version_str,
                        )
                        return
            raise ClusterVersionException(
                code=ErrorCode.NOT_SUPPORTED,
                message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
            )

    def _run_hybrid_routine(self, statement: str, index: str, body: Dict[str, Any]):
        """Execute one ``DBMS_HYBRID_SEARCH`` statement and return its single value.

        Args:
            statement: SQL text with ``:index`` and ``:body_str`` placeholders.
            index: The name of the table to search.
            body: The query body; serialized with ``json.dumps`` before binding.

        Returns:
            The first column of the first row returned by the statement.
        """
        params = {"index": index, "body_str": json.dumps(body)}
        with self.engine.connect() as conn:
            with conn.begin():
                row = conn.execute(text(statement), params).fetchone()
                return row[0]

    def search(
        self,
        index: str,
        body: Dict[str, Any],
        **kwargs,
    ) -> Any:
        """Execute a hybrid search with an Elasticsearch-compatible body.

        Args:
            index: The name of the table to search.
            body: The search query body.
            **kwargs: Accepted for call compatibility; currently not used.

        Returns:
            The server's JSON response decoded with ``json.loads``.
        """
        return json.loads(self._run_hybrid_routine(self._HYBRID_SEARCH_STMT, index, body))

    def get_sql(
        self,
        index: str,
        body: Dict[str, Any],
    ) -> str:
        """Get the SQL that the server would execute for a hybrid search.

        Args:
            index: The name of the table to search.
            body: The hybrid search query body.

        Returns:
            The value returned by ``DBMS_HYBRID_SEARCH.GET_SQL``, undecoded.
        """
        return self._run_hybrid_routine(self._HYBRID_GET_SQL_STMT, index, body)
@@ -44,7 +44,7 @@ class ObVecClient(ObClient):
44
44
  if self.ob_version < ObVersion.from_db_version_nums(4, 3, 3, 0):
45
45
  raise ClusterVersionException(
46
46
  code=ErrorCode.NOT_SUPPORTED,
47
- message=ExceptionsMessage.ClusterVersionIsLow,
47
+ message=ExceptionsMessage.ClusterVersionIsLow % ("Vector Store", "4.3.3.0"),
48
48
  )
49
49
 
50
50
  def _get_sparse_vector_index_params(
@@ -1,13 +1,13 @@
1
1
  [tool.poetry]
2
2
  name = "pyobvector"
3
- version = "0.2.17"
3
+ version = "0.2.19"
4
4
  description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
5
5
  authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
6
6
  readme = "README.md"
7
7
 
8
8
  [tool.poetry.dependencies]
9
9
  python = ">=3.9,<4.0"
10
- numpy = ">=1.17.0,<2.0.0"
10
+ numpy = ">=1.17.0"
11
11
  sqlalchemy = ">=1.4,<=3"
12
12
  pymysql = "^1.1.1"
13
13
  aiomysql = "^0.3.2"
File without changes