pyobvector 0.2.18__tar.gz → 0.2.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {pyobvector-0.2.18 → pyobvector-0.2.20}/PKG-INFO +176 -8
  2. {pyobvector-0.2.18 → pyobvector-0.2.20}/README.md +174 -6
  3. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/hybrid_search.py +7 -1
  4. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/index_param.py +5 -2
  5. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_client.py +20 -0
  6. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_vec_client.py +5 -1
  7. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/ob_vec_json_table_client.py +3 -2
  8. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyproject.toml +2 -2
  9. {pyobvector-0.2.18 → pyobvector-0.2.20}/LICENSE +0 -0
  10. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/__init__.py +0 -0
  11. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/__init__.py +0 -0
  12. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/collection_schema.py +0 -0
  13. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/enum.py +0 -0
  14. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/exceptions.py +0 -0
  15. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/fts_index_param.py +0 -0
  16. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/milvus_like_client.py +0 -0
  17. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/partitions.py +0 -0
  18. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/client/schema_type.py +0 -0
  19. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/__init__.py +0 -0
  20. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/json_value_returning_func.py +0 -0
  21. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/oceanbase_dialect.py +0 -0
  22. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/json_table/virtual_data_type.py +0 -0
  23. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/__init__.py +0 -0
  24. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/array.py +0 -0
  25. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/dialect.py +0 -0
  26. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/full_text_index.py +0 -0
  27. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/geo_srid_point.py +0 -0
  28. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/gis_func.py +0 -0
  29. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/match_against_func.py +0 -0
  30. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/ob_table.py +0 -0
  31. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/reflection.py +0 -0
  32. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/replace_stmt.py +0 -0
  33. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/sparse_vector.py +0 -0
  34. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vec_dist_func.py +0 -0
  35. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vector.py +0 -0
  36. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/schema/vector_index.py +0 -0
  37. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/__init__.py +0 -0
  38. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/ob_version.py +0 -0
  39. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/sparse_vector.py +0 -0
  40. {pyobvector-0.2.18 → pyobvector-0.2.20}/pyobvector/util/vector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyobvector
3
- Version: 0.2.18
3
+ Version: 0.2.20
4
4
  Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
5
5
  License-File: LICENSE
6
6
  Author: shanhaikang.shk
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Classifier: Programming Language :: Python :: 3.13
15
15
  Classifier: Programming Language :: Python :: 3.14
16
16
  Requires-Dist: aiomysql (>=0.3.2,<0.4.0)
17
- Requires-Dist: numpy (>=1.17.0,<2.0.0)
17
+ Requires-Dist: numpy (>=1.17.0)
18
18
  Requires-Dist: pydantic (>=2.7.0,<3)
19
19
  Requires-Dist: pymysql (>=1.1.1,<2.0.0)
20
20
  Requires-Dist: sqlalchemy (>=1.4,<=3)
@@ -38,7 +38,7 @@ poetry install
38
38
  - install with pip:
39
39
 
40
40
  ```shell
41
- pip install pyobvector==0.2.18
41
+ pip install pyobvector==0.2.20
42
42
  ```
43
43
 
44
44
  ## Build Doc
@@ -56,10 +56,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
56
56
 
57
57
  ## Usage
58
58
 
59
- `pyobvector` supports two modes:
59
+ `pyobvector` supports three modes:
60
60
 
61
61
  - `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
62
62
  - `SQLAlchemy hybrid mode`: You can use the vector storage functions provided by the `ObVecClient` class and execute relational database statements with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
63
+ - `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
63
64
 
64
65
  ### Milvus compatible mode
65
66
 
@@ -233,22 +234,21 @@ res = self.client.ann_search(
233
234
  The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
234
235
 
235
236
  - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
237
+
236
238
  - Column objects: `table.c.id`, `table.c.name`
237
239
  - Expressions: `(table.c.age + 10).label('age_plus_10')`
238
240
  - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
239
241
  - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
240
-
241
242
  - **`output_column_names`** (legacy): Accepts list of column name strings
243
+
242
244
  - Example: `['id', 'name', 'meta']`
243
-
244
245
  - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
245
-
246
246
  - **`distance_threshold`** (optional): Filter results by distance threshold
247
+
247
248
  - Type: `Optional[float]`
248
249
  - Only returns results where `distance <= threshold`
249
250
  - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
250
251
  - Use case: Quality control for similarity search, only return highly similar results
251
-
252
252
  - If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
253
253
 
254
254
  ```python
@@ -287,3 +287,171 @@ engine = create_async_engine(connection_str)
287
287
 
288
288
  - For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
289
289
 
290
+ ### Hybrid Search Mode
291
+
292
+ `pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
293
+
294
+ - set up a client:
295
+
296
+ ```python
297
+ from pyobvector import *
298
+ from pyobvector.client.hybrid_search import HybridSearch
299
+ from sqlalchemy import Column, Integer, VARCHAR
300
+
301
+ client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
302
+ ```
303
+
304
+ **Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
305
+
306
+ - create a table with both vector index and full-text index:
307
+
308
+ ```python
309
+ test_table_name = "hybrid_search_test"
310
+
311
+ # create table with vector and text columns
312
+ client.create_table(
313
+ table_name=test_table_name,
314
+ columns=[
315
+ Column("id", Integer, primary_key=True, autoincrement=False),
316
+ Column("source_id", VARCHAR(32)),
317
+ Column("enabled", Integer),
318
+ Column("vector", VECTOR(3)), # vector column
319
+ Column("title", VARCHAR(255)), # text column for full-text search
320
+ Column("content", VARCHAR(255)), # text column for full-text search
321
+ ],
322
+ indexes=[
323
+ VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
324
+ ],
325
+ mysql_charset='utf8mb4',
326
+ mysql_collate='utf8mb4_unicode_ci',
327
+ )
328
+
329
+ # create full-text indexes for text columns
330
+ from pyobvector import FtsIndexParam, FtsParser
331
+
332
+ for col in ["title", "content"]:
333
+ client.create_fts_idx_with_fts_index_param(
334
+ table_name=test_table_name,
335
+ fts_idx_param=FtsIndexParam(
336
+ index_name=f"fts_idx_{col}",
337
+ field_names=[col],
338
+ parser_type=FtsParser.IK, # or other parser types
339
+ ),
340
+ )
341
+ ```
342
+
343
+ - insert data:
344
+
345
+ ```python
346
+ client.insert(
347
+ table_name=test_table_name,
348
+ data=[
349
+ {
350
+ "id": 1,
351
+ "source_id": "3b767712b57211f09c170242ac130008",
352
+ "enabled": 1,
353
+ "vector": [1, 1, 1],
354
+ "title": "企业版和社区版的功能差异",
355
+ "content": "OceanBase 数据库提供企业版和社区版两种形态。",
356
+ },
357
+ {
358
+ "id": 2,
359
+ "vector": [1, 2, 3],
360
+ "enabled": 1,
361
+ "source_id": "3b791472b57211f09c170242ac130008",
362
+ "title": "快速体验 OceanBase 社区版",
363
+ "content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
364
+ },
365
+ # ... more data
366
+ ]
367
+ )
368
+ ```
369
+
370
+ - perform hybrid search with Elasticsearch-compatible query syntax:
371
+
372
+ ```python
373
+ # build query body (compatible with Elasticsearch syntax)
374
+ query = {
375
+ "bool": {
376
+ "must": [
377
+ {
378
+ "query_string": {
379
+ "fields": ["title^10", "content"], # field weights
380
+ "type": "best_fields",
381
+ "query": "oceanbase 数据 迁移",
382
+ "minimum_should_match": "30%",
383
+ "boost": 1
384
+ }
385
+ }
386
+ ],
387
+ "filter": [
388
+ {
389
+ "terms": {
390
+ "source_id": [
391
+ "3b791472b57211f09c170242ac130008",
392
+ "3b7af31eb57211f09c170242ac130008"
393
+ ]
394
+ }
395
+ },
396
+ {
397
+ "bool": {
398
+ "must_not": [
399
+ {
400
+ "range": {
401
+ "enabled": {"lt": 1}
402
+ }
403
+ }
404
+ ]
405
+ }
406
+ }
407
+ ],
408
+ "boost": 0.7
409
+ }
410
+ }
411
+
412
+ body = {
413
+ "query": query,
414
+ "knn": { # vector similarity search
415
+ "field": "vector",
416
+ "k": 1024,
417
+ "num_candidates": 1024,
418
+ "query_vector": [1, 2, 3],
419
+ "filter": query, # optional: apply same filter to KNN
420
+ "similarity": 0.2 # similarity threshold
421
+ },
422
+ "from": 0, # pagination offset
423
+ "size": 60 # pagination size
424
+ }
425
+
426
+ # execute hybrid search
427
+ results = client.search(index=test_table_name, body=body)
428
+ # results is a list of matching documents
429
+ ```
430
+
431
+ #### Supported Query Types
432
+
433
+ The hybrid search supports Elasticsearch-compatible query syntax:
434
+
435
+ - **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
436
+ - **`query_string`**: Full-text search with field weights, boost, and matching options
437
+ - **`terms`**: Exact match filtering for multiple values
438
+ - **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
439
+ - **`knn`**: Vector similarity search (KNN) with:
440
+ - `field`: Vector field name
441
+ - `query_vector`: Query vector
442
+ - `k`: Number of results to return
443
+ - `num_candidates`: Number of candidates to consider
444
+ - `filter`: Optional filter to apply to KNN search
445
+ - `similarity`: Similarity threshold
446
+ - **Pagination**: `from` and `size` parameters
447
+
448
+ #### Get SQL Query
449
+
450
+ You can also get the actual SQL that will be executed:
451
+
452
+ ```python
453
+ sql = client.get_sql(index=test_table_name, body=body)
454
+ print(sql) # prints the SQL query
455
+ ```
456
+
457
+
@@ -15,7 +15,7 @@ poetry install
15
15
  - install with pip:
16
16
 
17
17
  ```shell
18
- pip install pyobvector==0.2.18
18
+ pip install pyobvector==0.2.20
19
19
  ```
20
20
 
21
21
  ## Build Doc
@@ -33,10 +33,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
33
33
 
34
34
  ## Usage
35
35
 
36
- `pyobvector` supports two modes:
36
+ `pyobvector` supports three modes:
37
37
 
38
38
  - `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
39
39
  - `SQLAlchemy hybrid mode`: You can use the vector storage functions provided by the `ObVecClient` class and execute relational database statements with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
40
+ - `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
40
41
 
41
42
  ### Milvus compatible mode
42
43
 
@@ -210,22 +211,21 @@ res = self.client.ann_search(
210
211
  The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
211
212
 
212
213
  - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
214
+
213
215
  - Column objects: `table.c.id`, `table.c.name`
214
216
  - Expressions: `(table.c.age + 10).label('age_plus_10')`
215
217
  - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
216
218
  - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
217
-
218
219
  - **`output_column_names`** (legacy): Accepts list of column name strings
220
+
219
221
  - Example: `['id', 'name', 'meta']`
220
-
221
222
  - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
222
-
223
223
  - **`distance_threshold`** (optional): Filter results by distance threshold
224
+
224
225
  - Type: `Optional[float]`
225
226
  - Only returns results where `distance <= threshold`
226
227
  - Example: `distance_threshold=0.5` returns only results with distance <= 0.5
227
228
  - Use case: Quality control for similarity search, only return highly similar results
228
-
229
229
  - If you want to use the pure `SQLAlchemy` API with the `OceanBase` dialect, you can get an `SQLAlchemy.engine` via `client.engine`. The engine can also be created as follows:
230
230
 
231
231
  ```python
@@ -263,3 +263,171 @@ engine = create_async_engine(connection_str)
263
263
  ```
264
264
 
265
265
  - For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
266
+
267
+ ### Hybrid Search Mode
268
+
269
+ `pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
270
+
271
+ - set up a client:
272
+
273
+ ```python
274
+ from pyobvector import *
275
+ from pyobvector.client.hybrid_search import HybridSearch
276
+ from sqlalchemy import Column, Integer, VARCHAR
277
+
278
+ client = HybridSearch(uri="127.0.0.1:2881", user="test@test")
279
+ ```
280
+
281
+ **Note**: Hybrid search requires OceanBase version >= 4.4.1.0, or SeekDB.
282
+
283
+ - create a table with both vector index and full-text index:
284
+
285
+ ```python
286
+ test_table_name = "hybrid_search_test"
287
+
288
+ # create table with vector and text columns
289
+ client.create_table(
290
+ table_name=test_table_name,
291
+ columns=[
292
+ Column("id", Integer, primary_key=True, autoincrement=False),
293
+ Column("source_id", VARCHAR(32)),
294
+ Column("enabled", Integer),
295
+ Column("vector", VECTOR(3)), # vector column
296
+ Column("title", VARCHAR(255)), # text column for full-text search
297
+ Column("content", VARCHAR(255)), # text column for full-text search
298
+ ],
299
+ indexes=[
300
+ VectorIndex("vec_idx", "vector", params="distance=l2, type=hnsw, lib=vsag"),
301
+ ],
302
+ mysql_charset='utf8mb4',
303
+ mysql_collate='utf8mb4_unicode_ci',
304
+ )
305
+
306
+ # create full-text indexes for text columns
307
+ from pyobvector import FtsIndexParam, FtsParser
308
+
309
+ for col in ["title", "content"]:
310
+ client.create_fts_idx_with_fts_index_param(
311
+ table_name=test_table_name,
312
+ fts_idx_param=FtsIndexParam(
313
+ index_name=f"fts_idx_{col}",
314
+ field_names=[col],
315
+ parser_type=FtsParser.IK, # or other parser types
316
+ ),
317
+ )
318
+ ```
319
+
320
+ - insert data:
321
+
322
+ ```python
323
+ client.insert(
324
+ table_name=test_table_name,
325
+ data=[
326
+ {
327
+ "id": 1,
328
+ "source_id": "3b767712b57211f09c170242ac130008",
329
+ "enabled": 1,
330
+ "vector": [1, 1, 1],
331
+ "title": "企业版和社区版的功能差异",
332
+ "content": "OceanBase 数据库提供企业版和社区版两种形态。",
333
+ },
334
+ {
335
+ "id": 2,
336
+ "vector": [1, 2, 3],
337
+ "enabled": 1,
338
+ "source_id": "3b791472b57211f09c170242ac130008",
339
+ "title": "快速体验 OceanBase 社区版",
340
+ "content": "本文根据使用场景详细介绍如何快速部署 OceanBase 数据库。",
341
+ },
342
+ # ... more data
343
+ ]
344
+ )
345
+ ```
346
+
347
+ - perform hybrid search with Elasticsearch-compatible query syntax:
348
+
349
+ ```python
350
+ # build query body (compatible with Elasticsearch syntax)
351
+ query = {
352
+ "bool": {
353
+ "must": [
354
+ {
355
+ "query_string": {
356
+ "fields": ["title^10", "content"], # field weights
357
+ "type": "best_fields",
358
+ "query": "oceanbase 数据 迁移",
359
+ "minimum_should_match": "30%",
360
+ "boost": 1
361
+ }
362
+ }
363
+ ],
364
+ "filter": [
365
+ {
366
+ "terms": {
367
+ "source_id": [
368
+ "3b791472b57211f09c170242ac130008",
369
+ "3b7af31eb57211f09c170242ac130008"
370
+ ]
371
+ }
372
+ },
373
+ {
374
+ "bool": {
375
+ "must_not": [
376
+ {
377
+ "range": {
378
+ "enabled": {"lt": 1}
379
+ }
380
+ }
381
+ ]
382
+ }
383
+ }
384
+ ],
385
+ "boost": 0.7
386
+ }
387
+ }
388
+
389
+ body = {
390
+ "query": query,
391
+ "knn": { # vector similarity search
392
+ "field": "vector",
393
+ "k": 1024,
394
+ "num_candidates": 1024,
395
+ "query_vector": [1, 2, 3],
396
+ "filter": query, # optional: apply same filter to KNN
397
+ "similarity": 0.2 # similarity threshold
398
+ },
399
+ "from": 0, # pagination offset
400
+ "size": 60 # pagination size
401
+ }
402
+
403
+ # execute hybrid search
404
+ results = client.search(index=test_table_name, body=body)
405
+ # results is a list of matching documents
406
+ ```
407
+
408
+ #### Supported Query Types
409
+
410
+ The hybrid search supports Elasticsearch-compatible query syntax:
411
+
412
+ - **`bool` query**: Combine multiple queries with `must`, `must_not`, `should`, `filter`
413
+ - **`query_string`**: Full-text search with field weights, boost, and matching options
414
+ - **`terms`**: Exact match filtering for multiple values
415
+ - **`range`**: Range queries (`lt`, `lte`, `gt`, `gte`)
416
+ - **`knn`**: Vector similarity search (KNN) with:
417
+ - `field`: Vector field name
418
+ - `query_vector`: Query vector
419
+ - `k`: Number of results to return
420
+ - `num_candidates`: Number of candidates to consider
421
+ - `filter`: Optional filter to apply to KNN search
422
+ - `similarity`: Similarity threshold
423
+ - **Pagination**: `from` and `size` parameters
424
+
425
+ #### Get SQL Query
426
+
427
+ You can also get the actual SQL that will be executed:
428
+
429
+ ```python
430
+ sql = client.get_sql(index=test_table_name, body=body)
431
+ print(sql) # prints the SQL query
432
+ ```
433
+
@@ -26,7 +26,13 @@ class HybridSearch(Client):
26
26
  ):
27
27
  super().__init__(uri, user, password, db_name, **kwargs)
28
28
 
29
- if self.ob_version < ObVersion.from_db_version_nums(4, 4, 1, 0):
29
+ min_required_version = ObVersion.from_db_version_nums(4, 4, 1, 0)
30
+
31
+ if self.ob_version < min_required_version:
32
+ # For versions < 4.4.1.0, check if it's SeekDB
33
+ if self._is_seekdb():
34
+ logger.info("SeekDB detected, allowing hybrid search")
35
+ return
30
36
  raise ClusterVersionException(
31
37
  code=ErrorCode.NOT_SUPPORTED,
32
38
  message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
@@ -134,8 +134,11 @@ class IndexParam:
134
134
  if 'efSearch' in params:
135
135
  ob_params['ef_search'] = params['efSearch']
136
136
 
137
- if self.is_index_type_sparse_vector() and ob_params['distance'] != 'inner_product':
138
- raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
137
+ if self.is_index_type_sparse_vector():
138
+ if ob_params['distance'] != 'inner_product':
139
+ raise ValueError("Metric type should be 'inner_product' for sparse vector index.")
140
+ if 'sparse_index_type' in self.kwargs:
141
+ ob_params['type'] = self.kwargs['sparse_index_type']
139
142
  return ob_params
140
143
 
141
144
  def param_str(self):
@@ -93,6 +93,26 @@ class ObClient:
93
93
  self.metadata_obj.clear()
94
94
  self.metadata_obj.reflect(bind=self.engine, extend_existing=True)
95
95
 
96
+ def _is_seekdb(self) -> bool:
97
+ """Check if the database is SeekDB by querying version.
98
+
99
+ Returns:
100
+ bool: True if database is SeekDB, False otherwise
101
+ """
102
+ is_seekdb = False
103
+ try:
104
+ if hasattr(self, '_is_seekdb_cached'):
105
+ return self._is_seekdb_cached
106
+ with self.engine.connect() as conn:
107
+ result = conn.execute(text("SELECT VERSION()"))
108
+ version_str = [r[0] for r in result][0]
109
+ is_seekdb = "SeekDB" in version_str
110
+ self._is_seekdb_cached = is_seekdb
111
+ logger.debug(f"Version query result: {version_str}, is_seekdb: {is_seekdb}")
112
+ except Exception as e:
113
+ logger.warning(f"Failed to query version: {e}")
114
+ return is_seekdb
115
+
96
116
  def _insert_partition_hint_for_query_sql(self, sql: str, partition_hint: str):
97
117
  from_index = sql.find("FROM")
98
118
  assert from_index != -1
@@ -99,7 +99,11 @@ class ObVecClient(ObClient):
99
99
  create_table_sql = str(CreateTable(table).compile(self.engine))
100
100
  new_sql = create_table_sql[:create_table_sql.rfind(')')]
101
101
  for sparse_vidx in sparse_vidxs:
102
- new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (distance=inner_product)"
102
+ sparse_params = sparse_vidx._parse_kwargs()
103
+ if 'type' in sparse_params:
104
+ new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (type={sparse_params['type']}, distance=inner_product)"
105
+ else:
106
+ new_sql += f",\n\tVECTOR INDEX {sparse_vidx.index_name}({sparse_vidx.field_name}) with (distance=inner_product)"
103
107
  new_sql += "\n)"
104
108
  conn.execute(text(new_sql))
105
109
  else:
@@ -817,11 +817,12 @@ class ObVecJsonTableClient(ObVecClient):
817
817
  ):
818
818
  real_user_id = opt_user_id or self.user_id
819
819
 
820
- table_name = ast.args['from'].this.this.this
820
+ from_key = 'from_' if 'from_' in ast.args else 'from'
821
+ table_name = ast.args[from_key].this.this.this
821
822
  if not self._check_table_exists(table_name):
822
823
  raise ValueError(f"Table {table_name} does not exists")
823
824
 
824
- ast.args['from'].args['this'].args['this'] = to_identifier(name=JSON_TABLE_DATA_TABLE_NAME, quoted=False)
825
+ ast.args[from_key].args['this'].args['this'] = to_identifier(name=JSON_TABLE_DATA_TABLE_NAME, quoted=False)
825
826
 
826
827
  col_meta = self.jmetadata.meta_cache[table_name]
827
828
  json_table_meta_str = []
@@ -1,13 +1,13 @@
1
1
  [tool.poetry]
2
2
  name = "pyobvector"
3
- version = "0.2.18"
3
+ version = "0.2.20"
4
4
  description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
5
5
  authors = ["shanhaikang.shk <shanhaikang.shk@oceanbase.com>"]
6
6
  readme = "README.md"
7
7
 
8
8
  [tool.poetry.dependencies]
9
9
  python = ">=3.9,<4.0"
10
- numpy = ">=1.17.0,<2.0.0"
10
+ numpy = ">=1.17.0"
11
11
  sqlalchemy = ">=1.4,<=3"
12
12
  pymysql = "^1.1.1"
13
13
  aiomysql = "^0.3.2"
File without changes