pyobvector 0.2.24__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.24 → pyobvector-0.2.26}/.github/workflows/ci.yml +23 -1
- pyobvector-0.2.24/README.md → pyobvector-0.2.26/PKG-INFO +94 -2
- pyobvector-0.2.24/PKG-INFO → pyobvector-0.2.26/README.md +73 -21
- {pyobvector-0.2.24 → pyobvector-0.2.26}/RELEASE_NOTES.md +9 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/__init__.py +1 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/__init__.py +52 -1
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/collection_schema.py +2 -3
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/fts_index_param.py +2 -3
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/index_param.py +1 -2
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/milvus_like_client.py +34 -35
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/ob_client.py +91 -51
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/ob_vec_client.py +34 -22
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/ob_vec_json_table_client.py +10 -10
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/partitions.py +13 -14
- pyobvector-0.2.26/pyobvector/client/seekdb_engine.py +156 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/json_table/oceanbase_dialect.py +24 -13
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/json_table/virtual_data_type.py +6 -7
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/array.py +7 -7
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/dialect.py +12 -1
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/geo_srid_point.py +1 -2
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyproject.toml +6 -2
- pyobvector-0.2.26/tests/test_seekdb_embedded.py +200 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/.github/workflows/python-publish.yml +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/.gitignore +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/.pre-commit-config.yaml +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/.pylintrc +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/LICENSE +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/Makefile +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/enum.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/exceptions.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/hybrid_search.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/client/schema_type.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/json_table/__init__.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/json_table/json_value_returning_func.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/__init__.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/full_text_index.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/gis_func.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/match_against_func.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/ob_table.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/reflection.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/replace_stmt.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/sparse_vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/vec_dist_func.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/schema/vector_index.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/util/__init__.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/util/ob_version.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/util/sparse_vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/pyobvector/util/vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/conf.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/index.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/modules.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/pyobvector.client.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/pyobvector.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/pyobvector.schema.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/source/pyobvector.util.rst +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/__init__.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_fts_index.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_geometry.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_hybrid_search.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_json_table.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_milvus_like_client.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_milvus_like_client_sparse_vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_ob_vec_client.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_ob_vec_client_sparse_vector.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_ob_vec_more_algorithm.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_oceanbase_dialect.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_partition_compile.py +0 -0
- {pyobvector-0.2.24 → pyobvector-0.2.26}/tests/test_reflection.py +0 -0
|
@@ -13,7 +13,7 @@ jobs:
|
|
|
13
13
|
runs-on: ubuntu-latest
|
|
14
14
|
strategy:
|
|
15
15
|
matrix:
|
|
16
|
-
python-version: ["3.
|
|
16
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
17
17
|
image_tag: ["4.4.1.0-100000032025101610"]
|
|
18
18
|
init_sql: ["ALTER SYSTEM ob_vector_memory_limit_percentage = 30; SET GLOBAL ob_query_timeout=100000000;"]
|
|
19
19
|
test_filter: ["tests/test_hybrid_search.py::HybridSearchTest"]
|
|
@@ -65,3 +65,25 @@ jobs:
|
|
|
65
65
|
- name: Run tests
|
|
66
66
|
run: |
|
|
67
67
|
make test TEST_FILTER='${{ matrix.test_filter }}'
|
|
68
|
+
|
|
69
|
+
test-embedded-seekdb:
|
|
70
|
+
name: Test embedded SeekDB
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
steps:
|
|
73
|
+
- name: Check out code
|
|
74
|
+
uses: actions/checkout@v6
|
|
75
|
+
|
|
76
|
+
- name: Install uv
|
|
77
|
+
uses: astral-sh/setup-uv@v6
|
|
78
|
+
with:
|
|
79
|
+
python-version: "3.12"
|
|
80
|
+
|
|
81
|
+
- name: Install dependencies
|
|
82
|
+
run: uv sync --dev
|
|
83
|
+
|
|
84
|
+
- name: Install pyseekdb (optional dependency for embedded SeekDB)
|
|
85
|
+
run: uv pip install pyseekdb
|
|
86
|
+
|
|
87
|
+
- name: Run embedded SeekDB tests
|
|
88
|
+
run: |
|
|
89
|
+
uv run python -m pytest tests/test_seekdb_embedded.py -v
|
|
@@ -1,3 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyobvector
|
|
3
|
+
Version: 0.2.26
|
|
4
|
+
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
|
+
Project-URL: Homepage, https://github.com/oceanbase/pyobvector
|
|
6
|
+
Project-URL: Repository, https://github.com/oceanbase/pyobvector.git
|
|
7
|
+
Author-email: "shanhaikang.shk" <shanhaikang.shk@oceanbase.com>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: obvector,oceanbase,vector store
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Requires-Dist: aiomysql>=0.3.2
|
|
13
|
+
Requires-Dist: numpy>=1.17.0
|
|
14
|
+
Requires-Dist: pydantic<3,>=2.7.0
|
|
15
|
+
Requires-Dist: pymysql>=1.1.1
|
|
16
|
+
Requires-Dist: sqlalchemy<=3,>=1.4
|
|
17
|
+
Requires-Dist: sqlglot>=26.0.1
|
|
18
|
+
Provides-Extra: pyseekdb
|
|
19
|
+
Requires-Dist: pyseekdb>=0.1.0; (python_version >= '3.11') and extra == 'pyseekdb'
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
1
22
|
# pyobvector
|
|
2
23
|
|
|
3
24
|
A python SDK for OceanBase Multimodal Store (Vector Store / Full Text Search / JSON Table), based on SQLAlchemy, compatible with Milvus API.
|
|
@@ -15,7 +36,13 @@ uv sync
|
|
|
15
36
|
- install with pip:
|
|
16
37
|
|
|
17
38
|
```shell
|
|
18
|
-
pip install pyobvector==0.2.
|
|
39
|
+
pip install pyobvector==0.2.26
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
- for **embedded SeekDB** support (local SeekDB without server):
|
|
43
|
+
|
|
44
|
+
```shell
|
|
45
|
+
pip install pyobvector[pyseekdb]
|
|
19
46
|
```
|
|
20
47
|
|
|
21
48
|
## Build Doc
|
|
@@ -33,10 +60,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
|
|
|
33
60
|
|
|
34
61
|
## Usage
|
|
35
62
|
|
|
36
|
-
`pyobvector` supports
|
|
63
|
+
`pyobvector` supports four modes:
|
|
37
64
|
|
|
38
65
|
- `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
|
|
39
66
|
- `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
67
|
+
- `Embedded SeekDB mode`: Use `ObVecClient` or `SeekdbRemoteClient` with local embedded SeekDB (no server). Same API as remote: `create_table`, `insert`, `ann_search`, etc. Requires optional dependency: `pip install pyobvector[pyseekdb]`.
|
|
40
68
|
- `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
|
|
41
69
|
|
|
42
70
|
### Milvus compatible mode
|
|
@@ -264,6 +292,70 @@ engine = create_async_engine(connection_str)
|
|
|
264
292
|
|
|
265
293
|
- For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
|
|
266
294
|
|
|
295
|
+
### Embedded SeekDB mode
|
|
296
|
+
|
|
297
|
+
Use the same ObClient/ObVecClient API with **embedded SeekDB** (local data directory, no server). Install the optional dependency:
|
|
298
|
+
|
|
299
|
+
```shell
|
|
300
|
+
pip install pyobvector[pyseekdb]
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
- connect with path or with an existing `pyseekdb.Client`:
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
from pyobvector import SeekdbRemoteClient, ObVecClient
|
|
307
|
+
from pyobvector.client.ob_client import ObClient
|
|
308
|
+
|
|
309
|
+
# Option 1: path to SeekDB data directory
|
|
310
|
+
client = SeekdbRemoteClient(path="./seekdb_data", database="test")
|
|
311
|
+
|
|
312
|
+
# Option 2: use an existing pyseekdb.Client
|
|
313
|
+
import pyseekdb
|
|
314
|
+
pyseekdb_client = pyseekdb.Client(path="./seekdb_data", database="test")
|
|
315
|
+
client = SeekdbRemoteClient(pyseekdb_client=pyseekdb_client)
|
|
316
|
+
|
|
317
|
+
# Option 3: ObVecClient directly
|
|
318
|
+
client = ObVecClient(path="./seekdb_data", db_name="test")
|
|
319
|
+
|
|
320
|
+
assert isinstance(client, ObVecClient)
|
|
321
|
+
assert isinstance(client, ObClient)
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
- create table, insert, and ann search (same API as remote):
|
|
325
|
+
|
|
326
|
+
```python
|
|
327
|
+
from sqlalchemy import Column, Integer, VARCHAR
|
|
328
|
+
from pyobvector import VECTOR, VectorIndex, l2_distance
|
|
329
|
+
|
|
330
|
+
client.drop_table_if_exist("vec_table")
|
|
331
|
+
client.create_table(
|
|
332
|
+
table_name="vec_table",
|
|
333
|
+
columns=[
|
|
334
|
+
Column("id", Integer, primary_key=True),
|
|
335
|
+
Column("title", VARCHAR(255)),
|
|
336
|
+
Column("vec", VECTOR(3)),
|
|
337
|
+
],
|
|
338
|
+
indexes=[VectorIndex("vec_idx", "vec", params="distance=l2, type=hnsw, lib=vsag")],
|
|
339
|
+
mysql_organization="heap",
|
|
340
|
+
)
|
|
341
|
+
client.insert("vec_table", data=[
|
|
342
|
+
{"id": 1, "title": "doc A", "vec": [1.0, 1.0, 1.0]},
|
|
343
|
+
{"id": 2, "title": "doc B", "vec": [1.0, 2.0, 3.0]},
|
|
344
|
+
])
|
|
345
|
+
res = client.ann_search(
|
|
346
|
+
"vec_table",
|
|
347
|
+
vec_data=[1.0, 2.0, 3.0],
|
|
348
|
+
vec_column_name="vec",
|
|
349
|
+
distance_func=l2_distance,
|
|
350
|
+
with_dist=True,
|
|
351
|
+
topk=5,
|
|
352
|
+
output_column_names=["id", "title"],
|
|
353
|
+
)
|
|
354
|
+
client.drop_table_if_exist("vec_table")
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
- See `tests/test_seekdb_embedded.py` for more examples.
|
|
358
|
+
|
|
267
359
|
### Hybrid Search Mode
|
|
268
360
|
|
|
269
361
|
`pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
|
|
@@ -1,22 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: pyobvector
|
|
3
|
-
Version: 0.2.24
|
|
4
|
-
Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
|
|
5
|
-
Project-URL: Homepage, https://github.com/oceanbase/pyobvector
|
|
6
|
-
Project-URL: Repository, https://github.com/oceanbase/pyobvector.git
|
|
7
|
-
Author-email: "shanhaikang.shk" <shanhaikang.shk@oceanbase.com>
|
|
8
|
-
License-Expression: Apache-2.0
|
|
9
|
-
License-File: LICENSE
|
|
10
|
-
Keywords: obvector,oceanbase,vector store
|
|
11
|
-
Requires-Python: >=3.9
|
|
12
|
-
Requires-Dist: aiomysql>=0.3.2
|
|
13
|
-
Requires-Dist: numpy>=1.17.0
|
|
14
|
-
Requires-Dist: pydantic<3,>=2.7.0
|
|
15
|
-
Requires-Dist: pymysql>=1.1.1
|
|
16
|
-
Requires-Dist: sqlalchemy<=3,>=1.4
|
|
17
|
-
Requires-Dist: sqlglot>=26.0.1
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
|
|
20
1
|
# pyobvector
|
|
21
2
|
|
|
22
3
|
A python SDK for OceanBase Multimodal Store (Vector Store / Full Text Search / JSON Table), based on SQLAlchemy, compatible with Milvus API.
|
|
@@ -34,7 +15,13 @@ uv sync
|
|
|
34
15
|
- install with pip:
|
|
35
16
|
|
|
36
17
|
```shell
|
|
37
|
-
pip install pyobvector==0.2.
|
|
18
|
+
pip install pyobvector==0.2.26
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
- for **embedded SeekDB** support (local SeekDB without server):
|
|
22
|
+
|
|
23
|
+
```shell
|
|
24
|
+
pip install pyobvector[pyseekdb]
|
|
38
25
|
```
|
|
39
26
|
|
|
40
27
|
## Build Doc
|
|
@@ -52,10 +39,11 @@ For detailed release notes and changelog, see [RELEASE_NOTES.md](RELEASE_NOTES.m
|
|
|
52
39
|
|
|
53
40
|
## Usage
|
|
54
41
|
|
|
55
|
-
`pyobvector` supports
|
|
42
|
+
`pyobvector` supports four modes:
|
|
56
43
|
|
|
57
44
|
- `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage in a way similar to the Milvus API
|
|
58
45
|
- `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the `ObVecClient` class and execute the relational database statement with the SQLAlchemy library. In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
46
|
+
- `Embedded SeekDB mode`: Use `ObVecClient` or `SeekdbRemoteClient` with local embedded SeekDB (no server). Same API as remote: `create_table`, `insert`, `ann_search`, etc. Requires optional dependency: `pip install pyobvector[pyseekdb]`.
|
|
59
47
|
- `Hybrid Search mode`: You can use the `HybridSearch` class to perform hybrid search that combines full-text search and vector similarity search, with Elasticsearch-compatible query syntax.
|
|
60
48
|
|
|
61
49
|
### Milvus compatible mode
|
|
@@ -283,6 +271,70 @@ engine = create_async_engine(connection_str)
|
|
|
283
271
|
|
|
284
272
|
- For further usage in pure `SQLAlchemy` mode, please refer to [SQLAlchemy](https://www.sqlalchemy.org/)
|
|
285
273
|
|
|
274
|
+
### Embedded SeekDB mode
|
|
275
|
+
|
|
276
|
+
Use the same ObClient/ObVecClient API with **embedded SeekDB** (local data directory, no server). Install the optional dependency:
|
|
277
|
+
|
|
278
|
+
```shell
|
|
279
|
+
pip install pyobvector[pyseekdb]
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
- connect with path or with an existing `pyseekdb.Client`:
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
from pyobvector import SeekdbRemoteClient, ObVecClient
|
|
286
|
+
from pyobvector.client.ob_client import ObClient
|
|
287
|
+
|
|
288
|
+
# Option 1: path to SeekDB data directory
|
|
289
|
+
client = SeekdbRemoteClient(path="./seekdb_data", database="test")
|
|
290
|
+
|
|
291
|
+
# Option 2: use an existing pyseekdb.Client
|
|
292
|
+
import pyseekdb
|
|
293
|
+
pyseekdb_client = pyseekdb.Client(path="./seekdb_data", database="test")
|
|
294
|
+
client = SeekdbRemoteClient(pyseekdb_client=pyseekdb_client)
|
|
295
|
+
|
|
296
|
+
# Option 3: ObVecClient directly
|
|
297
|
+
client = ObVecClient(path="./seekdb_data", db_name="test")
|
|
298
|
+
|
|
299
|
+
assert isinstance(client, ObVecClient)
|
|
300
|
+
assert isinstance(client, ObClient)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
- create table, insert, and ann search (same API as remote):
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
from sqlalchemy import Column, Integer, VARCHAR
|
|
307
|
+
from pyobvector import VECTOR, VectorIndex, l2_distance
|
|
308
|
+
|
|
309
|
+
client.drop_table_if_exist("vec_table")
|
|
310
|
+
client.create_table(
|
|
311
|
+
table_name="vec_table",
|
|
312
|
+
columns=[
|
|
313
|
+
Column("id", Integer, primary_key=True),
|
|
314
|
+
Column("title", VARCHAR(255)),
|
|
315
|
+
Column("vec", VECTOR(3)),
|
|
316
|
+
],
|
|
317
|
+
indexes=[VectorIndex("vec_idx", "vec", params="distance=l2, type=hnsw, lib=vsag")],
|
|
318
|
+
mysql_organization="heap",
|
|
319
|
+
)
|
|
320
|
+
client.insert("vec_table", data=[
|
|
321
|
+
{"id": 1, "title": "doc A", "vec": [1.0, 1.0, 1.0]},
|
|
322
|
+
{"id": 2, "title": "doc B", "vec": [1.0, 2.0, 3.0]},
|
|
323
|
+
])
|
|
324
|
+
res = client.ann_search(
|
|
325
|
+
"vec_table",
|
|
326
|
+
vec_data=[1.0, 2.0, 3.0],
|
|
327
|
+
vec_column_name="vec",
|
|
328
|
+
distance_func=l2_distance,
|
|
329
|
+
with_dist=True,
|
|
330
|
+
topk=5,
|
|
331
|
+
output_column_names=["id", "title"],
|
|
332
|
+
)
|
|
333
|
+
client.drop_table_if_exist("vec_table")
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
- See `tests/test_seekdb_embedded.py` for more examples.
|
|
337
|
+
|
|
286
338
|
### Hybrid Search Mode
|
|
287
339
|
|
|
288
340
|
`pyobvector` supports hybrid search that combines full-text search and vector similarity search, with query syntax compatible with Elasticsearch. This allows you to perform semantic search with both keyword matching and vector similarity in a single query.
|
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
This file documents all released versions and their notable changes for the pyobvector project. Changes are grouped by version and categorized as Added (new features), Changed (modifications), Fixed (bug fixes), and Security (security updates).
|
|
4
4
|
|
|
5
|
+
## [0.2.26](https://github.com/oceanbase/pyobvector/compare/release-v0.2.25...release-v0.2.26) - 2026-04-15
|
|
6
|
+
|
|
7
|
+
- Fix: compatible with the latest version of sqlglot
|
|
8
|
+
|
|
9
|
+
## [0.2.25](https://github.com/oceanbase/pyobvector/compare/release-v0.2.24...release-v0.2.25) - 2026-03-10
|
|
10
|
+
|
|
11
|
+
- Fix: preserve table options when adding sparse vector indexes
|
|
12
|
+
- Feat: ObClient embedded SeekDB support, docs, tests and CI
|
|
13
|
+
|
|
5
14
|
## [0.2.24](https://github.com/oceanbase/pyobvector/compare/release-v0.2.23...release-v0.2.24) - 2026-02-05
|
|
6
15
|
|
|
7
16
|
- Fix: preserve table options when adding sparse vector indexes
|
|
@@ -5,8 +5,11 @@ in a way similar to the Milvus API.
|
|
|
5
5
|
2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
|
|
6
6
|
`ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
|
|
7
7
|
In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
8
|
+
3. `Embedded SeekDB`: ObClient/ObVecClient support path= or pyseekdb_client= for embedded
|
|
9
|
+
SeekDB (pip install pyobvector[pyseekdb]). Same API as remote: create_table, insert, etc.
|
|
8
10
|
|
|
9
|
-
*
|
|
11
|
+
* SeekdbRemoteClient Connect to embedded (path= / pyseekdb_client=) or remote; returns ObVecClient
|
|
12
|
+
* ObVecClient MySQL/SeekDB client in SQLAlchemy hybrid mode (uri, path, or pyseekdb_client)
|
|
10
13
|
* MilvusLikeClient Milvus compatible client
|
|
11
14
|
* VecIndexType VecIndexType is used to specify vector index type for MilvusLikeClient
|
|
12
15
|
* IndexParam Specify vector index parameters for MilvusLikeClient
|
|
@@ -31,6 +34,9 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
|
|
|
31
34
|
* FtsIndexParam Full Text Search index parameter
|
|
32
35
|
"""
|
|
33
36
|
|
|
37
|
+
import os
|
|
38
|
+
from typing import Any
|
|
39
|
+
|
|
34
40
|
from .ob_vec_client import ObVecClient
|
|
35
41
|
from .milvus_like_client import MilvusLikeClient
|
|
36
42
|
from .ob_vec_json_table_client import ObVecJsonTableClient
|
|
@@ -40,7 +46,52 @@ from .collection_schema import FieldSchema, CollectionSchema
|
|
|
40
46
|
from .partitions import *
|
|
41
47
|
from .fts_index_param import FtsParser, FtsIndexParam
|
|
42
48
|
|
|
49
|
+
|
|
50
|
+
def _resolve_password(password: str) -> str:
|
|
51
|
+
return password or os.environ.get("SEEKDB_PASSWORD", "")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def SeekdbRemoteClient(
|
|
55
|
+
path: str | None = None,
|
|
56
|
+
uri: str | None = None,
|
|
57
|
+
host: str | None = None,
|
|
58
|
+
port: int | None = None,
|
|
59
|
+
tenant: str = "test",
|
|
60
|
+
database: str = "test",
|
|
61
|
+
user: str | None = None,
|
|
62
|
+
password: str = "",
|
|
63
|
+
pyseekdb_client: Any | None = None,
|
|
64
|
+
**kwargs: Any,
|
|
65
|
+
) -> Any:
|
|
66
|
+
"""
|
|
67
|
+
Connect to embedded SeekDB (path= or pyseekdb_client=) or remote OceanBase/SeekDB (uri/host=).
|
|
68
|
+
Returns ObVecClient with the same API (create_table, insert, ann_search, etc.).
|
|
69
|
+
Embedded requires: pip install pyobvector[pyseekdb]
|
|
70
|
+
"""
|
|
71
|
+
password = _resolve_password(password)
|
|
72
|
+
if pyseekdb_client is not None:
|
|
73
|
+
return ObVecClient(pyseekdb_client=pyseekdb_client, **kwargs)
|
|
74
|
+
if path is not None:
|
|
75
|
+
return ObVecClient(path=path, db_name=database, **kwargs)
|
|
76
|
+
if uri is None and host is not None:
|
|
77
|
+
port = port if port is not None else 2881
|
|
78
|
+
uri = f"{host}:{port}"
|
|
79
|
+
if uri is None:
|
|
80
|
+
uri = "127.0.0.1:2881"
|
|
81
|
+
ob_user = user if user is not None else "root"
|
|
82
|
+
if "@" not in ob_user:
|
|
83
|
+
ob_user = f"{ob_user}@{tenant}"
|
|
84
|
+
return ObVecClient(
|
|
85
|
+
uri=uri,
|
|
86
|
+
user=ob_user,
|
|
87
|
+
password=password,
|
|
88
|
+
db_name=database,
|
|
89
|
+
**kwargs,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
|
|
43
93
|
__all__ = [
|
|
94
|
+
"SeekdbRemoteClient",
|
|
44
95
|
"ObVecClient",
|
|
45
96
|
"MilvusLikeClient",
|
|
46
97
|
"ObVecJsonTableClient",
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""FieldSchema & CollectionSchema definition module to be compatible with Milvus."""
|
|
2
2
|
|
|
3
3
|
import copy
|
|
4
|
-
from typing import Optional
|
|
5
4
|
from sqlalchemy import Column
|
|
6
5
|
from .schema_type import DataType, convert_datatype_to_sqltype
|
|
7
6
|
from .exceptions import *
|
|
@@ -129,8 +128,8 @@ class CollectionSchema:
|
|
|
129
128
|
|
|
130
129
|
def __init__(
|
|
131
130
|
self,
|
|
132
|
-
fields:
|
|
133
|
-
partitions:
|
|
131
|
+
fields: list[FieldSchema] | None = None,
|
|
132
|
+
partitions: ObPartition | None = None,
|
|
134
133
|
description: str = "", # ignored in oceanbase
|
|
135
134
|
**kwargs,
|
|
136
135
|
):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""A module to specify fts index parameters"""
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import Optional, Union
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
class FtsParser(Enum):
|
|
@@ -28,13 +27,13 @@ class FtsIndexParam:
|
|
|
28
27
|
self,
|
|
29
28
|
index_name: str,
|
|
30
29
|
field_names: list[str],
|
|
31
|
-
parser_type:
|
|
30
|
+
parser_type: FtsParser | str | None = None,
|
|
32
31
|
):
|
|
33
32
|
self.index_name = index_name
|
|
34
33
|
self.field_names = field_names
|
|
35
34
|
self.parser_type = parser_type
|
|
36
35
|
|
|
37
|
-
def param_str(self) ->
|
|
36
|
+
def param_str(self) -> str | None:
|
|
38
37
|
"""Convert parser type to string format for SQL."""
|
|
39
38
|
if self.parser_type is None:
|
|
40
39
|
return None # Default Space parser, no need to specify
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""A module to specify vector index parameters for MilvusLikeClient"""
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import Union
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
class VecIndexType(Enum):
|
|
@@ -42,7 +41,7 @@ class IndexParam:
|
|
|
42
41
|
self,
|
|
43
42
|
index_name: str,
|
|
44
43
|
field_name: str,
|
|
45
|
-
index_type:
|
|
44
|
+
index_type: VecIndexType | str,
|
|
46
45
|
**kwargs,
|
|
47
46
|
):
|
|
48
47
|
self.index_name = index_name
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import json
|
|
5
|
-
from typing import Optional, Union
|
|
6
5
|
|
|
7
6
|
from sqlalchemy.exc import NoSuchTableError
|
|
8
7
|
from sqlalchemy import (
|
|
@@ -52,15 +51,15 @@ class MilvusLikeClient(Client):
|
|
|
52
51
|
def create_collection(
|
|
53
52
|
self,
|
|
54
53
|
collection_name: str,
|
|
55
|
-
dimension:
|
|
54
|
+
dimension: int | None = None,
|
|
56
55
|
primary_field_name: str = "id",
|
|
57
|
-
id_type:
|
|
56
|
+
id_type: DataType | str = DataType.INT64,
|
|
58
57
|
vector_field_name: str = "vector",
|
|
59
58
|
metric_type: str = "l2",
|
|
60
59
|
auto_id: bool = False,
|
|
61
|
-
timeout:
|
|
62
|
-
schema:
|
|
63
|
-
index_params:
|
|
60
|
+
timeout: float | None = None,
|
|
61
|
+
schema: CollectionSchema | None = None, # Used for custom setup
|
|
62
|
+
index_params: IndexParams | None = None, # Used for custom setup
|
|
64
63
|
max_length: int = 16384,
|
|
65
64
|
**kwargs,
|
|
66
65
|
): # pylint: disable=unused-argument
|
|
@@ -149,7 +148,7 @@ class MilvusLikeClient(Client):
|
|
|
149
148
|
def get_collection_stats(
|
|
150
149
|
self,
|
|
151
150
|
collection_name: str,
|
|
152
|
-
timeout:
|
|
151
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
153
152
|
) -> dict:
|
|
154
153
|
"""Get collection row count.
|
|
155
154
|
|
|
@@ -171,7 +170,7 @@ class MilvusLikeClient(Client):
|
|
|
171
170
|
def has_collection(
|
|
172
171
|
self,
|
|
173
172
|
collection_name: str,
|
|
174
|
-
timeout:
|
|
173
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
175
174
|
) -> bool: # pylint: disable=unused-argument
|
|
176
175
|
"""Check if collection exists.
|
|
177
176
|
|
|
@@ -196,7 +195,7 @@ class MilvusLikeClient(Client):
|
|
|
196
195
|
self,
|
|
197
196
|
old_name: str,
|
|
198
197
|
new_name: str,
|
|
199
|
-
timeout:
|
|
198
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
200
199
|
) -> None:
|
|
201
200
|
"""Rename collection.
|
|
202
201
|
|
|
@@ -236,7 +235,7 @@ class MilvusLikeClient(Client):
|
|
|
236
235
|
self,
|
|
237
236
|
collection_name: str,
|
|
238
237
|
index_params: IndexParams,
|
|
239
|
-
timeout:
|
|
238
|
+
timeout: float | None = None,
|
|
240
239
|
**kwargs,
|
|
241
240
|
): # pylint: disable=unused-argument
|
|
242
241
|
"""Create vector index with index params.
|
|
@@ -269,7 +268,7 @@ class MilvusLikeClient(Client):
|
|
|
269
268
|
self,
|
|
270
269
|
collection_name: str,
|
|
271
270
|
index_name: str,
|
|
272
|
-
timeout:
|
|
271
|
+
timeout: float | None = None,
|
|
273
272
|
**kwargs,
|
|
274
273
|
): # pylint: disable=unused-argument
|
|
275
274
|
"""Drop index on specified collection.
|
|
@@ -357,15 +356,15 @@ class MilvusLikeClient(Client):
|
|
|
357
356
|
def search(
|
|
358
357
|
self,
|
|
359
358
|
collection_name: str,
|
|
360
|
-
data:
|
|
359
|
+
data: list | dict,
|
|
361
360
|
anns_field: str,
|
|
362
361
|
with_dist: bool = False,
|
|
363
362
|
flter=None,
|
|
364
363
|
limit: int = 10,
|
|
365
|
-
output_fields:
|
|
366
|
-
search_params:
|
|
367
|
-
timeout:
|
|
368
|
-
partition_names:
|
|
364
|
+
output_fields: list[str] | None = None,
|
|
365
|
+
search_params: dict | None = None,
|
|
366
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
367
|
+
partition_names: list[str] | None = None,
|
|
369
368
|
**kwargs, # pylint: disable=unused-argument
|
|
370
369
|
) -> list[dict]:
|
|
371
370
|
"""Perform ann search.
|
|
@@ -482,9 +481,9 @@ class MilvusLikeClient(Client):
|
|
|
482
481
|
self,
|
|
483
482
|
collection_name: str,
|
|
484
483
|
flter=None,
|
|
485
|
-
output_fields:
|
|
486
|
-
timeout:
|
|
487
|
-
partition_names:
|
|
484
|
+
output_fields: list[str] | None = None,
|
|
485
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
486
|
+
partition_names: list[str] | None = None,
|
|
488
487
|
**kwargs, # pylint: disable=unused-argument
|
|
489
488
|
) -> list[dict]:
|
|
490
489
|
"""Query records.
|
|
@@ -549,10 +548,10 @@ class MilvusLikeClient(Client):
|
|
|
549
548
|
def get(
|
|
550
549
|
self,
|
|
551
550
|
collection_name: str,
|
|
552
|
-
ids:
|
|
553
|
-
output_fields:
|
|
554
|
-
timeout:
|
|
555
|
-
partition_names:
|
|
551
|
+
ids: list | str | int = None,
|
|
552
|
+
output_fields: list[str] | None = None,
|
|
553
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
554
|
+
partition_names: list[str] | None = None,
|
|
556
555
|
**kwargs, # pylint: disable=unused-argument
|
|
557
556
|
) -> list[dict]:
|
|
558
557
|
"""Get records with specified primary field `ids`.
|
|
@@ -592,7 +591,7 @@ class MilvusLikeClient(Client):
|
|
|
592
591
|
)
|
|
593
592
|
if isinstance(ids, list):
|
|
594
593
|
where_in_clause = table.c[pkey_names[0]].in_(ids)
|
|
595
|
-
elif isinstance(ids,
|
|
594
|
+
elif isinstance(ids, str | int):
|
|
596
595
|
where_in_clause = table.c[pkey_names[0]].in_([ids])
|
|
597
596
|
else:
|
|
598
597
|
raise TypeError("'ids' is not a list/str/int")
|
|
@@ -629,10 +628,10 @@ class MilvusLikeClient(Client):
|
|
|
629
628
|
def delete(
|
|
630
629
|
self,
|
|
631
630
|
collection_name: str,
|
|
632
|
-
ids:
|
|
633
|
-
timeout:
|
|
631
|
+
ids: list | str | int | None = None,
|
|
632
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
634
633
|
flter=None,
|
|
635
|
-
partition_name:
|
|
634
|
+
partition_name: str | None = "",
|
|
636
635
|
**kwargs, # pylint: disable=unused-argument
|
|
637
636
|
) -> dict:
|
|
638
637
|
"""Delete data in collection.
|
|
@@ -667,7 +666,7 @@ class MilvusLikeClient(Client):
|
|
|
667
666
|
)
|
|
668
667
|
if isinstance(ids, list):
|
|
669
668
|
where_in_clause = table.c[pkey_names[0]].in_(ids)
|
|
670
|
-
elif isinstance(ids,
|
|
669
|
+
elif isinstance(ids, str | int):
|
|
671
670
|
where_in_clause = table.c[pkey_names[0]].in_([ids])
|
|
672
671
|
else:
|
|
673
672
|
raise TypeError("'ids' is not a list/str/int")
|
|
@@ -691,9 +690,9 @@ class MilvusLikeClient(Client):
|
|
|
691
690
|
def insert(
|
|
692
691
|
self,
|
|
693
692
|
collection_name: str,
|
|
694
|
-
data:
|
|
695
|
-
timeout:
|
|
696
|
-
partition_name:
|
|
693
|
+
data: dict | list[dict],
|
|
694
|
+
timeout: float | None = None,
|
|
695
|
+
partition_name: str | None = "",
|
|
697
696
|
) -> None: # pylint: disable=unused-argument
|
|
698
697
|
"""Insert data into collection.
|
|
699
698
|
|
|
@@ -717,10 +716,10 @@ class MilvusLikeClient(Client):
|
|
|
717
716
|
def upsert(
|
|
718
717
|
self,
|
|
719
718
|
collection_name: str,
|
|
720
|
-
data:
|
|
721
|
-
timeout:
|
|
722
|
-
partition_name:
|
|
723
|
-
) -> list[
|
|
719
|
+
data: dict | list[dict],
|
|
720
|
+
timeout: float | None = None, # pylint: disable=unused-argument
|
|
721
|
+
partition_name: str | None = "",
|
|
722
|
+
) -> list[str | int]:
|
|
724
723
|
"""Update data in table. If primary key is duplicated, replace it.
|
|
725
724
|
|
|
726
725
|
Args:
|