pyobvector 0.2.21__tar.gz → 0.2.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyobvector-0.2.21 → pyobvector-0.2.23}/.github/workflows/ci.yml +3 -3
- {pyobvector-0.2.21 → pyobvector-0.2.23}/.github/workflows/python-publish.yml +1 -1
- pyobvector-0.2.23/.pre-commit-config.yaml +26 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/LICENSE +1 -1
- pyobvector-0.2.23/Makefile +45 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/PKG-INFO +13 -14
- {pyobvector-0.2.21 → pyobvector-0.2.23}/README.md +12 -13
- {pyobvector-0.2.21 → pyobvector-0.2.23}/RELEASE_NOTES.md +10 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/__init__.py +6 -5
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/__init__.py +5 -4
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/collection_schema.py +5 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/enum.py +1 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/exceptions.py +9 -7
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/fts_index_param.py +8 -4
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/hybrid_search.py +14 -4
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/index_param.py +56 -41
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/milvus_like_client.py +71 -54
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/ob_client.py +20 -16
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/ob_vec_client.py +45 -41
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/ob_vec_json_table_client.py +366 -274
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/partitions.py +81 -39
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/schema_type.py +3 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/json_table/__init__.py +4 -3
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/json_table/json_value_returning_func.py +12 -10
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/json_table/oceanbase_dialect.py +15 -8
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/json_table/virtual_data_type.py +47 -28
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/__init__.py +7 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/array.py +6 -2
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/dialect.py +4 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/full_text_index.py +8 -3
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/geo_srid_point.py +5 -2
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/gis_func.py +23 -11
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/match_against_func.py +10 -5
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/ob_table.py +2 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/reflection.py +25 -8
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/replace_stmt.py +4 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/sparse_vector.py +7 -4
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/vec_dist_func.py +22 -9
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/vector.py +3 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/schema/vector_index.py +7 -3
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/util/__init__.py +1 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/util/ob_version.py +2 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/util/sparse_vector.py +9 -6
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/util/vector.py +2 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/pyproject.toml +3 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/conf.py +11 -10
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/index.rst +0 -1
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_fts_index.py +128 -82
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_geometry.py +8 -14
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_hybrid_search.py +16 -29
- pyobvector-0.2.23/tests/test_json_table.py +1353 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_milvus_like_client.py +18 -21
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_milvus_like_client_sparse_vector.py +78 -58
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_ob_vec_client.py +52 -39
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_ob_vec_client_sparse_vector.py +33 -44
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_ob_vec_more_algorithm.py +72 -57
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_oceanbase_dialect.py +2 -1
- pyobvector-0.2.21/Makefile +0 -20
- pyobvector-0.2.21/tests/test_json_table.py +0 -745
- {pyobvector-0.2.21 → pyobvector-0.2.23}/.gitignore +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/.pylintrc +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/modules.rst +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/pyobvector.client.rst +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/pyobvector.rst +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/pyobvector.schema.rst +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/source/pyobvector.util.rst +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/__init__.py +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_partition_compile.py +0 -0
- {pyobvector-0.2.21 → pyobvector-0.2.23}/tests/test_reflection.py +0 -0

{pyobvector-0.2.21 → pyobvector-0.2.23}/.github/workflows/ci.yml

@@ -36,10 +36,10 @@ jobs:
         run: uv sync --dev
 
       - name: Lint
-        run:
+        run: make check
 
       - name: Package build test
-        run:
+        run: make build
 
       - name: Free disk space
         uses: kfir4444/free-disk-space@main

@@ -64,4 +64,4 @@ jobs:
 
       - name: Run tests
         run: |
-
+          make test TEST_FILTER='${{ matrix.test_filter }}'

pyobvector-0.2.23/.pre-commit-config.yaml (new file)

@@ -0,0 +1,26 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: "v5.0.0"
+    hooks:
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-toml
+      - id: check-yaml
+      - id: check-json
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: "v0.12.7"
+    hooks:
+      - id: ruff-check
+        args: [ --exit-non-zero-on-fix ]
+      - id: ruff-format
+
+  - repo: local
+    hooks:
+      - id: compileall
+        name: compileall
+        entry: make compileall
+        language: system
+        pass_filenames: false

{pyobvector-0.2.21 → pyobvector-0.2.23}/LICENSE

@@ -199,4 +199,4 @@
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
-   limitations under the License.
+   limitations under the License.

pyobvector-0.2.23/Makefile (new file)

@@ -0,0 +1,45 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= uv run sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+.PHONY: install
+install: ## Install the virtual environment and install the pre-commit hooks
+	@echo "Creating virtual environment using uv"
+	@uv sync --dev
+	@uv run prek install
+
+.PHONY: check
+check: ## Run code quality tools.
+	@echo "Linting code: Running pre-commit via prek"
+	@uv run prek run -a
+
+.PHONY: test
+test: ## Test the code with pytest
+	@echo "Testing code: Running pytest"
+	@uv run python -m pytest $(TEST_FILTER)
+
+.PHONY: compileall
+compileall: ## Byte-compile Python sources.
+	@uv run python -m compileall pyobvector tests
+
+.PHONY: build
+build: ## Build wheel file
+	@echo "Creating wheel file"
+	@uv build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help install check test build compileall Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

{pyobvector-0.2.21 → pyobvector-0.2.23}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyobvector
-Version: 0.2.21
+Version: 0.2.23
 Summary: A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
 Project-URL: Homepage, https://github.com/oceanbase/pyobvector
 Project-URL: Repository, https://github.com/oceanbase/pyobvector.git

@@ -34,7 +34,7 @@ uv sync
 - install with pip:
 
 ```shell
-pip install pyobvector==0.2.21
+pip install pyobvector==0.2.23
 ```
 
 ## Build Doc

@@ -154,8 +154,8 @@ client.create_table(test_collection_name, columns=cols, partitions=range_part)
 
 # create vector index
 client.create_index(
-    test_collection_name,
-    is_vec_index=True,
+    test_collection_name,
+    is_vec_index=True,
     index_name='vidx',
     column_names=['embedding'],
     vidx_params='distance=l2, type=hnsw, lib=vsag',

@@ -179,8 +179,8 @@ client.insert(test_collection_name, data=data1)
 ```python
 # perform ann search with basic column selection
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     topk=5,

@@ -194,8 +194,8 @@ from sqlalchemy import Table, text, func
 
 table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     topk=5,

@@ -211,8 +211,8 @@ res = self.client.ann_search(
 
 # perform ann search with distance threshold (filter results by distance)
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     with_dist=True,

@@ -230,17 +230,17 @@ res = self.client.ann_search(
 The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
 
 - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
-
+
   - Column objects: `table.c.id`, `table.c.name`
   - Expressions: `(table.c.age + 10).label('age_plus_10')`
   - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
   - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
 - **`output_column_names`** (legacy): Accepts list of column name strings
-
+
   - Example: `['id', 'name', 'meta']`
 - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
 - **`distance_threshold`** (optional): Filter results by distance threshold
-
+
   - Type: `Optional[float]`
   - Only returns results where `distance <= threshold`
   - Example: `distance_threshold=0.5` returns only results with distance <= 0.5

@@ -449,4 +449,3 @@ You can also get the actual SQL that will be executed:
 sql = client.get_sql(index=test_table_name, body=body)
 print(sql) # prints the SQL query
 ```
-

{pyobvector-0.2.21 → pyobvector-0.2.23}/README.md

@@ -15,7 +15,7 @@ uv sync
 - install with pip:
 
 ```shell
-pip install pyobvector==0.2.21
+pip install pyobvector==0.2.23
 ```
 
 ## Build Doc

@@ -135,8 +135,8 @@ client.create_table(test_collection_name, columns=cols, partitions=range_part)
 
 # create vector index
 client.create_index(
-    test_collection_name,
-    is_vec_index=True,
+    test_collection_name,
+    is_vec_index=True,
     index_name='vidx',
     column_names=['embedding'],
     vidx_params='distance=l2, type=hnsw, lib=vsag',

@@ -160,8 +160,8 @@ client.insert(test_collection_name, data=data1)
 ```python
 # perform ann search with basic column selection
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     topk=5,

@@ -175,8 +175,8 @@ from sqlalchemy import Table, text, func
 
 table = Table(test_collection_name, client.metadata_obj, autoload_with=client.engine)
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     topk=5,

@@ -192,8 +192,8 @@ res = self.client.ann_search(
 
 # perform ann search with distance threshold (filter results by distance)
 res = self.client.ann_search(
-    test_collection_name,
-    vec_data=[0,0,0],
+    test_collection_name,
+    vec_data=[0,0,0],
     vec_column_name='embedding',
     distance_func=l2_distance,
     with_dist=True,

@@ -211,17 +211,17 @@ res = self.client.ann_search(
 The `ann_search` method supports flexible output column selection through the `output_columns` parameter:
 
 - **`output_columns`** (recommended): Accepts SQLAlchemy Column objects, expressions, or a mix of both
-
+
   - Column objects: `table.c.id`, `table.c.name`
   - Expressions: `(table.c.age + 10).label('age_plus_10')`
   - JSON queries: `text("JSON_EXTRACT(meta, '$.key') as extracted_key")`
   - String functions: `func.concat(table.c.name, ' (', table.c.age, ')').label('name_age')`
 - **`output_column_names`** (legacy): Accepts list of column name strings
-
+
   - Example: `['id', 'name', 'meta']`
 - **Parameter Priority**: `output_columns` takes precedence over `output_column_names` when both are provided
 - **`distance_threshold`** (optional): Filter results by distance threshold
-
+
   - Type: `Optional[float]`
   - Only returns results where `distance <= threshold`
   - Example: `distance_threshold=0.5` returns only results with distance <= 0.5

@@ -430,4 +430,3 @@ You can also get the actual SQL that will be executed:
 sql = client.get_sql(index=test_table_name, body=body)
 print(sql) # prints the SQL query
 ```
-
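
The `output_columns` and `distance_threshold` options documented in the hunk above can be combined in a single `ann_search` call. A minimal sketch, assuming a table with `id`, `name`, `meta`, and `embedding` columns; the connection settings and table name are illustrative, and `l2_distance` is assumed to be importable from `pyobvector` as in the README examples:

```python
from sqlalchemy import Table, func, text

from pyobvector import ObVecClient, l2_distance

client = ObVecClient(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")
table = Table("items", client.metadata_obj, autoload_with=client.engine)

# Mix plain columns, labeled expressions, and raw SQL in output_columns,
# and keep only neighbors whose L2 distance to the query vector is <= 0.5.
res = client.ann_search(
    "items",
    vec_data=[0, 0, 0],
    vec_column_name="embedding",
    distance_func=l2_distance,
    with_dist=True,
    topk=5,
    output_columns=[
        table.c.id,
        func.concat(table.c.name, " (", table.c.id, ")").label("name_id"),
        text("JSON_EXTRACT(meta, '$.key') as extracted_key"),
    ],
    distance_threshold=0.5,
)
```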

{pyobvector-0.2.21 → pyobvector-0.2.23}/RELEASE_NOTES.md

@@ -2,6 +2,16 @@
 
 This file documents all released versions and their notable changes for the pyobvector project. Changes are grouped by version and categorized as Added (new features), Changed (modifications), Fixed (bug fixes), and Security (security updates).
 
+## [0.2.23](https://github.com/oceanbase/pyobvector/compare/release-v0.2.22...release-v0.2.23) - 2026-01-29
+
+- Cchore: intro pre commit
+- Fix: make SeekDB version check case-insensitive
+
+## [0.2.22](https://github.com/oceanbase/pyobvector/compare/release-v0.2.21...release-v0.2.22) - 2026-01-15
+
+- Fix: HybridSearch.search() crashes when OceanBase returns NULL for empty results
+- Feat: add **kwargs support to create_table_with_index_params for heap organization
+
 ## [0.2.21](https://github.com/oceanbase/pyobvector/compare/release-v0.2.20...release-v0.2.21) - 2026-01-13
 
 - Migrate tool.poetry section to project section following PEP 518

@@ -352,4 +362,3 @@ This file documents all released versions and their notable changes for the pyob
 ### Fixed
 
 - Fix upsert JSON column: no literal value renderer
-

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/__init__.py

@@ -1,10 +1,10 @@
 """A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API.
 
-`pyobvector` supports two modes:
-1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
+`pyobvector` supports two modes:
+1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
 in a way similar to the Milvus API.
-2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
-`ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
+2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
+`ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
 In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 
 * ObVecClient MySQL client in SQLAlchemy hybrid mode

@@ -19,7 +19,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 * FtsIndex Full Text Search Index
 * FieldSchema Clas to define field schema in collection for MilvusLikeClient
 * CollectionSchema Class to define collection schema for MilvusLikeClient
-* PartType Specify partition type of table or collection
+* PartType Specify partition type of table or collection
     for both ObVecClient and MilvusLikeClient
 * ObPartition Abstract type class of all kind of Partition strategy
 * RangeListPartInfo Specify Range/RangeColumns/List/ListColumns partition info

@@ -40,6 +40,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 * FtsIndexParam Full Text Search index parameter
 * MatchAgainst Full Text Search clause
 """
+
 from .client import *
 from .schema import (
     ARRAY,
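
The module docstring reformatted above is also the shortest summary of the two ways into the library. A minimal sketch of constructing one client per mode, with purely illustrative connection values:

```python
from pyobvector import MilvusLikeClient, ObVecClient

# SQLAlchemy hybrid mode: ObVecClient acts as an OceanBase-aware SQLAlchemy extension.
ob_client = ObVecClient(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")

# Milvus compatible mode: MilvusLikeClient mirrors the Milvus client surface.
mv_client = MilvusLikeClient(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")
```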

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/__init__.py

@@ -1,9 +1,9 @@
 """Multi-type Vector Store Client:
 
-1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
+1. `Milvus compatible mode`: You can use the `MilvusLikeClient` class to use vector storage
 in a way similar to the Milvus API.
-2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
-`ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
+2. `SQLAlchemy hybrid mode`: You can use the vector storage function provided by the
+`ObVecClient` class and execute the relational database statement with the SQLAlchemy library.
 In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 
 * ObVecClient MySQL client in SQLAlchemy hybrid mode

@@ -14,7 +14,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 * DataType Specify field type in collection schema for MilvusLikeClient
 * FieldSchema Clas to define field schema in collection for MilvusLikeClient
 * CollectionSchema Class to define collection schema for MilvusLikeClient
-* PartType Specify partition type of table or collection
+* PartType Specify partition type of table or collection
     for both ObVecClient and MilvusLikeClient
 * ObPartition Abstract type class of all kind of Partition strategy
 * RangeListPartInfo Specify Range/RangeColumns/List/ListColumns partition info

@@ -30,6 +30,7 @@ In this mode, you can regard `pyobvector` as an extension of SQLAlchemy.
 * FtsParser Text Parser Type for Full Text Search
 * FtsIndexParam Full Text Search index parameter
 """
+
 from .ob_vec_client import ObVecClient
 from .milvus_like_client import MilvusLikeClient
 from .ob_vec_json_table_client import ObVecJsonTableClient

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/collection_schema.py

@@ -1,4 +1,5 @@
 """FieldSchema & CollectionSchema definition module to be compatible with Milvus."""
+
 import copy
 from typing import Optional
 from sqlalchemy import Column

@@ -6,6 +7,7 @@ from .schema_type import DataType, convert_datatype_to_sqltype
 from .exceptions import *
 from .partitions import *
 
+
 class FieldSchema:
     """FieldSchema definition.
 

@@ -18,6 +20,7 @@ class FieldSchema:
         nullable (bool) : whether the field can be null
         type_params (dict) : different parameters for different data type
     """
+
     def __init__(
         self,
         name: str,

@@ -117,12 +120,13 @@
 
 class CollectionSchema:
     """CollectionSchema definition.
-
+
     Attributes:
         fields (List[FieldSchema]) : a list of FieldSchema
         description (string) : collection description (not used in OceanBase)
         partitions (ObPartition) : partition strategy of this collection
     """
+
     def __init__(
         self,
        fields: Optional[list[FieldSchema]] = None,
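
The docstrings touched here describe the Milvus-style schema objects used by `MilvusLikeClient`. A sketch of how they fit together, under the assumption that `FieldSchema` accepts Milvus-style keyword arguments (`is_primary`, plus the `dim` and `max_length` type params referenced by the exception messages in the next file); the field names and values are illustrative:

```python
from pyobvector import CollectionSchema, DataType, FieldSchema

# Keyword names beyond `name`/`dtype` are assumptions based on the
# Milvus-compatible API; `dim` and `max_length` match the exception messages.
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=256),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=3),
]
schema = CollectionSchema(fields=fields, description="demo collection")
```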

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/exceptions.py

@@ -1,9 +1,11 @@
 """Exception for MilvusLikeClient."""
+
 from .enum import IntEnum
 
 
 class ErrorCode(IntEnum):
     """Error codes for MilvusLikeClient."""
+
     SUCCESS = 0
     UNEXPECTED_ERROR = 1
     INVALID_ARGUMENT = 2

@@ -14,6 +16,7 @@ class ErrorCode(IntEnum):
 
 class ObException(Exception):
     """Base class for MilvusLikeClient exception."""
+
     def __init__(
         self,
         code: int = ErrorCode.UNEXPECTED_ERROR,

@@ -75,6 +78,7 @@ class ClusterVersionException(ObException):
 
 class ExceptionsMessage:
     """Exception Messages definition."""
+
     PartitionExprNotExists = "Partition expression string does not exist."
     PartitionMultiField = "Multi-Partition Field is not supported."
     PartitionLevelMoreThanTwo = "Partition Level should less than or equal to 2."

@@ -93,12 +97,8 @@ class ExceptionsMessage:
     PartitionListColNameListMissing = (
         "Column name list is necessary when partition type is ListColumns"
     )
-    PartitionHashNameListAndPartCntMissing = (
-        "One of hash_part_name_list and part_count must be set when partition type is Hash"
-    )
-    PartitionKeyNameListAndPartCntMissing = (
-        "One of key_part_name_list and part_count must be set when partition type is Key"
-    )
+    PartitionHashNameListAndPartCntMissing = "One of hash_part_name_list and part_count must be set when partition type is Hash"
+    PartitionKeyNameListAndPartCntMissing = "One of key_part_name_list and part_count must be set when partition type is Key"
     PrimaryFieldType = "Param primary_field must be int or str type."
     VectorFieldMissingDimParam = "Param 'dim' must be set for vector field."
     VarcharFieldMissingLengthParam = "Param 'max_length' must be set for varchar field."

@@ -108,7 +108,9 @@ class ExceptionsMessage:
     )
     CollectionNotExists = "Collection does not exist."
     MetricTypeParamTypeInvalid = "MetricType param type should be string."
-    MetricTypeValueInvalid = "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
+    MetricTypeValueInvalid = (
+        "MetricType should be 'l2'/'ip'/'neg_ip'/'cosine' in ann search."
+    )
     UsingInIDsWhenMultiPrimaryKey = "Using 'ids' when table has multi primary key."
     ClusterVersionIsLow = (
         "OceanBase %s feature is not supported because cluster version is below %s."

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/fts_index_param.py

@@ -1,9 +1,12 @@
 """A module to specify fts index parameters"""
+
 from enum import Enum
 from typing import Optional, Union
 
+
 class FtsParser(Enum):
     """Built-in full-text search parser types supported by OceanBase"""
+
     IK = 0
     NGRAM = 1
     NGRAM2 = 2 # NGRAM2 parser (supported from V4.3.5 BP2+)

@@ -13,13 +16,14 @@
 
 class FtsIndexParam:
     """Full-text search index parameter.
-
+
     Args:
         index_name: Index name
         field_names: List of field names to create full-text index on
         parser_type: Parser type, can be FtsParser enum or string (for custom parsers)
                      If None, uses default Space parser
     """
+
     def __init__(
         self,
         index_name: str,

@@ -34,11 +38,11 @@ class FtsIndexParam:
         """Convert parser type to string format for SQL."""
         if self.parser_type is None:
             return None # Default Space parser, no need to specify
-
+
         if isinstance(self.parser_type, str):
             # Custom parser name (e.g., "thai_ftparser")
             return self.parser_type.lower()
-
+
         if isinstance(self.parser_type, FtsParser):
             if self.parser_type == FtsParser.IK:
                 return "ik"

@@ -52,7 +56,7 @@ class FtsIndexParam:
                 return "jieba"
             # Raise exception for unrecognized FtsParser enum values
             raise ValueError(f"Unrecognized FtsParser enum value: {self.parser_type}")
-
+
         return None
 
     def __iter__(self):
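
The `Args` section above lists the three accepted forms of `parser_type`. A short sketch covering each, assuming `FtsIndexParam` and `FtsParser` are importable from the package top level as the module docstrings suggest (index and field names are illustrative):

```python
from pyobvector import FtsIndexParam, FtsParser

# None -> default space parser, nothing extra emitted in the index DDL.
plain = FtsIndexParam(index_name="fts_idx_doc", field_names=["doc"], parser_type=None)

# Built-in parser chosen via the FtsParser enum (rendered as "ik" for SQL).
ik = FtsIndexParam(index_name="fts_idx_title", field_names=["title"], parser_type=FtsParser.IK)

# Custom parser passed as a plain string, e.g. the "thai_ftparser" mentioned in the code comment.
custom = FtsIndexParam(index_name="fts_idx_body", field_names=["body"], parser_type="thai_ftparser")
```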

{pyobvector-0.2.21 → pyobvector-0.2.23}/pyobvector/client/hybrid_search.py

@@ -1,4 +1,5 @@
 """OceanBase Hybrid Search Client."""
+
 import json
 import logging
 from typing import Any

@@ -27,7 +28,7 @@ class HybridSearch(Client):
         super().__init__(uri, user, password, db_name, **kwargs)
 
         min_required_version = ObVersion.from_db_version_nums(4, 4, 1, 0)
-
+
         if self.ob_version < min_required_version:
             # For versions < 4.4.1.0, check if it's SeekDB
             if self._is_seekdb():

@@ -35,7 +36,8 @@ class HybridSearch(Client):
                 return
             raise ClusterVersionException(
                 code=ErrorCode.NOT_SUPPORTED,
-                message=ExceptionsMessage.ClusterVersionIsLow % ("Hybrid Search", "4.4.1.0"),
+                message=ExceptionsMessage.ClusterVersionIsLow
+                % ("Hybrid Search", "4.4.1.0"),
             )
 
     def search(

@@ -60,7 +62,11 @@ class HybridSearch(Client):
 
         with self.engine.connect() as conn:
             with conn.begin():
-                res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
+                res = conn.execute(
+                    sql, {"index": index, "body_str": body_str}
+                ).fetchone()
+                if res[0] is None:
+                    return []
                 return json.loads(res[0])
 
     def get_sql(

@@ -83,5 +89,9 @@ class HybridSearch(Client):
 
         with self.engine.connect() as conn:
             with conn.begin():
-                res = conn.execute(sql, {"index": index, "body_str": body_str}).fetchone()
+                res = conn.execute(
+                    sql, {"index": index, "body_str": body_str}
+                ).fetchone()
+                if res[0] is None:
+                    return ""
                 return res[0]
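
These last two hunks implement the 0.2.22 fix listed in the release notes: both `search()` and `get_sql()` now check for a NULL result before decoding it. A minimal sketch of the resulting behaviour; the connection values, index name, and query body shape are illustrative rather than taken from the package docs:

```python
from pyobvector.client.hybrid_search import HybridSearch

client = HybridSearch(uri="127.0.0.1:2881", user="root@test", password="", db_name="test")

# Hypothetical query body; see the project README for the exact body format.
body = {"query": {"match": {"doc": "no such phrase"}}}

# Before the fix, an empty result set meant res[0] was None and
# json.loads(None) raised a TypeError; now search() simply returns [].
hits = client.search(index="t_hybrid_demo", body=body)
assert hits == []

# get_sql() gets the same guard and returns "" instead of failing.
sql = client.get_sql(index="t_hybrid_demo", body=body)
```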