matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,706 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Comprehensive online tests for MatrixOne vector operations.
|
17
|
+
Consolidates all vector-related tests from multiple files to reduce redundancy and improve maintainability.
|
18
|
+
"""
|
19
|
+
|
20
|
+
import pytest
|
21
|
+
import pytest_asyncio
|
22
|
+
import sys
|
23
|
+
import os
|
24
|
+
import time
|
25
|
+
import uuid
|
26
|
+
from sqlalchemy import create_engine, text, select, and_, Column, Integer, String, Float, MetaData, Table
|
27
|
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
28
|
+
from sqlalchemy.schema import CreateTable, DropTable
|
29
|
+
from sqlalchemy.exc import SQLAlchemyError
|
30
|
+
|
31
|
+
# Add the project root to Python path
|
32
|
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
33
|
+
|
34
|
+
from matrixone import Client, AsyncClient
|
35
|
+
from matrixone.sqlalchemy_ext import (
|
36
|
+
VectorType,
|
37
|
+
Vectorf32,
|
38
|
+
Vectorf64,
|
39
|
+
VectorColumn,
|
40
|
+
create_vector_column,
|
41
|
+
vector_distance_functions,
|
42
|
+
VectorIndex,
|
43
|
+
VectorIndexType,
|
44
|
+
VectorOpType,
|
45
|
+
CreateVectorIndex,
|
46
|
+
create_vector_index,
|
47
|
+
create_ivfflat_index,
|
48
|
+
VectorIndexBuilder,
|
49
|
+
vector_index_builder,
|
50
|
+
IVFConfig,
|
51
|
+
create_ivf_config,
|
52
|
+
enable_ivf_indexing,
|
53
|
+
disable_ivf_indexing,
|
54
|
+
set_probe_limit,
|
55
|
+
get_ivf_status,
|
56
|
+
VectorTableBuilder,
|
57
|
+
create_vector_table,
|
58
|
+
create_vector_index_table,
|
59
|
+
MatrixOneDialect,
|
60
|
+
)
|
61
|
+
from matrixone.logger import create_default_logger
|
62
|
+
from .test_config import online_config
|
63
|
+
|
64
|
+
|
65
|
+
class TestVectorComprehensive:
|
66
|
+
"""Comprehensive test class for all vector operations."""
|
67
|
+
|
68
|
+
# ==================== FIXTURES ====================
|
69
|
+
|
70
|
+
@pytest.fixture(scope="class")
def test_client(self):
    """Yield a connected synchronous ``Client`` shared by the test class.

    Connection parameters are read from ``online_config``. On teardown the
    client is disconnected; a failing disconnect is reported, not raised.
    """
    conn_host, conn_port, conn_user, conn_password, conn_db = online_config.get_connection_params()
    sync_client = Client()
    sync_client.connect(
        host=conn_host,
        port=conn_port,
        user=conn_user,
        password=conn_password,
        database=conn_db,
    )
    try:
        yield sync_client
    finally:
        # Best-effort teardown: never let a disconnect error mask a test result.
        try:
            sync_client.disconnect()
        except Exception as exc:
            print(f"Warning: Failed to disconnect client: {exc}")
|
83
|
+
|
84
|
+
@pytest_asyncio.fixture(scope="function")
async def test_async_client(self):
    """Yield a connected ``AsyncClient``, created fresh for each test.

    Connection parameters are read from ``online_config``. On teardown the
    client is disconnected; a failing disconnect is reported, not raised.
    """
    conn_host, conn_port, conn_user, conn_password, conn_db = online_config.get_connection_params()
    async_client = AsyncClient()
    await async_client.connect(
        host=conn_host,
        port=conn_port,
        user=conn_user,
        password=conn_password,
        database=conn_db,
    )
    try:
        yield async_client
    finally:
        # Best-effort teardown: never let a disconnect error mask a test result.
        try:
            await async_client.disconnect()
        except Exception as exc:
            print(f"Warning: Failed to disconnect async client: {exc}")
|
97
|
+
|
98
|
+
@pytest.fixture(scope="class")
def engine(self, test_client):
    """Return the SQLAlchemy engine obtained from the class-scoped client."""
    sa_engine = test_client.get_sqlalchemy_engine()
    return sa_engine
|
102
|
+
|
103
|
+
@pytest.fixture(scope="class")
def Base(self):
    """Return one declarative base shared by every test in the class."""
    declarative_root = declarative_base()
    return declarative_root
|
107
|
+
|
108
|
+
@pytest.fixture(scope="class")
def Session(self, engine):
    """Return a session factory bound to the class-scoped engine."""
    session_factory = sessionmaker(bind=engine)
    return session_factory
|
112
|
+
|
113
|
+
@pytest.fixture(scope="class")
def metadata(self):
    """Return an empty ``MetaData`` collection shared by the test class."""
    table_registry = MetaData()
    return table_registry
|
117
|
+
|
118
|
+
@pytest.fixture(scope="class")
def test_database(self, test_client):
    """Create a scratch database with a seeded 64-dimension vector table.

    Yields ``(database_name, table_name)``. The database is dropped on
    teardown; a failing drop is reported, not raised.
    """
    test_db = "test_vector_db"
    test_table = "test_vector_table"

    try:
        test_client.execute(f"CREATE DATABASE IF NOT EXISTS {test_db}")
        test_client.execute(f"USE {test_db}")
        test_client.create_table(
            test_table,
            columns={'id': 'int', 'name': 'varchar(100)', 'embedding': 'vecf32(64)'},
            primary_key='id',
            if_not_exists=True,
        )
        # Start from an empty table so reruns do not hit duplicate keys.
        test_client.execute(f"DELETE FROM {test_table}")

        # Each seed vector is the same ten-value cycle, rotated by one more
        # position per row, repeated six times and topped up to 64 values.
        cycle = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        for row_id in (1, 2, 3):
            shift = row_id - 1
            rotated = cycle[shift:] + cycle[:shift]
            seed_vector = rotated * 6 + rotated[:4]
            test_client.insert(
                table_name=test_table,
                data={"id": row_id, "name": f"test{row_id}", "embedding": seed_vector},
            )

        yield test_db, test_table

    finally:
        try:
            test_client.execute(f"DROP DATABASE IF EXISTS {test_db}")
        except Exception as exc:
            print(f"Cleanup failed: {exc}")
|
154
|
+
|
155
|
+
@pytest_asyncio.fixture(scope="function")
async def async_test_database(self, test_async_client):
    """Create a scratch database with a seeded vector table for async tests.

    Yields ``(database_name, table_name)``. The database is dropped on
    teardown; a failing drop is reported, not raised.
    """
    test_db = "test_async_vector_db"
    test_table = "test_async_vector_table"

    try:
        await test_async_client.execute(f"CREATE DATABASE IF NOT EXISTS {test_db}")
        await test_async_client.execute(f"USE {test_db}")
        await test_async_client.create_table(
            table_name=test_table,
            columns={'id': 'int', 'name': 'varchar(100)', 'embedding': 'vecf32(64)'},
            primary_key='id',
            if_not_exists=True,
        )
        # Start from an empty table so reruns do not hit duplicate keys.
        await test_async_client.execute(f"DELETE FROM {test_table}")

        # Each seed vector is the same ten-value cycle, rotated by one more
        # position per row, repeated six times and topped up to 64 values.
        cycle = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        for row_id in (1, 2, 3):
            shift = row_id - 1
            rotated = cycle[shift:] + cycle[:shift]
            seed_vector = rotated * 6 + rotated[:4]
            await test_async_client.insert(
                table_name=test_table,
                data={"id": row_id, "name": f"async_test{row_id}", "embedding": seed_vector},
            )

        yield test_db, test_table

    finally:
        try:
            await test_async_client.execute(f"DROP DATABASE IF EXISTS {test_db}")
        except Exception as exc:
            print(f"Async cleanup failed: {exc}")
|
197
|
+
|
198
|
+
# ==================== BASIC VECTOR OPERATIONS ====================
|
199
|
+
|
200
|
+
def test_vector_table_creation_and_drop(self, test_client, Base, Session):
    """Create a table with a vector column, confirm it exists, then drop it.

    ``Session`` is requested but not used by this test.
    """

    class VectorTest(Base):
        __tablename__ = f'vector_test_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = Column(Vectorf32(dimension=64))

    sa_engine = test_client.get_sqlalchemy_engine()
    # ``Base`` is class-scoped, so its metadata also holds models declared by
    # other tests. Restrict DDL to this model's table so those are untouched.
    own_tables = [VectorTest.__table__]

    Base.metadata.create_all(sa_engine, tables=own_tables)
    try:
        result = test_client.execute(f"SHOW TABLES LIKE '{VectorTest.__tablename__}'")
        assert len(result.rows) > 0
    finally:
        # Drop even when the existence check fails so the table never leaks.
        Base.metadata.drop_all(sa_engine, tables=own_tables)

    result = test_client.execute(f"SHOW TABLES LIKE '{VectorTest.__tablename__}'")
    assert len(result.rows) == 0
|
223
|
+
|
224
|
+
def test_vector_data_insertion_and_retrieval(self, test_client, Base, Session):
    """Insert one row with a 64-value vector through the ORM and read it back."""

    class VectorData(Base):
        __tablename__ = f'vector_data_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = Column(Vectorf32(dimension=64))

    sa_engine = test_client.get_sqlalchemy_engine()
    # ``Base`` is class-scoped, so its metadata also holds models declared by
    # other tests. Restrict DDL to this model's table so those are untouched.
    own_tables = [VectorData.__table__]
    Base.metadata.create_all(sa_engine, tables=own_tables)

    session = Session()
    try:
        test_vector = [0.1] * 64
        vector_data = VectorData(id=1, name="test_vector", embedding=test_vector)
        session.add(vector_data)
        session.commit()

        result = session.query(VectorData).filter(VectorData.id == 1).first()
        assert result is not None
        assert result.name == "test_vector"
        assert len(result.embedding) == 64
        # Compare with a tolerance rather than exact equality on each value.
        assert all(abs(x - 0.1) < 0.001 for x in result.embedding)

    finally:
        session.close()
        Base.metadata.drop_all(sa_engine, tables=own_tables)
|
255
|
+
|
256
|
+
def test_vector_search_l2_distance(self, test_client, Base, Session):
    """Rank rows by L2 distance to a query vector and check the nearest hit."""

    class VectorSearch(Base):
        __tablename__ = f'vector_search_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = create_vector_column(64, "f32")

    test_client.create_table(VectorSearch)

    try:
        # Three constant vectors whose components are 0.1, 0.2 and 0.3.
        seed_rows = [
            {"id": row_id, "name": f"vector_{row_id}", "embedding": [component] * 64}
            for row_id, component in ((1, 0.1), (2, 0.2), (3, 0.3))
        ]
        test_client.batch_insert(VectorSearch, seed_rows)

        probe = [0.15] * 64
        distance_expr = VectorSearch.embedding.l2_distance(probe).label('distance')
        ranked = test_client.query(VectorSearch, VectorSearch.id, VectorSearch.name, distance_expr)
        nearest = ranked.order_by('distance').limit(2).all()

        assert len(nearest) == 2
        # The probe sits between the first two rows, so either may rank first.
        assert nearest[0].name in ["vector_1", "vector_2"]

    finally:
        test_client.drop_table(VectorSearch)
|
301
|
+
|
302
|
+
def test_vector_search_cosine_distance(self, test_client, Base, Session):
    """Run a cosine similarity search and expect the identical vector first."""

    class VectorCosine(Base):
        __tablename__ = f'vector_cosine_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = Column(Vectorf32(dimension=64))

    test_client.create_table(VectorCosine)

    def padded(*leading):
        # Extend the given leading components with zeros up to 64 values.
        return list(leading) + [0.0] * (64 - len(leading))

    try:
        # Vectors pointing along x, along y, and along the x/y diagonal.
        seed_rows = [
            {"id": 1, "name": "unit_x", "embedding": padded(1.0)},
            {"id": 2, "name": "unit_y", "embedding": padded(0.0, 1.0)},
            {"id": 3, "name": "diagonal", "embedding": padded(0.5, 0.5)},
        ]
        test_client.batch_insert(VectorCosine, seed_rows)

        # Search through the client's vector_ops helper with cosine distance.
        probe = padded(1.0)
        hits = test_client.vector_ops.similarity_search(
            VectorCosine.__tablename__,
            vector_column="embedding",
            query_vector=probe,
            limit=2,
            distance_type="cosine",
        )

        assert len(hits) == 2
        # The probe equals the "unit_x" row, so that row must rank first.
        assert hits[0].name == "unit_x"

    finally:
        test_client.drop_table(VectorCosine)
|
344
|
+
|
345
|
+
def test_vector_search_with_limit_and_offset(self, test_client, Base, Session):
    """Check that LIMIT and OFFSET apply to a distance-ordered vector query."""

    class VectorLimit(Base):
        __tablename__ = f'vector_limit_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = create_vector_column(64, "f32")

    test_client.create_table(VectorLimit)

    try:
        # Ten constant vectors with components 0.0, 0.1, ..., 0.9.
        for row_id in range(1, 11):
            test_client.insert(
                VectorLimit.__tablename__,
                data={
                    "id": row_id,
                    "name": f"vector_{row_id}",
                    "embedding": [float(row_id - 1) / 10.0] * 64,
                },
            )

        query_vector = [0.5] * 64

        def ranked_by_distance():
            # Build a fresh distance-ordered query for each variation below.
            return test_client.query(
                VectorLimit,
                VectorLimit.id,
                VectorLimit.name,
                VectorLimit.embedding.l2_distance(query_vector).label('distance'),
            ).order_by('distance')

        first_page = ranked_by_distance().limit(3).all()
        assert len(first_page) == 3

        shifted_page = ranked_by_distance().offset(2).limit(3).all()
        assert len(shifted_page) == 3
        # Skipping two rows must change which row comes first.
        assert first_page[0].id != shifted_page[0].id

    finally:
        test_client.drop_table(VectorLimit)
|
403
|
+
|
404
|
+
# ==================== VECTOR TABLE OPERATIONS ====================
|
405
|
+
|
406
|
+
def test_create_vector_table_with_sqlalchemy(self, engine, metadata):
    """Create a table with vecf32 and vecf64 columns via SQLAlchemy Core DDL.

    Verifies the table exists and that ``DESCRIBE`` reports both vector
    column types with their dimensions.
    """
    table = Table(
        f'test_vector_table_{int(time.time())}',
        metadata,
        Column('id', Integer, primary_key=True),
        Column('name', String(100)),
        Column('embedding_32', Vectorf32(dimension=128)),
        Column('embedding_64', Vectorf64(dimension=256)),
    )

    try:
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
            conn.execute(CreateTable(table))

            result = conn.execute(text(f"SHOW TABLES LIKE '{table.name}'"))
            rows = result.fetchall()
            assert len(rows) > 0

            # Map column name -> reported type (first two DESCRIBE columns).
            result = conn.execute(text(f"DESCRIBE {table.name}"))
            columns = {row[0]: row[1] for row in result.fetchall()}

            assert 'vecf32(128)' in columns['embedding_32'].lower()
            assert 'vecf64(256)' in columns['embedding_64'].lower()
    finally:
        # Clean up in a separate transaction so a failed assertion above
        # cannot leave the table behind.
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
|
437
|
+
|
438
|
+
def test_insert_vector_data(self, engine, metadata):
    """Insert a vector as a bracketed string literal and read it back."""
    table = Table(
        f'test_vector_insert_{int(time.time())}',
        metadata,
        Column('id', Integer, primary_key=True),
        Column('name', String(100)),
        Column('embedding', Vectorf32(dimension=64)),
    )

    try:
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
            conn.execute(CreateTable(table))

            test_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 6 + [0.1, 0.2, 0.3, 0.4]
            # The vector is passed as a bound "[v1,v2,...]" string parameter.
            vector_str = '[' + ','.join(map(str, test_vector)) + ']'

            insert_sql = text(
                f"""
                INSERT INTO {table.name} (id, name, embedding)
                VALUES (:id, :name, :embedding)
                """
            )

            conn.execute(insert_sql, {"id": 1, "name": "test_vector", "embedding": vector_str})

            stmt = select(table).where(table.c.id == 1)
            result = conn.execute(stmt)
            rows = result.fetchall()
            assert len(rows) == 1
            assert rows[0][1] == "test_vector"
            assert rows[0][2] == list(map(float, test_vector))
    finally:
        # Clean up in a separate transaction so a failed assertion above
        # cannot leave the table behind.
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
|
477
|
+
|
478
|
+
def test_vector_table_builder_online(self, engine, metadata):
    """Create a vecf32(128) table and verify its reported column type.

    Despite the name, the table is built directly with ``Table`` rather
    than ``VectorTableBuilder``, to avoid SQL syntax issues.
    """
    table_name = f'test_vector_builder_{int(time.time())}'

    table = Table(
        table_name,
        metadata,
        Column('id', Integer, primary_key=True),
        Column('name', String(100)),
        Column('embedding', Vectorf32(dimension=128)),
    )

    try:
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
            conn.execute(CreateTable(table))

            # Map column name -> reported type (first two DESCRIBE columns).
            result = conn.execute(text(f"DESCRIBE {table_name}"))
            columns = {row[0]: row[1] for row in result.fetchall()}

            assert 'vecf32(128)' in columns['embedding'].lower()
    finally:
        # Clean up in a separate transaction so a failed assertion above
        # cannot leave the table behind.
        with engine.begin() as conn:
            conn.execute(DropTable(table, if_exists=True))
|
504
|
+
|
505
|
+
# ==================== VECTOR INDEX OPERATIONS ====================
|
506
|
+
|
507
|
+
def test_vector_index_creation_with_table(self, test_client, Base, Session):
    """Create an IVF index on a vector column and verify it is listed.

    ``Session`` is requested but not used by this test.
    """

    # Named to avoid shadowing the ``VectorIndex`` imported at module level.
    class VectorIndexedDoc(Base):
        __tablename__ = f'vector_index_{int(time.time())}'
        id = Column(Integer, primary_key=True)
        name = Column(String(100))
        embedding = Column(Vectorf32(dimension=64))

    sa_engine = test_client.get_sqlalchemy_engine()
    # ``Base`` is class-scoped, so its metadata also holds models declared by
    # other tests. Restrict DDL to this model's table so those are untouched.
    own_tables = [VectorIndexedDoc.__table__]
    # Bound before the ``try`` so the cleanup below can always reference it.
    index_name = f"test_vector_index_{int(time.time())}"

    Base.metadata.create_all(sa_engine, tables=own_tables)

    try:
        test_client.vector_ops.enable_ivf()

        test_client.vector_ops.create_ivf(
            VectorIndexedDoc.__tablename__,
            name=index_name,
            column="embedding",
            lists=10,
            op_type="vector_l2_ops",
        )

        result = test_client.execute(f"SHOW INDEX FROM {VectorIndexedDoc.__tablename__}")
        # Index 2 of each SHOW INDEX row is read as the index name.
        index_names = [row[2] for row in result.rows]
        assert index_name in index_names

    except Exception as e:
        print(f"Vector index creation failed: {e}")
        raise

    finally:
        # Best-effort: the index may not exist if creation failed above.
        try:
            test_client.execute(f"DROP INDEX {index_name} ON {VectorIndexedDoc.__tablename__}")
        except Exception as e:
            print(f"Cleanup failed: {e}")
        Base.metadata.drop_all(sa_engine, tables=own_tables)
|
551
|
+
|
552
|
+
def test_ivf_config_creation(self, engine):
    """Create an IVF config for the engine; skip if creation itself raises."""
    try:
        config = create_ivf_config(engine)
    except Exception as e:
        pytest.skip(f"IVF config creation failed: {e}")

    # Kept outside the ``try`` so a ``None`` config fails the test instead of
    # being turned into a skip by the handler above.
    assert config is not None
|
560
|
+
|
561
|
+
def test_ivf_status_retrieval(self, engine):
    """Fetch the IVF status for the engine and require a non-``None`` result."""
    ivf_status = get_ivf_status(engine)
    assert ivf_status is not None
|
565
|
+
|
566
|
+
def test_ivf_enable_disable(self, engine):
    """Enable then disable IVF indexing; passes if neither call raises."""
    for toggle in (enable_ivf_indexing, disable_ivf_indexing):
        toggle(engine)

    # Reaching this point means both calls completed without an exception.
    assert True
|
576
|
+
|
577
|
+
def test_probe_limit_setting(self, engine):
    """Set the probe limit to 5; passes if the call does not raise."""
    probe_limit = 5
    set_probe_limit(engine, probe_limit)

    # Reaching this point means the call completed without an exception.
    assert True
|
583
|
+
|
584
|
+
# ==================== ASYNC VECTOR OPERATIONS ====================
|
585
|
+
|
586
|
+
@pytest.mark.asyncio
async def test_async_vector_table_creation(self, test_async_client):
    """Create a vector table through the async client and confirm it exists."""
    table_name = f"async_vector_table_{int(time.time())}"
    column_spec = {'id': 'int', 'name': 'varchar(100)', 'embedding': 'vecf32(64)'}

    try:
        await test_async_client.create_table(table_name, columns=column_spec, primary_key='id')

        listing = await test_async_client.execute(f"SHOW TABLES LIKE '{table_name}'")
        assert len(listing.rows) > 0

    finally:
        # Best-effort drop; a cleanup failure is reported, not raised.
        try:
            await test_async_client.drop_table(table_name)
        except Exception as exc:
            print(f"Async cleanup failed: {exc}")
|
607
|
+
|
608
|
+
@pytest.mark.asyncio
async def test_async_vector_data_insertion(self, test_async_client):
    """Insert one vector row through the async client and read it back."""
    table_name = f"async_vector_insert_{int(time.time())}"
    column_spec = {'id': 'int', 'name': 'varchar(100)', 'embedding': 'vecf32(64)'}

    try:
        await test_async_client.create_table(table_name, columns=column_spec, primary_key='id')

        # 64 values: a ten-value cycle repeated six times plus four more.
        cycle = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        payload = {"id": 1, "name": "async_test", "embedding": cycle * 6 + cycle[:4]}
        await test_async_client.insert(table_name, data=payload)

        lookup = await test_async_client.query(table_name).select("*").where("id = ?", 1).execute()
        fetched = lookup.fetchall()
        assert len(fetched) == 1
        assert fetched[0][1] == "async_test"

    finally:
        # Best-effort drop; a cleanup failure is reported, not raised.
        try:
            await test_async_client.drop_table(table_name)
        except Exception as exc:
            print(f"Async cleanup failed: {exc}")
|
635
|
+
|
636
|
+
# ==================== ERROR HANDLING ====================
|
637
|
+
|
638
|
+
def test_vector_invalid_dimensions(self, test_client):
    """Creating a table with a zero-dimension vector column must fail.

    ``pytest.raises`` is used instead of ``assert False`` inside a
    ``try/except Exception`` block. The old form caught its own
    AssertionError, whose message contains "invalid" and "dimension",
    so the test passed even when the table was created.
    """
    try:
        with pytest.raises(Exception) as exc_info:
            test_client.execute(
                """
                CREATE TABLE test_invalid_vector (
                    id INT PRIMARY KEY,
                    embedding vecf32(0)
                )
                """
            )

        message = str(exc_info.value).lower()
        assert "dimension" in message or "invalid" in message
    finally:
        # If the server unexpectedly accepted the DDL, do not leak the table.
        try:
            test_client.execute("DROP TABLE IF EXISTS test_invalid_vector")
        except Exception as e:
            print(f"Cleanup failed: {e}")
|
655
|
+
|
656
|
+
def test_vector_missing_data(self, test_client):
    """Insert a row without its vector column and check the outcome.

    Two outcomes are accepted: the insert is rejected with an error that
    mentions the column or NULL, or it succeeds and the stored embedding
    is NULL.
    """
    table_name = f"test_missing_vector_{int(time.time())}"

    try:
        test_client.create_table(
            table_name, columns={'id': 'int', 'name': 'varchar(100)', 'embedding': 'vecf32(64)'}, primary_key='id'
        )

        try:
            test_client.insert(table_name, data={"id": 1, "name": "test"})
        except Exception as e:
            # Rejected insert: acceptable only if the error is about the
            # missing embedding / NULL value.
            assert "embedding" in str(e).lower() or "null" in str(e).lower()
        else:
            # Accepted insert: verify outside the handler above, so a failed
            # assertion is not mistaken for an expected insert error.
            result = test_client.query(table_name).select("embedding").where("id = ?", 1).execute()
            rows = result.fetchall()
            assert len(rows) == 1
            assert rows[0][0] is None

    finally:
        # Best-effort drop; a cleanup failure is reported, not raised.
        try:
            test_client.drop_table(table_name)
        except Exception as e:
            print(f"Cleanup failed: {e}")
|
683
|
+
|
684
|
+
# ==================== CLEANUP ====================
|
685
|
+
|
686
|
+
def test_cleanup_test_tables(self, test_client):
    """Drop leftover tables whose names contain ``test_`` or ``vector_``.

    This is a best-effort sweep: every failure is printed and the test
    always passes.
    """
    try:
        listing = test_client.execute("SHOW TABLES")
        leftovers = []
        for row in listing.rows:
            table_name = row[0]
            if 'test_' in table_name or 'vector_' in table_name:
                leftovers.append(table_name)

        for table_name in leftovers:
            try:
                test_client.drop_table(table_name)
            except Exception as exc:
                print(f"Failed to drop table {table_name}: {exc}")

        # Reaching this point means the sweep itself completed.
        assert True
    except Exception as exc:
        print(f"Cleanup test failed: {exc}")
        # Cleanup problems are deliberately not treated as test failures.
        assert True
|