matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,506 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Online tests for IVF index statistics functionality.
|
17
|
+
Tests the get_ivf_stats method for both sync and async clients.
|
18
|
+
"""
|
19
|
+
|
20
|
+
import pytest
|
21
|
+
import pytest_asyncio
|
22
|
+
import sys
|
23
|
+
import os
|
24
|
+
import random
|
25
|
+
|
26
|
+
# Add the project root to Python path
|
27
|
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
28
|
+
|
29
|
+
from matrixone import Client, AsyncClient
|
30
|
+
from .test_config import online_config
|
31
|
+
|
32
|
+
|
33
|
+
class TestIVFStatsSync:
    """Online tests for ``vector_ops.get_ivf_stats`` on the synchronous client.

    Every test talks to a live server whose connection parameters come from
    ``online_config.get_connection_params()``.
    """

    @pytest.fixture(scope="class")
    def test_client(self):
        """Yield a connected ``Client`` shared by all tests in this class.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised, so it cannot mask a test result.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = Client()
        client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect client: {e}")

    @pytest.fixture(scope="function")
    def test_table(self, test_client):
        """Yield the name of a freshly built table with an IVF index and 30 rows.

        The table has ``id``, ``title`` and a 128-dimensional ``vecf32``
        ``embedding`` column, indexed with ``lists=5``. It is dropped again
        after the test.
        """
        table_name = "test_ivf_stats_table"

        # Best-effort removal of leftovers from an earlier run. Only
        # ``Exception`` is caught so Ctrl-C / SystemExit still propagate.
        try:
            test_client.drop_table(table_name)
        except Exception:
            pass

        # Create table with vector column
        test_client.create_table(
            table_name,
            columns={"id": "int", "title": "varchar(255)", "embedding": "vecf32(128)"},
            primary_key="id",
        )

        # Create IVF index
        test_client.vector_ops.create_ivf(table_name, name="idx_test_embedding", column="embedding", lists=5)

        # Insert sample data: ids 1..30, each with a random 128-dim vector
        for doc_id in range(1, 31):
            vector = [random.random() for _ in range(128)]
            test_client.vector_ops.insert(
                table_name, {"id": doc_id, "title": f"Document {doc_id}", "embedding": vector}
            )

        yield table_name

        # Cleanup
        try:
            test_client.drop_table(table_name)
        except Exception as e:
            print(f"Warning: Failed to drop table {table_name}: {e}")

    def test_get_ivf_stats_with_column_name(self, test_client, test_table):
        """get_ivf_stats with an explicit column returns the full stats structure."""
        stats = test_client.vector_ops.get_ivf_stats(test_table, "embedding")

        # Verify structure
        for key in ('index_tables', 'distribution', 'database', 'table_name', 'column_name'):
            assert key in stats

        # Verify values
        assert stats['table_name'] == test_table
        assert stats['column_name'] == 'embedding'
        assert stats['database'] == test_client._connection_params['database']

        # Verify index tables
        for key in ('metadata', 'centroids', 'entries'):
            assert key in stats['index_tables']

        # Verify distribution: each series must be present, a list, and non-empty
        distribution = stats['distribution']
        series_names = ('centroid_count', 'centroid_id', 'centroid_version')
        for key in series_names:
            assert key in distribution
        for key in series_names:
            assert isinstance(distribution[key], list)
        for key in series_names:
            assert len(distribution[key]) > 0

    def test_get_ivf_stats_auto_inference(self, test_client, test_table):
        """get_ivf_stats infers the vector column when none is given."""
        stats = test_client.vector_ops.get_ivf_stats(test_table)

        # Verify auto-inference worked
        assert stats['column_name'] == 'embedding'

        # Verify structure
        assert 'index_tables' in stats
        assert 'distribution' in stats

    def test_get_ivf_stats_within_transaction(self, test_client, test_table):
        """get_ivf_stats is usable through a transaction's ``vector_ops``."""
        with test_client.transaction() as tx:
            stats = tx.vector_ops.get_ivf_stats(test_table, "embedding")

            # Verify structure
            assert 'index_tables' in stats
            assert 'distribution' in stats
            assert stats['table_name'] == test_table
            assert stats['column_name'] == 'embedding'

    def test_get_ivf_stats_nonexistent_table(self, test_client):
        """get_ivf_stats on a missing table raises a 'No IVF index found' error."""
        # ``match`` performs re.search against str(exception); the pattern has
        # no regex metacharacters, so this is a plain substring check.
        with pytest.raises(Exception, match="No IVF index found"):
            test_client.vector_ops.get_ivf_stats("nonexistent_table", "embedding")

    def test_get_ivf_stats_no_ivf_index(self, test_client):
        """get_ivf_stats on a table that has no IVF index raises an error."""
        table_name = "test_no_ivf_index"

        try:
            # Create table without IVF index
            test_client.create_table(table_name, columns={"id": "int", "embedding": "vecf32(128)"}, primary_key="id")

            # Try to get stats - should fail
            with pytest.raises(Exception, match="No IVF index found"):
                test_client.vector_ops.get_ivf_stats(table_name, "embedding")

        finally:
            # Best-effort cleanup; do not swallow KeyboardInterrupt/SystemExit.
            try:
                test_client.drop_table(table_name)
            except Exception:
                pass

    def test_get_ivf_stats_multiple_vector_columns(self, test_client):
        """With two vector columns, the column must be named explicitly."""
        table_name = "test_multi_vector_cols"

        try:
            # Create table with multiple vector columns
            test_client.create_table(
                table_name,
                columns={"id": "int", "embedding1": "vecf32(128)", "embedding2": "vecf32(256)"},
                primary_key="id",
            )

            # Create IVF index on first column
            test_client.vector_ops.create_ivf(table_name, name="idx_embedding1", column="embedding1", lists=3)

            # Insert some data (ids 1..10)
            for row_id in range(1, 11):
                vector1 = [random.random() for _ in range(128)]
                vector2 = [random.random() for _ in range(256)]
                test_client.vector_ops.insert(
                    table_name, {"id": row_id, "embedding1": vector1, "embedding2": vector2}
                )

            # Without column_name - should raise error asking to specify
            with pytest.raises(Exception, match="Multiple vector columns found") as exc_info:
                test_client.vector_ops.get_ivf_stats(table_name)

            # The error should mention at least one of the candidate columns.
            message = str(exc_info.value)
            assert "embedding1" in message or "embedding2" in message

            # With explicit column_name - should work
            stats = test_client.vector_ops.get_ivf_stats(table_name, "embedding1")
            assert stats['column_name'] == 'embedding1'

        finally:
            # Best-effort cleanup; do not swallow KeyboardInterrupt/SystemExit.
            try:
                test_client.drop_table(table_name)
            except Exception:
                pass

    def test_get_ivf_stats_distribution_details(self, test_client, test_table):
        """The distribution series are aligned, integer-valued, and sum to the row count."""
        stats = test_client.vector_ops.get_ivf_stats(test_table, "embedding")

        distribution = stats['distribution']

        # Verify all three lists have the same length
        assert len(distribution['centroid_count']) == len(distribution['centroid_id'])
        assert len(distribution['centroid_count']) == len(distribution['centroid_version'])

        # Verify data types
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            for value in distribution[key]:
                assert isinstance(value, int)

        # Verify total count matches inserted data
        total_vectors = sum(distribution['centroid_count'])
        assert total_vectors == 30  # We inserted 30 vectors in the fixture
|
224
|
+
|
225
|
+
|
226
|
+
class TestIVFStatsAsync:
    """Online tests for ``vector_ops.get_ivf_stats`` on the asynchronous client.

    Every test talks to a live server whose connection parameters come from
    ``online_config.get_connection_params()``.
    """

    @pytest_asyncio.fixture(scope="function")
    async def test_async_client(self):
        """Yield a connected ``AsyncClient``, created fresh for each test.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = AsyncClient()
        await client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                await client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect async client: {e}")

    @pytest_asyncio.fixture(scope="function")
    async def test_async_table(self, test_async_client):
        """Yield the name of a freshly built table with an IVF index and 20 rows.

        The table has ``id``, ``title`` and a 128-dimensional ``vecf32``
        ``embedding`` column, indexed with ``lists=4``. It is dropped again
        after the test.
        """
        table_name = "test_async_ivf_stats_table"

        # Best-effort removal of leftovers from an earlier run. Only
        # ``Exception`` is caught so Ctrl-C / SystemExit still propagate.
        try:
            await test_async_client.drop_table(table_name)
        except Exception:
            pass

        # Create table with vector column
        await test_async_client.create_table(
            table_name,
            columns={"id": "int", "title": "varchar(255)", "embedding": "vecf32(128)"},
            primary_key="id",
        )

        # Create IVF index
        await test_async_client.vector_ops.create_ivf(
            table_name, name="idx_async_test_embedding", column="embedding", lists=4
        )

        # Insert sample data: ids 1..20, each with a random 128-dim vector
        for doc_id in range(1, 21):
            vector = [random.random() for _ in range(128)]
            await test_async_client.vector_ops.insert(
                table_name, {"id": doc_id, "title": f"Async Document {doc_id}", "embedding": vector}
            )

        yield table_name

        # Cleanup
        try:
            await test_async_client.drop_table(table_name)
        except Exception as e:
            print(f"Warning: Failed to drop table {table_name}: {e}")

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_with_column_name(self, test_async_client, test_async_table):
        """Async get_ivf_stats with an explicit column returns the full stats structure."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table, "embedding")

        # Verify structure
        for key in ('index_tables', 'distribution', 'database', 'table_name', 'column_name'):
            assert key in stats

        # Verify values
        assert stats['table_name'] == test_async_table
        assert stats['column_name'] == 'embedding'

        # Verify index tables
        for key in ('metadata', 'centroids', 'entries'):
            assert key in stats['index_tables']

        # Verify distribution
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            assert key in stats['distribution']

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_auto_inference(self, test_async_client, test_async_table):
        """Async get_ivf_stats infers the vector column when none is given."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table)

        # Verify auto-inference worked
        assert stats['column_name'] == 'embedding'

        # Verify structure
        assert 'index_tables' in stats
        assert 'distribution' in stats

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_within_transaction(self, test_async_client, test_async_table):
        """Async get_ivf_stats is usable through a transaction's ``vector_ops``."""
        async with test_async_client.transaction() as tx:
            stats = await tx.vector_ops.get_ivf_stats(test_async_table, "embedding")

            # Verify structure
            assert 'index_tables' in stats
            assert 'distribution' in stats
            assert stats['table_name'] == test_async_table
            assert stats['column_name'] == 'embedding'

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_distribution_details(self, test_async_client, test_async_table):
        """The distribution series are aligned, integer-valued, and sum to the row count."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table, "embedding")

        distribution = stats['distribution']

        # Verify all three lists have the same length
        assert len(distribution['centroid_count']) == len(distribution['centroid_id'])
        assert len(distribution['centroid_count']) == len(distribution['centroid_version'])

        # Verify data types
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            for value in distribution[key]:
                assert isinstance(value, int)

        # Verify total count matches inserted data
        total_vectors = sum(distribution['centroid_count'])
        assert total_vectors == 20  # We inserted 20 vectors in the fixture

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_multiple_vector_columns(self, test_async_client):
        """With two vector columns, the column must be named explicitly."""
        table_name = "test_async_multi_vector_cols"

        try:
            # Create table with multiple vector columns
            await test_async_client.create_table(
                table_name,
                columns={"id": "int", "embedding1": "vecf32(128)", "embedding2": "vecf32(256)"},
                primary_key="id",
            )

            # Create IVF index on first column
            await test_async_client.vector_ops.create_ivf(
                table_name, name="idx_async_embedding1", column="embedding1", lists=3
            )

            # Insert some data (ids 1..10)
            for row_id in range(1, 11):
                vector1 = [random.random() for _ in range(128)]
                vector2 = [random.random() for _ in range(256)]
                await test_async_client.vector_ops.insert(
                    table_name, {"id": row_id, "embedding1": vector1, "embedding2": vector2}
                )

            # Without column_name - should raise error asking to specify.
            # ``match`` performs re.search against str(exception); the pattern
            # has no regex metacharacters, so this is a plain substring check.
            with pytest.raises(Exception, match="Multiple vector columns found"):
                await test_async_client.vector_ops.get_ivf_stats(table_name)

            # With explicit column_name - should work
            stats = await test_async_client.vector_ops.get_ivf_stats(table_name, "embedding1")
            assert stats['column_name'] == 'embedding1'

        finally:
            # Best-effort cleanup; do not swallow KeyboardInterrupt/SystemExit.
            try:
                await test_async_client.drop_table(table_name)
            except Exception:
                pass
|
393
|
+
|
394
|
+
|
395
|
+
class TestIVFStatsEdgeCases:
    """Online edge-case tests for ``vector_ops.get_ivf_stats`` (synchronous client).

    Each test creates its own table(s) and drops them in a ``finally`` block.
    """

    @pytest.fixture(scope="class")
    def test_client(self):
        """Yield a connected ``Client`` shared by all tests in this class.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = Client()
        client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect client: {e}")

    def test_get_ivf_stats_empty_table(self, test_client):
        """get_ivf_stats works on a table that has an IVF index but no rows."""
        table_name = "test_empty_ivf_table"

        try:
            # Create table with vector column
            test_client.create_table(table_name, columns={"id": "int", "embedding": "vecf32(64)"}, primary_key="id")

            # Create IVF index
            test_client.vector_ops.create_ivf(table_name, name="idx_empty_embedding", column="embedding", lists=3)

            # Get stats without inserting data
            stats = test_client.vector_ops.get_ivf_stats(table_name, "embedding")

            # Verify structure exists even with empty data
            assert 'index_tables' in stats
            assert 'distribution' in stats

            # Distribution might be empty or have initial structure
            assert isinstance(stats['distribution']['centroid_count'], list)

        finally:
            # Best-effort cleanup; do not swallow KeyboardInterrupt/SystemExit.
            try:
                test_client.drop_table(table_name)
            except Exception:
                pass

    def test_get_ivf_stats_different_vector_dimensions(self, test_client):
        """get_ivf_stats works for both vector element types (vecf32 and vecf64).

        Both tables use 64-dimensional vectors; only the element type differs.
        Each gets 15 rows, and the centroid counts must sum to 15.
        """
        table_name_f32 = "test_ivf_vecf32"
        table_name_f64 = "test_ivf_vecf64"

        try:
            # Test with vecf32
            test_client.create_table(table_name_f32, columns={"id": "int", "embedding": "vecf32(64)"}, primary_key="id")
            test_client.vector_ops.create_ivf(table_name_f32, "idx_f32", "embedding", lists=3)

            # Insert data (ids 1..15)
            for row_id in range(1, 16):
                test_client.vector_ops.insert(
                    table_name_f32, {"id": row_id, "embedding": [random.random() for _ in range(64)]}
                )

            stats_f32 = test_client.vector_ops.get_ivf_stats(table_name_f32, "embedding")
            assert stats_f32['column_name'] == 'embedding'
            assert sum(stats_f32['distribution']['centroid_count']) == 15

            # Test with vecf64
            test_client.create_table(table_name_f64, columns={"id": "int", "embedding": "vecf64(64)"}, primary_key="id")
            test_client.vector_ops.create_ivf(table_name_f64, "idx_f64", "embedding", lists=3)

            # Insert data (ids 1..15)
            for row_id in range(1, 16):
                test_client.vector_ops.insert(
                    table_name_f64, {"id": row_id, "embedding": [random.random() for _ in range(64)]}
                )

            stats_f64 = test_client.vector_ops.get_ivf_stats(table_name_f64, "embedding")
            assert stats_f64['column_name'] == 'embedding'
            assert sum(stats_f64['distribution']['centroid_count']) == 15

        finally:
            # Drop each table independently so a failure on the first does not
            # prevent cleanup of the second. Only ``Exception`` is caught so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                test_client.drop_table(table_name_f32)
            except Exception:
                pass
            try:
                test_client.drop_table(table_name_f64)
            except Exception:
                pass

    def test_get_ivf_stats_no_vector_column(self, test_client):
        """get_ivf_stats on a table without any vector column raises an error."""
        table_name = "test_no_vector_col"

        try:
            # Create table without vector column
            test_client.create_table(table_name, columns={"id": "int", "name": "varchar(100)"}, primary_key="id")

            # Try to get stats - should fail.
            # ``match`` performs re.search against str(exception); the pattern
            # has no regex metacharacters, so this is a plain substring check.
            with pytest.raises(Exception, match="No vector columns found"):
                test_client.vector_ops.get_ivf_stats(table_name)

        finally:
            # Best-effort cleanup; do not swallow KeyboardInterrupt/SystemExit.
            try:
                test_client.drop_table(table_name)
            except Exception:
                pass
|
502
|
+
|
503
|
+
|
504
|
+
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose output and
    # short tracebacks.
    pytest_args = [__file__, "-v", "--tb=short"]
    pytest.main(pytest_args)
|