matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,506 @@
1
+ # Copyright 2021 - 2022 Matrix Origin
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Online tests for IVF index statistics functionality.
17
+ Tests the get_ivf_stats method for both sync and async clients.
18
+ """
19
+
20
+ import pytest
21
+ import pytest_asyncio
22
+ import sys
23
+ import os
24
+ import random
25
+
26
+ # Add the project root to Python path
27
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
28
+
29
+ from matrixone import Client, AsyncClient
30
+ from .test_config import online_config
31
+
32
+
33
class TestIVFStatsSync:
    """Online tests for ``vector_ops.get_ivf_stats`` using the synchronous client.

    Every test here talks to a live MatrixOne server; connection parameters
    are taken from ``online_config``.
    """

    @staticmethod
    def _drop_table_quietly(client, table_name, warn=False):
        """Drop ``table_name`` on a best-effort basis.

        Args:
            client: Connected client exposing ``drop_table``.
            table_name: Name of the table to drop.
            warn: When true, print a warning if the drop fails; otherwise the
                failure is ignored.
        """
        try:
            client.drop_table(table_name)
        except Exception as e:
            # ``except Exception`` rather than a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit still abort the test run.
            if warn:
                print(f"Warning: Failed to drop table {table_name}: {e}")

    @pytest.fixture(scope="class")
    def test_client(self):
        """Yield a connected ``Client`` shared by all tests of this class.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = Client()
        client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect client: {e}")

    @pytest.fixture(scope="function")
    def test_table(self, test_client):
        """Yield the name of a fresh table with an IVF index and 30 random rows.

        The table has columns ``id``, ``title`` and a 128-dimensional
        ``embedding``; the IVF index is created with ``lists=5``.
        """
        table_name = "test_ivf_stats_table"

        # Remove any leftover from a previous run before creating the table.
        self._drop_table_quietly(test_client, table_name)

        # Setup lives inside the try block so the table is dropped even when
        # index creation or an insert fails before the test body ever runs.
        try:
            test_client.create_table(
                table_name, columns={"id": "int", "title": "varchar(255)", "embedding": "vecf32(128)"}, primary_key="id"
            )
            test_client.vector_ops.create_ivf(table_name, name="idx_test_embedding", column="embedding", lists=5)

            for i in range(30):
                vector = [random.random() for _ in range(128)]
                test_client.vector_ops.insert(table_name, {"id": i + 1, "title": f"Document {i+1}", "embedding": vector})

            yield table_name
        finally:
            self._drop_table_quietly(test_client, table_name, warn=True)

    def test_get_ivf_stats_with_column_name(self, test_client, test_table):
        """get_ivf_stats with an explicit column returns the full stats structure."""
        stats = test_client.vector_ops.get_ivf_stats(test_table, "embedding")

        # Top-level structure
        for key in ('index_tables', 'distribution', 'database', 'table_name', 'column_name'):
            assert key in stats

        # Identifying values
        assert stats['table_name'] == test_table
        assert stats['column_name'] == 'embedding'
        # NOTE(review): this reads a private attribute of the client.
        assert stats['database'] == test_client._connection_params['database']

        # Index tables
        for key in ('metadata', 'centroids', 'entries'):
            assert key in stats['index_tables']

        # Distribution: each series must be present, a list, and non-empty
        distribution = stats['distribution']
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            assert key in distribution
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            assert isinstance(distribution[key], list)
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            assert len(distribution[key]) > 0

    def test_get_ivf_stats_auto_inference(self, test_client, test_table):
        """get_ivf_stats without a column name picks the table's only vector column."""
        stats = test_client.vector_ops.get_ivf_stats(test_table)

        # Auto-inference must resolve to the single vector column
        assert stats['column_name'] == 'embedding'

        assert 'index_tables' in stats
        assert 'distribution' in stats

    def test_get_ivf_stats_within_transaction(self, test_client, test_table):
        """get_ivf_stats is usable through the transaction wrapper's vector_ops."""
        with test_client.transaction() as tx:
            stats = tx.vector_ops.get_ivf_stats(test_table, "embedding")

            assert 'index_tables' in stats
            assert 'distribution' in stats
            assert stats['table_name'] == test_table
            assert stats['column_name'] == 'embedding'

    def test_get_ivf_stats_nonexistent_table(self, test_client):
        """get_ivf_stats on a table that does not exist raises "No IVF index found"."""
        with pytest.raises(Exception) as exc_info:
            test_client.vector_ops.get_ivf_stats("nonexistent_table", "embedding")

        assert "No IVF index found" in str(exc_info.value)

    def test_get_ivf_stats_no_ivf_index(self, test_client):
        """get_ivf_stats on a vector table without an IVF index raises."""
        table_name = "test_no_ivf_index"

        # Same pre-clean the table fixture does, so a leftover table from an
        # interrupted earlier run cannot interfere with create_table.
        self._drop_table_quietly(test_client, table_name)

        try:
            # Table with a vector column but no IVF index
            test_client.create_table(table_name, columns={"id": "int", "embedding": "vecf32(128)"}, primary_key="id")

            with pytest.raises(Exception) as exc_info:
                test_client.vector_ops.get_ivf_stats(table_name, "embedding")

            assert "No IVF index found" in str(exc_info.value)
        finally:
            self._drop_table_quietly(test_client, table_name)

    def test_get_ivf_stats_multiple_vector_columns(self, test_client):
        """With two vector columns the column name must be given explicitly."""
        table_name = "test_multi_vector_cols"

        self._drop_table_quietly(test_client, table_name)

        try:
            test_client.create_table(
                table_name, columns={"id": "int", "embedding1": "vecf32(128)", "embedding2": "vecf32(256)"}, primary_key="id"
            )

            # Only the first vector column gets an IVF index
            test_client.vector_ops.create_ivf(table_name, name="idx_embedding1", column="embedding1", lists=3)

            for i in range(10):
                vector1 = [random.random() for _ in range(128)]
                vector2 = [random.random() for _ in range(256)]
                test_client.vector_ops.insert(table_name, {"id": i + 1, "embedding1": vector1, "embedding2": vector2})

            # Without column_name the call must fail and name the candidates
            with pytest.raises(Exception) as exc_info:
                test_client.vector_ops.get_ivf_stats(table_name)

            message = str(exc_info.value)
            assert "Multiple vector columns found" in message
            assert "embedding1" in message or "embedding2" in message

            # With an explicit column_name the call succeeds
            stats = test_client.vector_ops.get_ivf_stats(table_name, "embedding1")
            assert stats['column_name'] == 'embedding1'
        finally:
            self._drop_table_quietly(test_client, table_name)

    def test_get_ivf_stats_distribution_details(self, test_client, test_table):
        """The distribution series are aligned, integer-valued and sum to the row count."""
        stats = test_client.vector_ops.get_ivf_stats(test_table, "embedding")

        distribution = stats['distribution']

        # All three series describe the same centroids, so lengths must match
        assert len(distribution['centroid_count']) == len(distribution['centroid_id'])
        assert len(distribution['centroid_count']) == len(distribution['centroid_version'])

        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            for value in distribution[key]:
                assert isinstance(value, int)

        # Per-centroid counts must add up to the rows inserted by the fixture
        total_vectors = sum(distribution['centroid_count'])
        assert total_vectors == 30  # We inserted 30 vectors in the fixture
224
+
225
+
226
class TestIVFStatsAsync:
    """Online tests for ``vector_ops.get_ivf_stats`` using the asynchronous client.

    Every test here talks to a live MatrixOne server; connection parameters
    are taken from ``online_config``.
    """

    @staticmethod
    async def _drop_table_quietly(client, table_name, warn=False):
        """Drop ``table_name`` on a best-effort basis.

        Args:
            client: Connected async client exposing an awaitable ``drop_table``.
            table_name: Name of the table to drop.
            warn: When true, print a warning if the drop fails; otherwise the
                failure is ignored.
        """
        try:
            await client.drop_table(table_name)
        except Exception as e:
            # ``except Exception`` rather than a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit still abort the test run.
            if warn:
                print(f"Warning: Failed to drop table {table_name}: {e}")

    @pytest_asyncio.fixture(scope="function")
    async def test_async_client(self):
        """Yield a connected ``AsyncClient``, one per test function.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = AsyncClient()
        await client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                await client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect async client: {e}")

    @pytest_asyncio.fixture(scope="function")
    async def test_async_table(self, test_async_client):
        """Yield the name of a fresh table with an IVF index and 20 random rows.

        The table has columns ``id``, ``title`` and a 128-dimensional
        ``embedding``; the IVF index is created with ``lists=4``.
        """
        table_name = "test_async_ivf_stats_table"

        # Remove any leftover from a previous run before creating the table.
        await self._drop_table_quietly(test_async_client, table_name)

        # Setup lives inside the try block so the table is dropped even when
        # index creation or an insert fails before the test body ever runs.
        try:
            await test_async_client.create_table(
                table_name, columns={"id": "int", "title": "varchar(255)", "embedding": "vecf32(128)"}, primary_key="id"
            )
            await test_async_client.vector_ops.create_ivf(
                table_name, name="idx_async_test_embedding", column="embedding", lists=4
            )

            for i in range(20):
                vector = [random.random() for _ in range(128)]
                await test_async_client.vector_ops.insert(
                    table_name, {"id": i + 1, "title": f"Async Document {i+1}", "embedding": vector}
                )

            yield table_name
        finally:
            await self._drop_table_quietly(test_async_client, table_name, warn=True)

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_with_column_name(self, test_async_client, test_async_table):
        """Async get_ivf_stats with an explicit column returns the full stats structure."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table, "embedding")

        # Top-level structure
        for key in ('index_tables', 'distribution', 'database', 'table_name', 'column_name'):
            assert key in stats

        # Identifying values
        assert stats['table_name'] == test_async_table
        assert stats['column_name'] == 'embedding'

        # Index tables
        for key in ('metadata', 'centroids', 'entries'):
            assert key in stats['index_tables']

        # Distribution series
        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            assert key in stats['distribution']

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_auto_inference(self, test_async_client, test_async_table):
        """Async get_ivf_stats without a column name picks the only vector column."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table)

        # Auto-inference must resolve to the single vector column
        assert stats['column_name'] == 'embedding'

        assert 'index_tables' in stats
        assert 'distribution' in stats

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_within_transaction(self, test_async_client, test_async_table):
        """Async get_ivf_stats is usable through the transaction wrapper's vector_ops."""
        async with test_async_client.transaction() as tx:
            stats = await tx.vector_ops.get_ivf_stats(test_async_table, "embedding")

            assert 'index_tables' in stats
            assert 'distribution' in stats
            assert stats['table_name'] == test_async_table
            assert stats['column_name'] == 'embedding'

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_distribution_details(self, test_async_client, test_async_table):
        """The distribution series are aligned, integer-valued and sum to the row count."""
        stats = await test_async_client.vector_ops.get_ivf_stats(test_async_table, "embedding")

        distribution = stats['distribution']

        # All three series describe the same centroids, so lengths must match
        assert len(distribution['centroid_count']) == len(distribution['centroid_id'])
        assert len(distribution['centroid_count']) == len(distribution['centroid_version'])

        for key in ('centroid_count', 'centroid_id', 'centroid_version'):
            for value in distribution[key]:
                assert isinstance(value, int)

        # Per-centroid counts must add up to the rows inserted by the fixture
        total_vectors = sum(distribution['centroid_count'])
        assert total_vectors == 20  # We inserted 20 vectors in the fixture

    @pytest.mark.asyncio
    async def test_async_get_ivf_stats_multiple_vector_columns(self, test_async_client):
        """With two vector columns the column name must be given explicitly."""
        table_name = "test_async_multi_vector_cols"

        # Same pre-clean the table fixture does, so a leftover table from an
        # interrupted earlier run cannot interfere with create_table.
        await self._drop_table_quietly(test_async_client, table_name)

        try:
            await test_async_client.create_table(
                table_name, columns={"id": "int", "embedding1": "vecf32(128)", "embedding2": "vecf32(256)"}, primary_key="id"
            )

            # Only the first vector column gets an IVF index
            await test_async_client.vector_ops.create_ivf(
                table_name, name="idx_async_embedding1", column="embedding1", lists=3
            )

            for i in range(10):
                vector1 = [random.random() for _ in range(128)]
                vector2 = [random.random() for _ in range(256)]
                await test_async_client.vector_ops.insert(
                    table_name, {"id": i + 1, "embedding1": vector1, "embedding2": vector2}
                )

            # Without column_name the call must fail
            with pytest.raises(Exception) as exc_info:
                await test_async_client.vector_ops.get_ivf_stats(table_name)

            assert "Multiple vector columns found" in str(exc_info.value)

            # With an explicit column_name the call succeeds
            stats = await test_async_client.vector_ops.get_ivf_stats(table_name, "embedding1")
            assert stats['column_name'] == 'embedding1'
        finally:
            await self._drop_table_quietly(test_async_client, table_name)
393
+
394
+
395
class TestIVFStatsEdgeCases:
    """Online edge-case tests for ``vector_ops.get_ivf_stats`` (synchronous client).

    Every test here talks to a live MatrixOne server; connection parameters
    are taken from ``online_config``.
    """

    @staticmethod
    def _drop_table_quietly(client, table_name):
        """Drop ``table_name`` on a best-effort basis, ignoring ordinary failures."""
        try:
            client.drop_table(table_name)
        except Exception:
            # ``except Exception`` rather than a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit still abort the test run.
            pass

    @pytest.fixture(scope="class")
    def test_client(self):
        """Yield a connected ``Client`` shared by all tests of this class.

        The client is disconnected on teardown; a failing disconnect is only
        reported, never raised.
        """
        host, port, user, password, database = online_config.get_connection_params()
        client = Client()
        client.connect(host=host, port=port, user=user, password=password, database=database)
        try:
            yield client
        finally:
            try:
                client.disconnect()
            except Exception as e:
                print(f"Warning: Failed to disconnect client: {e}")

    def test_get_ivf_stats_empty_table(self, test_client):
        """get_ivf_stats works on a table that has an IVF index but no rows."""
        table_name = "test_empty_ivf_table"

        # Pre-clean so a leftover table from an interrupted earlier run
        # cannot interfere with create_table.
        self._drop_table_quietly(test_client, table_name)

        try:
            test_client.create_table(table_name, columns={"id": "int", "embedding": "vecf32(64)"}, primary_key="id")
            test_client.vector_ops.create_ivf(table_name, name="idx_empty_embedding", column="embedding", lists=3)

            # Query stats without inserting any data
            stats = test_client.vector_ops.get_ivf_stats(table_name, "embedding")

            # The structure must exist even with no data
            assert 'index_tables' in stats
            assert 'distribution' in stats

            # The distribution may be empty, but it must still be a list
            assert isinstance(stats['distribution']['centroid_count'], list)
        finally:
            self._drop_table_quietly(test_client, table_name)

    def test_get_ivf_stats_different_vector_dimensions(self, test_client):
        """get_ivf_stats works for both vector element types (vecf32 and vecf64).

        Both tables use 64-dimensional vectors; only the element type differs.
        Each table receives 15 rows, and the per-centroid counts must sum to 15.
        """
        # (table name, vector column type, IVF index name)
        cases = (
            ("test_ivf_vecf32", "vecf32(64)", "idx_f32"),
            ("test_ivf_vecf64", "vecf64(64)", "idx_f64"),
        )

        for table_name, _column_type, _index_name in cases:
            self._drop_table_quietly(test_client, table_name)

        try:
            for table_name, column_type, index_name in cases:
                test_client.create_table(table_name, columns={"id": "int", "embedding": column_type}, primary_key="id")
                test_client.vector_ops.create_ivf(table_name, index_name, "embedding", lists=3)

                for i in range(15):
                    test_client.vector_ops.insert(
                        table_name, {"id": i + 1, "embedding": [random.random() for _ in range(64)]}
                    )

                stats = test_client.vector_ops.get_ivf_stats(table_name, "embedding")
                assert stats['column_name'] == 'embedding'
                assert sum(stats['distribution']['centroid_count']) == 15
        finally:
            # Drop both tables regardless of which step failed
            for table_name, _column_type, _index_name in cases:
                self._drop_table_quietly(test_client, table_name)

    def test_get_ivf_stats_no_vector_column(self, test_client):
        """get_ivf_stats on a table without vector columns raises."""
        table_name = "test_no_vector_col"

        self._drop_table_quietly(test_client, table_name)

        try:
            # Table with no vector column at all
            test_client.create_table(table_name, columns={"id": "int", "name": "varchar(100)"}, primary_key="id")

            with pytest.raises(Exception) as exc_info:
                test_client.vector_ops.get_ivf_stats(table_name)

            assert "No vector columns found" in str(exc_info.value)
        finally:
            self._drop_table_quietly(test_client, table_name)
502
+
503
+
504
if __name__ == "__main__":
    # Run this file's tests with pytest and propagate pytest's exit code, so
    # a direct script run reports failure through the process status.
    sys.exit(pytest.main([__file__, "-v", "--tb=short"]))