matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,557 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
# Copyright 2021 - 2022 Matrix Origin
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
"""
|
18
|
+
Test PineconeCompatibleIndex functionality - Pinecone-compatible vector search interface
|
19
|
+
"""
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
import pytest_asyncio
|
23
|
+
from matrixone import Client, AsyncClient
|
24
|
+
from matrixone.search_vector_index import PineconeCompatibleIndex, VectorMatch, QueryResponse
|
25
|
+
|
26
|
+
|
27
|
+
class TestPineconeCompatibleIndex:
|
28
|
+
"""Test PineconeCompatibleIndex functionality"""
|
29
|
+
|
30
|
+
def test_get_pinecone_index_sync(self, test_client):
    """Check that the sync client hands back a correctly configured PineconeCompatibleIndex.

    Creates a scratch database and table with an IVF index, then verifies the
    table name, vector column, id column and metadata columns reported by the
    returned index object.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS search_vector_test")
    test_client.execute("USE search_vector_test")

    # Table and index creation live inside the try block so that a failure
    # during setup still removes the scratch database.
    try:
        # Start from a clean table in case an earlier run was interrupted.
        test_client.execute("DROP TABLE IF EXISTS test_vectors")
        test_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_vectors (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                content TEXT,
                embedding vecf32(128)
            )
            """
        )

        # Create vector index
        test_client.vector_ops.create_ivf("test_vectors", name="idx_embedding", column="embedding", lists=100)

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_vectors", vector_column="embedding")

        assert isinstance(index, PineconeCompatibleIndex)
        assert index.table_name == "test_vectors"
        assert index.vector_column == "embedding"
        assert index._get_id_column() == "id"
        assert index.metadata_columns == ["title", "content"]

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_vectors")
        test_client.execute("DROP DATABASE IF EXISTS search_vector_test")
|
64
|
+
|
65
|
+
@pytest.mark.asyncio
async def test_get_pinecone_index_async(self, test_async_client):
    """Check that the async client hands back a correctly configured PineconeCompatibleIndex.

    Creates a scratch database and table with an IVF index, then verifies the
    table name, vector column, id column and metadata columns through the
    index object's async accessors.

    Args:
        test_async_client: fixture defined outside this file; presumably a
            connected asynchronous client -- confirm against conftest.
    """
    await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_search_vector_test")
    await test_async_client.execute("USE async_search_vector_test")

    # Table and index creation live inside the try block so that a failure
    # during setup still removes the scratch database.
    try:
        # Start from a clean table in case an earlier run was interrupted.
        await test_async_client.execute("DROP TABLE IF EXISTS test_vectors_async")
        await test_async_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_vectors_async (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                content TEXT,
                embedding vecf32(128)
            )
            """
        )

        # Create vector index
        await test_async_client.vector_ops.create_ivf(
            "test_vectors_async",
            name="idx_embedding_async",
            column="embedding",
            lists=100,
        )

        # Get PineconeCompatibleIndex object
        index = test_async_client.get_pinecone_index("test_vectors_async", vector_column="embedding")

        assert isinstance(index, PineconeCompatibleIndex)
        assert index.table_name == "test_vectors_async"
        assert index.vector_column == "embedding"
        assert await index._get_id_column_async() == "id"
        assert await index._get_metadata_columns_async() == ["title", "content"]

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        await test_async_client.execute("DROP TABLE IF EXISTS test_vectors_async")
        await test_async_client.execute("DROP DATABASE IF EXISTS async_search_vector_test")
|
105
|
+
|
106
|
+
def test_parse_index_info(self, test_client):
    """Check the index information reported for a table with an HNSW index.

    Creates a 256-dimension vector column with an HNSW index and asserts the
    dimensions, algorithm, metric and parameter keys returned by
    ``_get_index_info()``.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS parse_test")
    test_client.execute("USE parse_test")

    # Table and index creation live inside the try block so that a failure
    # during setup still removes the scratch database.
    try:
        # Drop table if exists to ensure clean state
        test_client.execute("DROP TABLE IF EXISTS test_parse")

        test_client.execute(
            """
            CREATE TABLE test_parse (
                id BIGINT PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(256)
            )
            """
        )

        # Create vector index
        test_client.vector_ops.create_hnsw("test_parse", name="idx_hnsw", column="embedding", m=16, ef_construction=200)

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_parse", vector_column="embedding")

        # Test parsing index info
        index_info = index._get_index_info()

        assert index_info["dimensions"] == 256
        assert index_info["algorithm"] == "hnsw"
        assert index_info["metric"] == "l2"  # vector_l2_ops maps to l2
        assert "m" in index_info["parameters"]
        assert "ef_construction" in index_info["parameters"]

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_parse")
        test_client.execute("DROP DATABASE IF EXISTS parse_test")
|
145
|
+
|
146
|
+
def test_query_basic(self, test_client):
    """Run a top-k query through the sync index and check the response shape.

    Inserts three 64-dimension documents, queries with a vector equal to the
    first document's embedding, and checks the response type, match count
    bound, namespace, usage and per-match fields.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS query_test")
    test_client.execute("USE query_test")

    # Table creation, indexing and inserts live inside the try block so that
    # a failure during setup still removes the scratch database.
    try:
        # Start from a clean table: leftover rows from an interrupted run
        # would make the INSERT below fail on duplicate primary keys.
        test_client.execute("DROP TABLE IF EXISTS test_query")
        test_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_query (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                category VARCHAR(50),
                embedding vecf32(64)
            )
            """
        )

        # Create vector index
        test_client.vector_ops.create_ivf("test_query", name="idx_query", column="embedding", lists=10)

        # Insert test data
        test_client.execute(
            """
            INSERT INTO test_query (id, title, category, embedding) VALUES
            ('doc1', 'Machine Learning Guide', 'AI', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
            ('doc2', 'Python Programming', 'Programming', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]'),
            ('doc3', 'Database Design', 'Database', '[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]')
            """
        )

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_query", vector_column="embedding")

        # 6 * 10 + 4 = 64 components, matching vecf32(64).
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 6 + [0.1, 0.2, 0.3, 0.4]
        results = index.query(query_vector, top_k=2, include_metadata=True)

        assert isinstance(results, QueryResponse)
        assert len(results.matches) <= 2
        assert results.namespace == ""
        assert results.usage is not None

        for match in results.matches:
            assert isinstance(match, VectorMatch)
            assert match.id in ["doc1", "doc2", "doc3"]
            assert isinstance(match.score, float)
            assert "title" in match.metadata
            assert "category" in match.metadata

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_query")
        test_client.execute("DROP DATABASE IF EXISTS query_test")
|
205
|
+
|
206
|
+
@pytest.mark.asyncio
async def test_query_async(self, test_async_client):
    """Run a top-k query through the async index and check the response shape.

    Inserts two 32-dimension documents, queries with a vector equal to the
    first document's embedding via ``query_async``, and checks the response
    type, match count bound, namespace and per-match fields.

    Args:
        test_async_client: fixture defined outside this file; presumably a
            connected asynchronous client -- confirm against conftest.
    """
    await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_query_test")
    await test_async_client.execute("USE async_query_test")

    # Table creation, indexing and inserts live inside the try block so that
    # a failure during setup still removes the scratch database.
    try:
        # Start from a clean table: leftover rows from an interrupted run
        # would make the INSERT below fail on duplicate primary keys.
        await test_async_client.execute("DROP TABLE IF EXISTS test_query_async")
        await test_async_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_query_async (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(32)
            )
            """
        )

        # Create vector index
        await test_async_client.vector_ops.create_ivf(
            "test_query_async", name="idx_query_async", column="embedding", lists=5
        )

        # Insert test data
        await test_async_client.execute(
            """
            INSERT INTO test_query_async (id, title, embedding) VALUES
            ('doc1', 'Test Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2]'),
            ('doc2', 'Test Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3]')
            """
        )

        # Get PineconeCompatibleIndex object
        index = test_async_client.get_pinecone_index("test_query_async", vector_column="embedding")

        # 3 * 10 + 2 = 32 components, matching vecf32(32).
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 3 + [0.1, 0.2]
        results = await index.query_async(query_vector, top_k=1, include_metadata=True)

        assert isinstance(results, QueryResponse)
        assert len(results.matches) <= 1
        assert results.namespace == ""

        for match in results.matches:
            assert isinstance(match, VectorMatch)
            assert match.id in ["doc1", "doc2"]
            assert isinstance(match.score, float)
            assert "title" in match.metadata

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        await test_async_client.execute("DROP TABLE IF EXISTS test_query_async")
        await test_async_client.execute("DROP DATABASE IF EXISTS async_query_test")
|
259
|
+
|
260
|
+
def test_delete_functionality(self, test_client):
    """Check that ``delete`` removes the requested row on an IVF-indexed table.

    Inserts two rows with SQL, deletes one by its string id through the index
    object, and verifies both the remaining row count and that the surviving
    row is the one that was not deleted.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS upsert_test")
    test_client.execute("USE upsert_test")

    # Table and index creation live inside the try block so that a failure
    # during setup still removes the scratch database.
    try:
        # Drop table if exists to ensure clean state
        test_client.execute("DROP TABLE IF EXISTS test_upsert")

        test_client.execute(
            """
            CREATE TABLE test_upsert (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(16)
            )
            """
        )

        # Create IVF vector index (required for upsert/delete operations)
        test_client.vector_ops.create_ivf("test_upsert", name="idx_upsert", column="embedding", lists=5)

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_upsert", vector_column="embedding")

        # Insert test data directly using SQL
        test_client.execute(
            """
            INSERT INTO test_upsert (id, title, embedding) VALUES
            ('test1', 'Test Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]'),
            ('test2', 'Test Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]')
            """
        )

        # Verify data was inserted
        count_result = test_client.execute("SELECT COUNT(*) FROM test_upsert")
        assert count_result.rows[0][0] == 2

        # Test delete with string IDs
        index.delete(["test1"])

        # Verify data was deleted
        count_result = test_client.execute("SELECT COUNT(*) FROM test_upsert")
        assert count_result.rows[0][0] == 1

        # A count alone cannot tell which row went away; make sure the
        # survivor is the row that was not targeted.
        remaining = test_client.execute("SELECT id FROM test_upsert")
        assert remaining.rows[0][0] == "test2"

        # NOTE: delete with mixed ID types, e.g. index.delete([1, "test2", 3.14]),
        # is not exercised here because the table only holds string ids.

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_upsert")
        test_client.execute("DROP DATABASE IF EXISTS upsert_test")
|
314
|
+
|
315
|
+
def test_describe_index_stats(self, test_client):
    """Check the statistics dictionary returned by ``describe_index_stats``.

    Inserts two 64-dimension documents and asserts the reported dimension,
    total vector count and the per-namespace count under the "" namespace.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS stats_test")
    test_client.execute("USE stats_test")

    # Table creation, indexing and inserts live inside the try block so that
    # a failure during setup still removes the scratch database.
    try:
        # Start from a clean table: the count assertions below require
        # exactly the two rows inserted here.
        test_client.execute("DROP TABLE IF EXISTS test_stats")
        test_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_stats (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(64)
            )
            """
        )

        # Create vector index
        test_client.vector_ops.create_ivf("test_stats", name="idx_stats", column="embedding", lists=10)

        # Insert test data
        test_client.execute(
            """
            INSERT INTO test_stats (id, title, embedding) VALUES
            ('doc1', 'Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
            ('doc2', 'Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]')
            """
        )

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_stats", vector_column="embedding")

        # Test describe index stats
        stats = index.describe_index_stats()

        assert isinstance(stats, dict)
        assert "dimension" in stats
        assert "total_vector_count" in stats
        assert "namespaces" in stats
        assert stats["dimension"] == 64
        assert stats["total_vector_count"] == 2
        assert "" in stats["namespaces"]
        assert stats["namespaces"][""]["vector_count"] == 2

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_stats")
        test_client.execute("DROP DATABASE IF EXISTS stats_test")
|
363
|
+
|
364
|
+
def test_hnsw_upsert_not_supported(self, test_client):
    """Check that ``delete`` is rejected on a table backed by an HNSW index.

    Despite the test name, only ``delete`` is exercised here: it must raise
    ``ValueError`` with the "does not support delete operations" message for
    both string and integer ids. Upsert is not called by this test.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS hnsw_upsert_test")
    test_client.execute("USE hnsw_upsert_test")

    # Table and index creation live inside the try block so that a failure
    # during setup still removes the scratch database.
    try:
        # Drop table if exists to ensure clean state
        test_client.execute("DROP TABLE IF EXISTS test_hnsw_upsert")

        test_client.execute(
            """
            CREATE TABLE test_hnsw_upsert (
                id BIGINT PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(64)
            )
            """
        )

        # Create HNSW vector index
        test_client.vector_ops.create_hnsw(
            "test_hnsw_upsert",
            name="idx_hnsw_upsert",
            column="embedding",
            m=16,
            ef_construction=200,
        )

        # Get PineconeCompatibleIndex object
        index = test_client.get_pinecone_index("test_hnsw_upsert", vector_column="embedding")

        # delete with string ids must be refused for an HNSW index
        with pytest.raises(ValueError, match="HNSW index does not support delete operations"):
            index.delete(["test1"])

        # The refusal must not depend on the id type
        with pytest.raises(ValueError, match="HNSW index does not support delete operations"):
            index.delete([1, 2, 3])  # Integer IDs

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_hnsw_upsert")
        test_client.execute("DROP DATABASE IF EXISTS hnsw_upsert_test")
|
411
|
+
|
412
|
+
|
413
|
+
class TestPineconeCompatibleIndexCaseInsensitive:
|
414
|
+
"""Test case-insensitive column name handling in PineconeCompatibleIndex"""
|
415
|
+
|
416
|
+
def test_case_insensitive_column_names_sync(self, test_client):
    """Check that the sync index resolves the vector column regardless of letter case.

    The table declares mixed-case column names; the index is requested with
    several spellings of the vector column and must report the same two
    metadata columns and answer queries each time.

    Args:
        test_client: fixture defined outside this file; presumably a connected
            synchronous client -- confirm against conftest.
    """
    test_client.execute("CREATE DATABASE IF NOT EXISTS case_test")
    test_client.execute("USE case_test")

    # Table creation, indexing and inserts live inside the try block so that
    # a failure during setup still removes the scratch database.
    try:
        # Start from a clean table: leftover rows from an interrupted run
        # would make the INSERT below fail on duplicate primary keys.
        test_client.execute("DROP TABLE IF EXISTS test_case_vectors")
        test_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_case_vectors (
                ID VARCHAR(50) PRIMARY KEY,
                Title VARCHAR(200),
                Content TEXT,
                Embedding vecf32(64)
            )
            """
        )

        # Create vector index
        test_client.vector_ops.create_ivf("test_case_vectors", name="idx_case_embedding", column="Embedding", lists=10)

        # Insert test data
        test_client.execute(
            """
            INSERT INTO test_case_vectors (ID, Title, Content, Embedding) VALUES
            ('doc1', 'Test Document 1', 'Content 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
            ('doc2', 'Test Document 2', 'Content 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]')
            """
        )

        # Spellings of the vector column name to try. The last entry is a
        # genuinely mixed-case spelling distinct from the declared one.
        test_cases = [
            "Embedding",  # Original case
            "embedding",  # Lowercase
            "EMBEDDING",  # Uppercase
            "eMbEdDiNg",  # Mixed case
        ]

        # The query vector does not depend on the spelling, so build it once.
        # 6 * 10 + 4 = 64 components, matching vecf32(64).
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 6 + [0.1, 0.2, 0.3, 0.4]

        for vector_col in test_cases:
            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_case_vectors", vector_column=vector_col)

            # Test that metadata columns are correctly identified (case-insensitive)
            metadata_cols = index.metadata_columns
            assert "Title" in metadata_cols or "title" in metadata_cols
            assert "Content" in metadata_cols or "content" in metadata_cols
            assert len(metadata_cols) == 2  # Should exclude ID and Embedding

            # Test query functionality
            results = index.query(query_vector, top_k=1, include_metadata=True)

            assert isinstance(results, QueryResponse)
            # top_k=1 bounds the number of matches from above.
            assert len(results.matches) <= 1

            if results.matches:
                match = results.matches[0]
                assert match.id in ["doc1", "doc2"]
                assert "Title" in match.metadata or "title" in match.metadata
                assert "Content" in match.metadata or "content" in match.metadata

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        test_client.execute("DROP TABLE IF EXISTS test_case_vectors")
        test_client.execute("DROP DATABASE IF EXISTS case_test")
|
486
|
+
|
487
|
+
@pytest.mark.asyncio
async def test_case_insensitive_column_names_async(self, test_async_client):
    """Check that the async index resolves the vector column regardless of letter case.

    The table declares mixed-case column names; the index is requested with
    several spellings of the vector column and must report the same two
    metadata columns and answer async queries each time.

    Args:
        test_async_client: fixture defined outside this file; presumably a
            connected asynchronous client -- confirm against conftest.
    """
    await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_case_test")
    await test_async_client.execute("USE async_case_test")

    # Table creation, indexing and inserts live inside the try block so that
    # a failure during setup still removes the scratch database.
    try:
        # Start from a clean table: leftover rows from an interrupted run
        # would make the INSERT below fail on duplicate primary keys.
        await test_async_client.execute("DROP TABLE IF EXISTS test_case_vectors_async")
        await test_async_client.execute(
            """
            CREATE TABLE IF NOT EXISTS test_case_vectors_async (
                ID VARCHAR(50) PRIMARY KEY,
                Title VARCHAR(200),
                Content TEXT,
                Embedding vecf32(32)
            )
            """
        )

        # Create vector index
        await test_async_client.vector_ops.create_ivf(
            "test_case_vectors_async",
            name="idx_case_embedding_async",
            column="Embedding",
            lists=5,
        )

        # Insert test data
        await test_async_client.execute(
            """
            INSERT INTO test_case_vectors_async (ID, Title, Content, Embedding) VALUES
            ('doc1', 'Test Document 1', 'Content 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2]'),
            ('doc2', 'Test Document 2', 'Content 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3]')
            """
        )

        # Spellings of the vector column name to try. The last entry is a
        # genuinely mixed-case spelling distinct from the declared one.
        test_cases = [
            "Embedding",  # Original case
            "embedding",  # Lowercase
            "EMBEDDING",  # Uppercase
            "eMbEdDiNg",  # Mixed case
        ]

        # The query vector does not depend on the spelling, so build it once.
        # 3 * 10 + 2 = 32 components, matching vecf32(32).
        query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 3 + [0.1, 0.2]

        for vector_col in test_cases:
            # Get PineconeCompatibleIndex object
            index = test_async_client.get_pinecone_index("test_case_vectors_async", vector_column=vector_col)

            # Test that metadata columns are correctly identified (case-insensitive)
            metadata_cols = await index._get_metadata_columns_async()
            assert "Title" in metadata_cols or "title" in metadata_cols
            assert "Content" in metadata_cols or "content" in metadata_cols
            assert len(metadata_cols) == 2  # Should exclude ID and Embedding

            # Test async query functionality
            results = await index.query_async(query_vector, top_k=1, include_metadata=True)

            assert isinstance(results, QueryResponse)
            # top_k=1 bounds the number of matches from above.
            assert len(results.matches) <= 1

            if results.matches:
                match = results.matches[0]
                assert match.id in ["doc1", "doc2"]
                assert "Title" in match.metadata or "title" in match.metadata
                assert "Content" in match.metadata or "content" in match.metadata

    finally:
        # IF EXISTS keeps cleanup from raising and masking the real failure.
        await test_async_client.execute("DROP TABLE IF EXISTS test_case_vectors_async")
        await test_async_client.execute("DROP DATABASE IF EXISTS async_case_test")
|