matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,557 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # Copyright 2021 - 2022 Matrix Origin
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ """
18
+ Test PineconeCompatibleIndex functionality - Pinecone-compatible vector search interface
19
+ """
20
+
21
+ import pytest
22
+ import pytest_asyncio
23
+ from matrixone import Client, AsyncClient
24
+ from matrixone.search_vector_index import PineconeCompatibleIndex, VectorMatch, QueryResponse
25
+
26
+
27
class TestPineconeCompatibleIndex:
    """Online tests for PineconeCompatibleIndex (sync and async clients).

    Every test provisions its own database and table and removes them again.
    All setup runs inside the ``try`` block and the cleanup statements use
    ``IF EXISTS``, so a failure part-way through setup still triggers cleanup,
    and cleanup does not fail merely because an object was never created.
    Tables are dropped before being created so that leftovers from an
    interrupted earlier run cannot cause duplicate-key or wrong-count failures.
    """

    def test_get_pinecone_index_sync(self, test_client):
        """Test getting PineconeCompatibleIndex from sync client"""
        try:
            # Create a test table with vector column
            test_client.execute("CREATE DATABASE IF NOT EXISTS search_vector_test")
            test_client.execute("USE search_vector_test")

            # Drop table if exists to ensure clean state
            test_client.execute("DROP TABLE IF EXISTS test_vectors")

            test_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_vectors (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                content TEXT,
                embedding vecf32(128)
                )
                """
            )

            # Create vector index
            test_client.vector_ops.create_ivf("test_vectors", name="idx_embedding", column="embedding", lists=100)

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_vectors", vector_column="embedding")

            assert isinstance(index, PineconeCompatibleIndex)
            assert index.table_name == "test_vectors"
            assert index.vector_column == "embedding"
            assert index._get_id_column() == "id"
            assert index.metadata_columns == ["title", "content"]

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_vectors")
            test_client.execute("DROP DATABASE IF EXISTS search_vector_test")

    @pytest.mark.asyncio
    async def test_get_pinecone_index_async(self, test_async_client):
        """Test getting PineconeCompatibleIndex from async client"""
        try:
            # Create a test table with vector column
            await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_search_vector_test")
            await test_async_client.execute("USE async_search_vector_test")

            # Drop table if exists to ensure clean state
            await test_async_client.execute("DROP TABLE IF EXISTS test_vectors_async")

            await test_async_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_vectors_async (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                content TEXT,
                embedding vecf32(128)
                )
                """
            )

            # Create vector index
            await test_async_client.vector_ops.create_ivf(
                "test_vectors_async",
                name="idx_embedding_async",
                column="embedding",
                lists=100,
            )

            # Get PineconeCompatibleIndex object
            index = test_async_client.get_pinecone_index("test_vectors_async", vector_column="embedding")

            assert isinstance(index, PineconeCompatibleIndex)
            assert index.table_name == "test_vectors_async"
            assert index.vector_column == "embedding"
            assert await index._get_id_column_async() == "id"
            assert await index._get_metadata_columns_async() == ["title", "content"]

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            await test_async_client.execute("DROP TABLE IF EXISTS test_vectors_async")
            await test_async_client.execute("DROP DATABASE IF EXISTS async_search_vector_test")

    def test_parse_index_info(self, test_client):
        """Test parsing index information from CREATE TABLE statement"""
        try:
            # Create a test table with vector column and index
            test_client.execute("CREATE DATABASE IF NOT EXISTS parse_test")
            test_client.execute("USE parse_test")

            # Drop table if exists to ensure clean state
            test_client.execute("DROP TABLE IF EXISTS test_parse")

            test_client.execute(
                """
                CREATE TABLE test_parse (
                id BIGINT PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(256)
                )
                """
            )

            # Create vector index
            test_client.vector_ops.create_hnsw(
                "test_parse", name="idx_hnsw", column="embedding", m=16, ef_construction=200
            )

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_parse", vector_column="embedding")

            # Test parsing index info
            index_info = index._get_index_info()

            assert index_info["dimensions"] == 256
            assert index_info["algorithm"] == "hnsw"
            assert index_info["metric"] == "l2"  # vector_l2_ops maps to l2
            assert "m" in index_info["parameters"]
            assert "ef_construction" in index_info["parameters"]

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_parse")
            test_client.execute("DROP DATABASE IF EXISTS parse_test")

    def test_query_basic(self, test_client):
        """Test basic query functionality"""
        try:
            # Create a test table with vector column
            test_client.execute("CREATE DATABASE IF NOT EXISTS query_test")
            test_client.execute("USE query_test")

            # Drop table if exists so the fixed-ID inserts below cannot collide
            test_client.execute("DROP TABLE IF EXISTS test_query")

            test_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_query (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                category VARCHAR(50),
                embedding vecf32(64)
                )
                """
            )

            # Create vector index
            test_client.vector_ops.create_ivf("test_query", name="idx_query", column="embedding", lists=10)

            # Insert test data
            test_client.execute(
                """
                INSERT INTO test_query (id, title, category, embedding) VALUES
                ('doc1', 'Machine Learning Guide', 'AI', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
                ('doc2', 'Python Programming', 'Programming', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]'),
                ('doc3', 'Database Design', 'Database', '[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]')
                """
            )

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_query", vector_column="embedding")

            # Test query (64 values, matching the vecf32(64) column)
            query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 6 + [
                0.1,
                0.2,
                0.3,
                0.4,
            ]
            results = index.query(query_vector, top_k=2, include_metadata=True)

            assert isinstance(results, QueryResponse)
            assert len(results.matches) <= 2
            assert results.namespace == ""
            assert results.usage is not None

            for match in results.matches:
                assert isinstance(match, VectorMatch)
                assert match.id in ["doc1", "doc2", "doc3"]
                assert isinstance(match.score, float)
                assert "title" in match.metadata
                assert "category" in match.metadata

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_query")
            test_client.execute("DROP DATABASE IF EXISTS query_test")

    @pytest.mark.asyncio
    async def test_query_async(self, test_async_client):
        """Test async query functionality"""
        try:
            # Create a test table with vector column
            await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_query_test")
            await test_async_client.execute("USE async_query_test")

            # Drop table if exists so the fixed-ID inserts below cannot collide
            await test_async_client.execute("DROP TABLE IF EXISTS test_query_async")

            await test_async_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_query_async (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(32)
                )
                """
            )

            # Create vector index
            await test_async_client.vector_ops.create_ivf(
                "test_query_async", name="idx_query_async", column="embedding", lists=5
            )

            # Insert test data
            await test_async_client.execute(
                """
                INSERT INTO test_query_async (id, title, embedding) VALUES
                ('doc1', 'Test Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2]'),
                ('doc2', 'Test Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3]')
                """
            )

            # Get PineconeCompatibleIndex object
            index = test_async_client.get_pinecone_index("test_query_async", vector_column="embedding")

            # Test async query (32 values, matching the vecf32(32) column)
            query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 3 + [0.1, 0.2]
            results = await index.query_async(query_vector, top_k=1, include_metadata=True)

            assert isinstance(results, QueryResponse)
            assert len(results.matches) <= 1
            assert results.namespace == ""

            for match in results.matches:
                assert isinstance(match, VectorMatch)
                assert match.id in ["doc1", "doc2"]
                assert isinstance(match.score, float)
                assert "title" in match.metadata

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            await test_async_client.execute("DROP TABLE IF EXISTS test_query_async")
            await test_async_client.execute("DROP DATABASE IF EXISTS async_query_test")

    def test_delete_functionality(self, test_client):
        """Test delete functionality (IVF index only)"""
        try:
            # Create a test table with vector column
            test_client.execute("CREATE DATABASE IF NOT EXISTS upsert_test")
            test_client.execute("USE upsert_test")

            # Drop table if exists to ensure clean state
            test_client.execute("DROP TABLE IF EXISTS test_upsert")

            test_client.execute(
                """
                CREATE TABLE test_upsert (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(16)
                )
                """
            )

            # Create IVF vector index (required for upsert/delete operations)
            test_client.vector_ops.create_ivf("test_upsert", name="idx_upsert", column="embedding", lists=5)

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_upsert", vector_column="embedding")

            # Insert test data directly using SQL
            test_client.execute(
                """
                INSERT INTO test_upsert (id, title, embedding) VALUES
                ('test1', 'Test Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]'),
                ('test2', 'Test Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]')
                """
            )

            # Verify data was inserted
            count_result = test_client.execute("SELECT COUNT(*) FROM test_upsert")
            assert count_result.rows[0][0] == 2

            # Test delete with string IDs
            index.delete(["test1"])

            # Verify data was deleted
            count_result = test_client.execute("SELECT COUNT(*) FROM test_upsert")
            assert count_result.rows[0][0] == 1

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_upsert")
            test_client.execute("DROP DATABASE IF EXISTS upsert_test")

    def test_describe_index_stats(self, test_client):
        """Test describe index stats functionality"""
        try:
            # Create a test table with vector column
            test_client.execute("CREATE DATABASE IF NOT EXISTS stats_test")
            test_client.execute("USE stats_test")

            # Drop table if exists: the exact vector-count asserts below
            # require that only the two rows inserted here are present
            test_client.execute("DROP TABLE IF EXISTS test_stats")

            test_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_stats (
                id VARCHAR(50) PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(64)
                )
                """
            )

            # Create vector index
            test_client.vector_ops.create_ivf("test_stats", name="idx_stats", column="embedding", lists=10)

            # Insert test data
            test_client.execute(
                """
                INSERT INTO test_stats (id, title, embedding) VALUES
                ('doc1', 'Document 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
                ('doc2', 'Document 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]')
                """
            )

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_stats", vector_column="embedding")

            # Test describe index stats
            stats = index.describe_index_stats()

            assert isinstance(stats, dict)
            assert "dimension" in stats
            assert "total_vector_count" in stats
            assert "namespaces" in stats
            assert stats["dimension"] == 64
            assert stats["total_vector_count"] == 2
            assert "" in stats["namespaces"]
            assert stats["namespaces"][""]["vector_count"] == 2

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_stats")
            test_client.execute("DROP DATABASE IF EXISTS stats_test")

    def test_hnsw_upsert_not_supported(self, test_client):
        """Test that an HNSW-backed index rejects delete operations.

        Despite the test name, only ``delete`` is exercised here; upsert is
        not called by this test.
        """
        try:
            # Create a test table with vector column
            test_client.execute("CREATE DATABASE IF NOT EXISTS hnsw_upsert_test")
            test_client.execute("USE hnsw_upsert_test")

            # Drop table if exists to ensure clean state
            test_client.execute("DROP TABLE IF EXISTS test_hnsw_upsert")

            test_client.execute(
                """
                CREATE TABLE test_hnsw_upsert (
                id BIGINT PRIMARY KEY,
                title VARCHAR(200),
                embedding vecf32(64)
                )
                """
            )

            # Create HNSW vector index
            test_client.vector_ops.create_hnsw(
                "test_hnsw_upsert",
                name="idx_hnsw_upsert",
                column="embedding",
                m=16,
                ef_construction=200,
            )

            # Get PineconeCompatibleIndex object
            index = test_client.get_pinecone_index("test_hnsw_upsert", vector_column="embedding")

            # Delete with string IDs must raise ValueError for an HNSW index
            with pytest.raises(ValueError, match="HNSW index does not support delete operations"):
                index.delete(["test1"])

            # The same error is expected regardless of the ID type
            with pytest.raises(ValueError, match="HNSW index does not support delete operations"):
                index.delete([1, 2, 3])  # Integer IDs

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_hnsw_upsert")
            test_client.execute("DROP DATABASE IF EXISTS hnsw_upsert_test")
411
+
412
+
413
class TestPineconeCompatibleIndexCaseInsensitive:
    """Test case-insensitive column name handling in PineconeCompatibleIndex

    Each test provisions its own database and table. Setup runs inside the
    ``try`` block and cleanup uses ``IF EXISTS`` so that a failure during
    setup still cleans up and cleanup itself does not fail on objects that
    were never created.
    """

    def test_case_insensitive_column_names_sync(self, test_client):
        """Test that column names are handled case-insensitively in sync client"""
        try:
            # Create a test table with mixed case column names
            test_client.execute("CREATE DATABASE IF NOT EXISTS case_test")
            test_client.execute("USE case_test")

            # Drop table if exists so the fixed-ID inserts below cannot collide
            test_client.execute("DROP TABLE IF EXISTS test_case_vectors")

            test_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_case_vectors (
                ID VARCHAR(50) PRIMARY KEY,
                Title VARCHAR(200),
                Content TEXT,
                Embedding vecf32(64)
                )
                """
            )

            # Create vector index
            test_client.vector_ops.create_ivf(
                "test_case_vectors", name="idx_case_embedding", column="Embedding", lists=10
            )

            # Insert test data
            test_client.execute(
                """
                INSERT INTO test_case_vectors (ID, Title, Content, Embedding) VALUES
                ('doc1', 'Test Document 1', 'Content 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4]'),
                ('doc2', 'Test Document 2', 'Content 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5]')
                """
            )

            # Test with different case variations of vector column name
            test_cases = [
                "Embedding",  # Original case
                "embedding",  # Lowercase
                "EMBEDDING",  # Uppercase
                "eMbEdDiNg",  # Mixed case (differs from the declared spelling)
            ]

            # Same 64-value query vector for every spelling (loop-invariant)
            query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 6 + [
                0.1,
                0.2,
                0.3,
                0.4,
            ]

            for vector_col in test_cases:
                # Get PineconeCompatibleIndex object
                index = test_client.get_pinecone_index("test_case_vectors", vector_column=vector_col)

                # Test that metadata columns are correctly identified (case-insensitive)
                metadata_cols = index.metadata_columns
                assert "Title" in metadata_cols or "title" in metadata_cols
                assert "Content" in metadata_cols or "content" in metadata_cols
                assert len(metadata_cols) == 2  # Should exclude ID and Embedding

                # Test query functionality
                results = index.query(query_vector, top_k=1, include_metadata=True)

                assert isinstance(results, QueryResponse)
                assert len(results.matches) <= 1  # bounded by top_k=1

                if results.matches:
                    match = results.matches[0]
                    assert match.id in ["doc1", "doc2"]
                    assert "Title" in match.metadata or "title" in match.metadata
                    assert "Content" in match.metadata or "content" in match.metadata

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            test_client.execute("DROP TABLE IF EXISTS test_case_vectors")
            test_client.execute("DROP DATABASE IF EXISTS case_test")

    @pytest.mark.asyncio
    async def test_case_insensitive_column_names_async(self, test_async_client):
        """Test that column names are handled case-insensitively in async client"""
        try:
            # Create a test table with mixed case column names
            await test_async_client.execute("CREATE DATABASE IF NOT EXISTS async_case_test")
            await test_async_client.execute("USE async_case_test")

            # Drop table if exists so the fixed-ID inserts below cannot collide
            await test_async_client.execute("DROP TABLE IF EXISTS test_case_vectors_async")

            await test_async_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_case_vectors_async (
                ID VARCHAR(50) PRIMARY KEY,
                Title VARCHAR(200),
                Content TEXT,
                Embedding vecf32(32)
                )
                """
            )

            # Create vector index
            await test_async_client.vector_ops.create_ivf(
                "test_case_vectors_async",
                name="idx_case_embedding_async",
                column="Embedding",
                lists=5,
            )

            # Insert test data
            await test_async_client.execute(
                """
                INSERT INTO test_case_vectors_async (ID, Title, Content, Embedding) VALUES
                ('doc1', 'Test Document 1', 'Content 1', '[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2]'),
                ('doc2', 'Test Document 2', 'Content 2', '[0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3]')
                """
            )

            # Test with different case variations of vector column name
            test_cases = [
                "Embedding",  # Original case
                "embedding",  # Lowercase
                "EMBEDDING",  # Uppercase
                "eMbEdDiNg",  # Mixed case (differs from the declared spelling)
            ]

            # Same 32-value query vector for every spelling (loop-invariant)
            query_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] * 3 + [0.1, 0.2]

            for vector_col in test_cases:
                # Get PineconeCompatibleIndex object
                index = test_async_client.get_pinecone_index("test_case_vectors_async", vector_column=vector_col)

                # Test that metadata columns are correctly identified (case-insensitive)
                metadata_cols = await index._get_metadata_columns_async()
                assert "Title" in metadata_cols or "title" in metadata_cols
                assert "Content" in metadata_cols or "content" in metadata_cols
                assert len(metadata_cols) == 2  # Should exclude ID and Embedding

                # Test async query functionality
                results = await index.query_async(query_vector, top_k=1, include_metadata=True)

                assert isinstance(results, QueryResponse)
                assert len(results.matches) <= 1  # bounded by top_k=1

                if results.matches:
                    match = results.matches[0]
                    assert match.id in ["doc1", "doc2"]
                    assert "Title" in match.metadata or "title" in match.metadata
                    assert "Content" in match.metadata or "content" in match.metadata

        finally:
            # Clean up (IF EXISTS: setup may have failed before creation)
            await test_async_client.execute("DROP TABLE IF EXISTS test_case_vectors_async")
            await test_async_client.execute("DROP DATABASE IF EXISTS async_case_test")