matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,708 @@
1
+ # Copyright 2021 - 2022 Matrix Origin
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Online tests for Pinecone-compatible filter functionality in vector search.
17
+ """
18
+
19
+ import pytest
20
+ import asyncio
21
+ from typing import List, Dict, Any
22
+ from matrixone import Client, AsyncClient
23
+ from matrixone.sqlalchemy_ext import create_vector_column
24
+ from sqlalchemy import Column, Integer, String, Float, DateTime, create_engine
25
+ from sqlalchemy.orm import sessionmaker, declarative_base
26
+ from datetime import datetime
27
+
28
# Declarative base class that the ORM test model(s) in this module inherit from.
Base = declarative_base()
29
+
30
+
31
class MovieDocument(Base):
    """Test model for movie documents with metadata"""

    __tablename__ = 'test_movies'

    # Scalar metadata columns; the filter tests in this module match against these.
    id = Column(Integer, primary_key=True)
    title = Column(String(200))
    genre = Column(String(50))
    year = Column(Integer)
    rating = Column(Float)
    director = Column(String(100))
    # Vector column built by the SDK helper; the arguments are presumably
    # (dimension, element type) -- the tests always use 64-element vectors.
    embedding = create_vector_column(64, "f32")
    # Python-side column default: ``datetime.now`` is passed as a callable, not called here.
    created_at = Column(DateTime, default=datetime.now)
44
+
45
+
46
class TestPineconeFilter:
    """Online tests for the Pinecone-compatible filter API of vector search.

    Every test talks to a live MatrixOne server using the settings in
    ``CONNECTION``. The class-scoped fixtures create a dedicated database, the
    ``test_movies`` table with an IVF index, and five seed rows. Tests that add
    rows or tables of their own remove them again, so the exact-count
    assertions in other tests do not depend on execution order.
    """

    # Dedicated database created by ``test_database`` and dropped afterwards.
    DB_NAME = "test_pinecone_filter_db"

    # Server settings shared by the sync client fixture and the async tests.
    CONNECTION = {"host": "127.0.0.1", "port": 6001, "user": "dump", "password": "111"}

    @staticmethod
    def _vector_literal(value, dimension=64):
        """Return the SQL text form ``[v,v,...,v]`` of a constant vector."""
        return "[" + ",".join([str(value)] * dimension) + "]"

    @staticmethod
    def _as_sequence(values):
        """Return *values* as-is, or parsed when it is the string form ``"[0.1,0.2,...]"``."""
        if isinstance(values, str):
            # Local import: ``ast`` is not imported at module level.
            import ast

            return ast.literal_eval(values)
        return values

    @pytest.fixture(scope="class")
    def client(self):
        """Create test client"""
        return Client(database="test", **self.CONNECTION)

    @pytest.fixture(scope="class")
    def async_client(self):
        """Create test async client (each async test connects and disconnects it)"""
        return AsyncClient()

    @pytest.fixture(scope="class")
    def test_database(self, client):
        """Create the test database, yield its name, and drop it afterwards"""
        db_name = self.DB_NAME
        try:
            # Start from a clean slate: a database left behind by an aborted run
            # would still contain old tables and rows and break the seed insert.
            client.execute(f"DROP DATABASE IF EXISTS {db_name}")
            client.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}")
            client.execute(f"USE {db_name}")
            yield db_name
        finally:
            # Best-effort cleanup: report, but never fail the run on teardown.
            try:
                client.execute(f"DROP DATABASE IF EXISTS {db_name}")
            except Exception as e:
                print(f"Cleanup failed: {e}")

    @pytest.fixture(scope="class")
    def test_data_setup(self, client, test_database):
        """Create the table and IVF index, insert the five seed movies, and yield them"""
        # Create tables
        client.create_all(Base)

        # Enable vector index
        client.vector_ops.enable_ivf()

        # Create vector index
        client.vector_ops.create_ivf(
            "test_movies",
            name="movies_ivf_index",
            column="embedding",
            op_type="vector_l2_ops",
        )

        # Seed rows: (id, title, genre, year, rating, director, embedding component).
        seed_rows = [
            (1, "The Matrix", "action", 1999, 8.7, "Lana Wachowski", 0.1),
            (2, "Inception", "sci-fi", 2010, 8.8, "Christopher Nolan", 0.2),
            (3, "The Dark Knight", "action", 2008, 9.0, "Christopher Nolan", 0.3),
            (4, "Interstellar", "sci-fi", 2014, 8.6, "Christopher Nolan", 0.4),
            (5, "Pulp Fiction", "crime", 1994, 8.9, "Quentin Tarantino", 0.5),
        ]
        test_movies = [
            {
                "id": movie_id,
                "title": title,
                "genre": genre,
                "year": year,
                "rating": rating,
                "director": director,
                "embedding": [component] * 64,
            }
            for movie_id, title, genre, year, rating, director, component in seed_rows
        ]

        client.vector_ops.batch_insert("test_movies", test_movies)

        yield test_movies

        # Cleanup (best effort; the database itself is dropped by ``test_database``)
        try:
            client.drop_all(Base)
        except Exception as e:
            print(f"Cleanup failed: {e}")

    def test_basic_filter_equality(self, client, test_data_setup):
        """Test basic equality filter"""
        # Create Pinecone-compatible index
        index = client.get_pinecone_index("test_movies", "embedding")

        # Test filter by genre
        query_vector = [0.15] * 64
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "action"})

        assert len(results.matches) == 2  # The Matrix and The Dark Knight
        for match in results.matches:
            assert match.metadata["genre"] == "action"

    def test_filter_with_operators(self, client, test_data_setup):
        """Test filter with various operators"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $gt operator
        results = index.query(vector=query_vector, top_k=10, filter={"year": {"$gt": 2000}})

        assert len(results.matches) >= 3  # Inception, The Dark Knight, Interstellar
        for match in results.matches:
            assert match.metadata["year"] > 2000

        # Test $gte operator
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$gte": 8.8}})

        # Expected: Inception (8.8), Pulp Fiction (8.9), The Dark Knight (9.0).
        # The bound stays loose because boundary values may round either way
        # depending on how the server stores FLOAT ratings.
        assert len(results.matches) >= 2
        for match in results.matches:
            assert match.metadata["rating"] >= 8.8

    def test_filter_with_in_operator(self, client, test_data_setup):
        """Test filter with $in operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $in operator
        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$in": ["action", "sci-fi"]}})

        assert len(results.matches) == 4  # The Matrix, Inception, The Dark Knight, Interstellar
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]

    def test_filter_with_and_operator(self, client, test_data_setup):
        """Test filter with $and operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $and operator
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"$and": [{"genre": "sci-fi"}, {"year": {"$gte": 2010}}]},
        )

        assert len(results.matches) == 2  # Inception and Interstellar
        for match in results.matches:
            assert match.metadata["genre"] == "sci-fi"
            assert match.metadata["year"] >= 2010

    def test_filter_with_or_operator(self, client, test_data_setup):
        """Test filter with $or operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $or operator
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"$or": [{"director": "Christopher Nolan"}, {"rating": {"$gte": 8.9}}]},
        )

        # The three Nolan films always match. Pulp Fiction (8.9) sits exactly on
        # the rating boundary and may or may not match after FLOAT rounding,
        # hence ">= 3" rather than "== 4".
        assert len(results.matches) >= 3
        for match in results.matches:
            assert match.metadata["director"] == "Christopher Nolan" or match.metadata["rating"] >= 8.9

    def test_filter_with_multiple_conditions(self, client, test_data_setup):
        """Test filter with multiple conditions"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test multiple conditions
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "action", "year": {"$gte": 2000}})

        assert len(results.matches) == 1  # Only The Dark Knight
        for match in results.matches:
            assert match.metadata["genre"] == "action"
            assert match.metadata["year"] >= 2000

    def test_filter_with_nin_operator(self, client, test_data_setup):
        """Test filter with $nin (not in) operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $nin operator
        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$nin": ["action", "sci-fi"]}})

        assert len(results.matches) == 1  # Only Pulp Fiction
        for match in results.matches:
            assert match.metadata["genre"] not in ["action", "sci-fi"]

    def test_filter_with_range_operators(self, client, test_data_setup):
        """Test filter with range operators"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $lt operator
        results = index.query(vector=query_vector, top_k=10, filter={"year": {"$lt": 2000}})

        assert len(results.matches) == 2  # The Matrix and Pulp Fiction
        for match in results.matches:
            assert match.metadata["year"] < 2000

        # Test $lte operator
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$lte": 8.7}})

        assert len(results.matches) >= 2  # The Matrix and Interstellar
        for match in results.matches:
            assert match.metadata["rating"] <= 8.7

    def test_filter_with_ne_operator(self, client, test_data_setup):
        """Test filter with $ne (not equal) operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $ne operator
        results = index.query(vector=query_vector, top_k=10, filter={"director": {"$ne": "Christopher Nolan"}})

        assert len(results.matches) == 2  # The Matrix and Pulp Fiction
        for match in results.matches:
            assert match.metadata["director"] != "Christopher Nolan"

    @pytest.mark.asyncio
    async def test_async_filter_functionality(self, async_client, test_data_setup):
        """Test async filter functionality"""
        # Connect async client
        await async_client.connect(database=self.DB_NAME, **self.CONNECTION)

        try:
            # Create async Pinecone-compatible index
            index = async_client.get_pinecone_index("test_movies", "embedding")

            # Test basic filter
            query_vector = [0.15] * 64
            results = await index.query_async(vector=query_vector, top_k=10, filter={"genre": "action"})

            assert len(results.matches) == 2  # The Matrix and The Dark Knight
            for match in results.matches:
                assert match.metadata["genre"] == "action"
        finally:
            # Properly disconnect async client to avoid event loop warnings
            await async_client.disconnect()

    def test_filter_with_complex_nested_conditions(self, client, test_data_setup):
        """Test filter with complex nested conditions"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test complex nested conditions
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={
                "$and": [
                    {"genre": {"$in": ["action", "sci-fi"]}},
                    {"$or": [{"year": {"$gte": 2010}}, {"rating": {"$gte": 8.9}}]},
                ]
            },
        )

        # Should match: Inception (sci-fi, 2010), The Dark Knight (action, rating 9.0), Interstellar (sci-fi, 2014)
        assert len(results.matches) >= 3
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]
            assert match.metadata["year"] >= 2010 or match.metadata["rating"] >= 8.9

    def test_filter_with_no_results(self, client, test_data_setup):
        """Test filter that returns no results"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test filter that should return no results
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "horror"})

        assert len(results.matches) == 0

    def test_filter_without_filter_parameter(self, client, test_data_setup):
        """Test query without filter parameter (should return all results)"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test without filter
        results = index.query(vector=query_vector, top_k=10)

        assert len(results.matches) == 5  # All movies
        assert results.usage["read_units"] == 5

    def test_query_with_include_metadata_false(self, client, test_data_setup):
        """Test query with include_metadata=False"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=3, include_metadata=False)

        assert len(results.matches) == 3
        for match in results.matches:
            assert match.metadata == {}  # No metadata should be included
            assert match.id is not None
            assert match.score is not None

    def test_query_with_include_values_true(self, client, test_data_setup):
        """Test query with include_values=True"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=2, include_values=True)

        assert len(results.matches) == 2
        for match in results.matches:
            assert match.values is not None
            # Values might be returned as string representation of vector
            values = self._as_sequence(match.values)
            assert isinstance(values, list)
            assert len(values) == 64

    def test_query_with_include_metadata_and_values_false(self, client, test_data_setup):
        """Test query with both include_metadata=False and include_values=False"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=2, include_metadata=False, include_values=False)

        assert len(results.matches) == 2
        for match in results.matches:
            assert match.metadata == {}
            assert match.values is None
            assert match.id is not None
            assert match.score is not None

    def test_query_with_top_k_one(self, client, test_data_setup):
        """Test query with top_k=1"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=1)

        assert len(results.matches) == 1
        assert results.usage["read_units"] == 1

    def test_query_with_namespace_parameter(self, client, test_data_setup):
        """Test query with namespace parameter (should be ignored but not cause errors)"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=3,
            namespace="test_namespace",  # Should be ignored in MatrixOne
        )

        assert len(results.matches) == 3
        assert results.namespace == "test_namespace"

    def test_filter_with_string_numbers(self, client, test_data_setup):
        """Test filter with string numbers"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with string year
        results = index.query(vector=query_vector, top_k=10, filter={"year": "2010"})

        assert len(results.matches) == 1  # Only Inception
        assert results.matches[0].metadata["year"] == 2010

    def test_filter_with_float_comparison(self, client, test_data_setup):
        """Test filter with float comparisons"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with float rating
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$gt": 8.7}})

        # Expected: Inception (8.8), Pulp Fiction (8.9), The Dark Knight (9.0).
        # The Matrix (8.7) sits on the boundary and is not strictly greater.
        assert len(results.matches) >= 2
        for match in results.matches:
            assert match.metadata["rating"] > 8.7

    def test_filter_with_boolean_like_values(self, client, test_data_setup):
        """Test filter with boolean-like values"""
        # Add a temporary row; it is deleted again below so that the exact-count
        # tests on the shared table are unaffected by execution order.
        client.execute(
            f"""
            INSERT INTO test_movies (id, title, genre, year, rating, director, embedding) VALUES
            (6, 'Test Movie 1', 'action', 2020, 7.5, 'Test Director', '{self._vector_literal(0.6)}')
            """
        )

        try:
            index = client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            # Test with string comparison
            results = index.query(vector=query_vector, top_k=10, filter={"director": "Test Director"})

            assert len(results.matches) == 1
            assert results.matches[0].metadata["director"] == "Test Director"
        finally:
            client.execute("DELETE FROM test_movies WHERE id = 6")

    def test_filter_with_special_characters(self, client, test_data_setup):
        """Test filter with special characters in values"""
        # Add a temporary row with quotes/apostrophes; removed again below.
        client.execute(
            f"""
            INSERT INTO test_movies (id, title, genre, year, rating, director, embedding) VALUES
            (7, 'Movie with "quotes"', 'drama', 2021, 8.0, 'Director with apostrophe''s name', '{self._vector_literal(0.7)}')
            """
        )

        try:
            index = client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            # Test with special characters in filter
            results = index.query(
                vector=query_vector,
                top_k=10,
                filter={"director": "Director with apostrophe's name"},
            )

            assert len(results.matches) == 1
            assert results.matches[0].metadata["director"] == "Director with apostrophe's name"
        finally:
            client.execute("DELETE FROM test_movies WHERE id = 7")

    def test_filter_with_large_in_list(self, client, test_data_setup):
        """Test filter with large $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with large $in list
        large_list = [str(i) for i in range(1000)]  # Large list
        large_list.extend(["action", "sci-fi", "crime"])  # Include actual values

        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$in": large_list}})

        # Should match every seed movie
        assert len(results.matches) >= 5  # At least the original 5 movies

    def test_filter_with_empty_in_list(self, client, test_data_setup):
        """Test filter with empty $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"genre": {"$in": []}},  # Empty list should return no results
        )

        assert len(results.matches) == 0

    def test_filter_with_empty_nin_list(self, client, test_data_setup):
        """Test filter with empty $nin list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"genre": {"$nin": []}},  # Empty list should return all results
        )

        assert len(results.matches) >= 5  # All movies should match

    def test_filter_with_mixed_data_types_in_list(self, client, test_data_setup):
        """Test filter with mixed data types in $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with mixed types (should work with string conversion)
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"year": {"$in": [1999, "2010", 2008]}},  # Mixed int and string
        )

        assert len(results.matches) == 3  # The Matrix, Inception, The Dark Knight

    def test_filter_with_deeply_nested_conditions(self, client, test_data_setup):
        """Test filter with deeply nested $and and $or conditions"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test deeply nested conditions
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={
                "$and": [
                    {"$or": [{"genre": "action"}, {"genre": "sci-fi"}]},
                    {
                        "$and": [
                            {"year": {"$gte": 2000}},
                            {"$or": [{"rating": {"$gte": 8.8}}, {"director": "Christopher Nolan"}]},
                        ]
                    },
                ]
            },
        )

        # Should match: Inception (sci-fi, 2010, 8.8), The Dark Knight (action, 2008, 9.0,
        # Christopher Nolan), Interstellar (sci-fi, 2014, Christopher Nolan)
        assert len(results.matches) >= 3
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]
            assert match.metadata["year"] >= 2000
            assert match.metadata["rating"] >= 8.8 or match.metadata["director"] == "Christopher Nolan"

    @pytest.mark.asyncio
    async def test_async_query_with_all_parameters(self, async_client, test_data_setup):
        """Test async query with all parameters"""
        await async_client.connect(database=self.DB_NAME, **self.CONNECTION)

        try:
            index = async_client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            results = await index.query_async(
                vector=query_vector,
                top_k=2,
                include_metadata=True,
                include_values=True,
                filter={"genre": "action"},
                namespace="async_test",
            )

            assert len(results.matches) >= 1  # At least one action movie
            assert results.namespace == "async_test"
            for match in results.matches:
                assert match.metadata["genre"] == "action"
                if match.values is not None:
                    # Values might be returned as string representation of vector
                    assert len(self._as_sequence(match.values)) == 64
        finally:
            await async_client.disconnect()

    @pytest.mark.asyncio
    async def test_async_query_with_complex_filter(self, async_client, test_data_setup):
        """Test async query with complex filter"""
        await async_client.connect(database=self.DB_NAME, **self.CONNECTION)

        try:
            index = async_client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            results = await index.query_async(
                vector=query_vector,
                top_k=10,
                filter={
                    "$and": [
                        {"year": {"$gte": 2008}},
                        {"$or": [{"rating": {"$gte": 8.8}}, {"director": "Christopher Nolan"}]},
                    ]
                },
            )

            # Should match: Inception (2010, 8.8), The Dark Knight (2008, 9.0, Christopher Nolan),
            # Interstellar (2014, Christopher Nolan)
            assert len(results.matches) >= 3
            for match in results.matches:
                assert match.metadata["year"] >= 2008
                assert match.metadata["rating"] >= 8.8 or match.metadata["director"] == "Christopher Nolan"
        finally:
            await async_client.disconnect()

    def test_edge_case_empty_table(self, client, test_data_setup):
        """Test query on empty table"""
        # Drop first so a leftover table from an earlier run cannot break CREATE.
        client.execute("DROP TABLE IF EXISTS empty_movies")
        client.execute(
            """
            CREATE TABLE empty_movies (
                id INT PRIMARY KEY,
                title VARCHAR(200),
                embedding VECF32(64)
            )
            """
        )

        try:
            index = client.get_pinecone_index("empty_movies", "embedding")
            query_vector = [0.15] * 64

            results = index.query(vector=query_vector, top_k=10)

            assert len(results.matches) == 0
            assert results.usage["read_units"] == 0
        finally:
            client.execute("DROP TABLE IF EXISTS empty_movies")

    def test_edge_case_single_record(self, client, test_data_setup):
        """Test query on table with single record"""
        # Drop first so a leftover table from an earlier run cannot break CREATE.
        client.execute("DROP TABLE IF EXISTS single_movie")
        client.execute(
            """
            CREATE TABLE single_movie (
                id INT PRIMARY KEY,
                title VARCHAR(200),
                embedding VECF32(64)
            )
            """
        )

        try:
            client.execute(
                f"""
                INSERT INTO single_movie (id, title, embedding) VALUES
                (1, 'Single Movie', '{self._vector_literal(0.5)}')
                """
            )

            index = client.get_pinecone_index("single_movie", "embedding")
            query_vector = [0.5] * 64  # Exact match

            results = index.query(vector=query_vector, top_k=10)

            assert len(results.matches) == 1
            assert results.matches[0].metadata["title"] == "Single Movie"
            # Identical vectors: the distance should be zero, compared with a
            # small tolerance instead of exact float equality.
            assert results.matches[0].score == pytest.approx(0.0, abs=1e-6)
        finally:
            client.execute("DROP TABLE IF EXISTS single_movie")

    def test_performance_large_top_k(self, client, test_data_setup):
        """Test performance with large top_k value"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with very large top_k
        results = index.query(vector=query_vector, top_k=10000)  # Very large number

        # Should return all available records (at least the 5 seed movies)
        assert len(results.matches) >= 5
        assert results.usage["read_units"] == len(results.matches)
705
+
706
+
707
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly reports
    # failures to the calling shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__]))