matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,708 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Online tests for Pinecone-compatible filter functionality in vector search.
|
17
|
+
"""
|
18
|
+
|
19
|
+
import pytest
|
20
|
+
import asyncio
|
21
|
+
from typing import List, Dict, Any
|
22
|
+
from matrixone import Client, AsyncClient
|
23
|
+
from matrixone.sqlalchemy_ext import create_vector_column
|
24
|
+
from sqlalchemy import Column, Integer, String, Float, DateTime, create_engine
|
25
|
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
26
|
+
from datetime import datetime
|
27
|
+
|
28
|
+
# Declarative base that the ORM model below registers itself on; the test
# fixtures pass it to client.create_all() / client.drop_all().
Base = declarative_base()


class MovieDocument(Base):
    """ORM model for the ``test_movies`` table used by the filter tests.

    The scalar columns act as the metadata that query filters are applied to;
    ``embedding`` is the vector column that is searched.
    """

    __tablename__ = 'test_movies'

    # Row identifier.
    id = Column(Integer, primary_key=True)

    # Metadata columns referenced by the filters in the tests.
    title = Column(String(200))
    genre = Column(String(50))
    year = Column(Integer)
    rating = Column(Float)
    director = Column(String(100))

    # Vector column; every row inserted by the tests carries 64 values.
    embedding = create_vector_column(64, "f32")

    # Filled in by the Python-side default (datetime.now) when not supplied.
    created_at = Column(DateTime, default=datetime.now)
|
44
|
+
|
45
|
+
|
46
|
+
class TestPineconeFilter:
    """Online tests for Pinecone-compatible filter functionality.

    All fixtures are class-scoped, so every test shares one client, one
    scratch database and one ``test_movies`` table. Two tests
    (``test_filter_with_boolean_like_values`` and
    ``test_filter_with_special_characters``) insert extra rows (ids 6 and 7)
    that are never removed; several tests defined after them therefore assert
    lower bounds (``>=``) rather than exact match counts.
    """

    # Server settings shared by the sync client fixture and the async tests.
    _SERVER = {"host": "127.0.0.1", "port": 6001, "user": "dump", "password": "111"}

    # Scratch database created and dropped by the ``test_database`` fixture.
    _DB_NAME = "test_pinecone_filter_db"

    @staticmethod
    def _as_sequence(values):
        """Return ``values`` unchanged, or parsed if it is a string.

        The tests tolerate vectors being returned as their textual form
        (e.g. ``"[0.1,0.2,...]"``); such strings are parsed with
        ``ast.literal_eval``.
        """
        if isinstance(values, str):
            import ast  # local import: only needed for the string form

            return ast.literal_eval(values)
        return values

    @pytest.fixture(scope="class")
    def client(self):
        """Create test client"""
        return Client(**self._SERVER, database="test")

    @pytest.fixture(scope="class")
    def async_client(self):
        """Create test async client (each async test connects it itself)"""
        return AsyncClient()

    @pytest.fixture(scope="class")
    def test_database(self, client):
        """Create the scratch database, switch to it, and drop it afterwards.

        Yields:
            The database name.
        """
        db_name = self._DB_NAME
        try:
            client.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}")
            client.execute(f"USE {db_name}")
            yield db_name
        finally:
            # Best-effort cleanup: a failed drop is reported, not raised.
            try:
                client.execute(f"DROP DATABASE IF EXISTS {db_name}")
            except Exception as e:
                print(f"Cleanup failed: {e}")

    @pytest.fixture(scope="class")
    def test_data_setup(self, client, test_database):
        """Create the tables and IVF index, insert five movies, drop on exit.

        Yields:
            The list of row dicts that was inserted into ``test_movies``.
        """
        test_movies = [
            {
                "id": 1,
                "title": "The Matrix",
                "genre": "action",
                "year": 1999,
                "rating": 8.7,
                "director": "Lana Wachowski",
                "embedding": [0.1] * 64,
            },
            {
                "id": 2,
                "title": "Inception",
                "genre": "sci-fi",
                "year": 2010,
                "rating": 8.8,
                "director": "Christopher Nolan",
                "embedding": [0.2] * 64,
            },
            {
                "id": 3,
                "title": "The Dark Knight",
                "genre": "action",
                "year": 2008,
                "rating": 9.0,
                "director": "Christopher Nolan",
                "embedding": [0.3] * 64,
            },
            {
                "id": 4,
                "title": "Interstellar",
                "genre": "sci-fi",
                "year": 2014,
                "rating": 8.6,
                "director": "Christopher Nolan",
                "embedding": [0.4] * 64,
            },
            {
                "id": 5,
                "title": "Pulp Fiction",
                "genre": "crime",
                "year": 1994,
                "rating": 8.9,
                "director": "Quentin Tarantino",
                "embedding": [0.5] * 64,
            },
        ]

        # pytest skips the code after ``yield`` when setup raises before the
        # yield, so the teardown lives in ``finally`` to also run when index
        # creation or the insert fails.
        try:
            # Create tables
            client.create_all(Base)

            # Enable vector index
            client.vector_ops.enable_ivf()

            # Create vector index
            client.vector_ops.create_ivf(
                "test_movies",
                name="movies_ivf_index",
                column="embedding",
                op_type="vector_l2_ops",
            )

            # Insert test data
            client.vector_ops.batch_insert("test_movies", test_movies)

            yield test_movies
        finally:
            # Cleanup (best effort, same reporting style as test_database)
            try:
                client.drop_all(Base)
            except Exception as e:
                print(f"Cleanup failed: {e}")

    def test_basic_filter_equality(self, client, test_data_setup):
        """Test basic equality filter"""
        # Create Pinecone-compatible index
        index = client.get_pinecone_index("test_movies", "embedding")

        # Test filter by genre
        query_vector = [0.15] * 64
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "action"})

        assert len(results.matches) == 2  # The Matrix and The Dark Knight
        for match in results.matches:
            assert match.metadata["genre"] == "action"

    def test_filter_with_operators(self, client, test_data_setup):
        """Test filter with the $gt and $gte operators"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $gt operator
        results = index.query(vector=query_vector, top_k=10, filter={"year": {"$gt": 2000}})

        assert len(results.matches) >= 3  # Inception, The Dark Knight, Interstellar
        for match in results.matches:
            assert match.metadata["year"] > 2000

        # Test $gte operator
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$gte": 8.8}})

        # In the seed data Inception (8.8), Pulp Fiction (8.9) and
        # The Dark Knight (9.0) qualify; only a lower bound is asserted.
        assert len(results.matches) >= 2
        for match in results.matches:
            assert match.metadata["rating"] >= 8.8

    def test_filter_with_in_operator(self, client, test_data_setup):
        """Test filter with $in operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $in operator
        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$in": ["action", "sci-fi"]}})

        assert len(results.matches) == 4  # The Matrix, Inception, The Dark Knight, Interstellar
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]

    def test_filter_with_and_operator(self, client, test_data_setup):
        """Test filter with $and operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $and operator
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"$and": [{"genre": "sci-fi"}, {"year": {"$gte": 2010}}]},
        )

        assert len(results.matches) == 2  # Inception and Interstellar
        for match in results.matches:
            assert match.metadata["genre"] == "sci-fi"
            assert match.metadata["year"] >= 2010

    def test_filter_with_or_operator(self, client, test_data_setup):
        """Test filter with $or operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $or operator
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"$or": [{"director": "Christopher Nolan"}, {"rating": {"$gte": 8.9}}]},
        )

        assert len(results.matches) >= 3  # Inception, The Dark Knight, Interstellar, Pulp Fiction
        for match in results.matches:
            assert match.metadata["director"] == "Christopher Nolan" or match.metadata["rating"] >= 8.9

    def test_filter_with_multiple_conditions(self, client, test_data_setup):
        """Test filter with multiple top-level conditions in one dict"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test multiple conditions
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "action", "year": {"$gte": 2000}})

        assert len(results.matches) == 1  # Only The Dark Knight
        for match in results.matches:
            assert match.metadata["genre"] == "action"
            assert match.metadata["year"] >= 2000

    def test_filter_with_nin_operator(self, client, test_data_setup):
        """Test filter with $nin (not in) operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $nin operator
        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$nin": ["action", "sci-fi"]}})

        assert len(results.matches) == 1  # Only Pulp Fiction
        for match in results.matches:
            assert match.metadata["genre"] not in ["action", "sci-fi"]

    def test_filter_with_range_operators(self, client, test_data_setup):
        """Test filter with the $lt and $lte range operators"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $lt operator
        results = index.query(vector=query_vector, top_k=10, filter={"year": {"$lt": 2000}})

        assert len(results.matches) == 2  # The Matrix and Pulp Fiction
        for match in results.matches:
            assert match.metadata["year"] < 2000

        # Test $lte operator
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$lte": 8.7}})

        assert len(results.matches) >= 2  # The Matrix and Interstellar
        for match in results.matches:
            assert match.metadata["rating"] <= 8.7

    def test_filter_with_ne_operator(self, client, test_data_setup):
        """Test filter with $ne (not equal) operator"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test $ne operator
        results = index.query(vector=query_vector, top_k=10, filter={"director": {"$ne": "Christopher Nolan"}})

        assert len(results.matches) == 2  # The Matrix and Pulp Fiction
        for match in results.matches:
            assert match.metadata["director"] != "Christopher Nolan"

    @pytest.mark.asyncio
    async def test_async_filter_functionality(self, async_client, test_data_setup):
        """Test async filter functionality"""
        # Connect async client
        await async_client.connect(**self._SERVER, database=self._DB_NAME)

        try:
            # Create async Pinecone-compatible index
            index = async_client.get_pinecone_index("test_movies", "embedding")

            # Test basic filter
            query_vector = [0.15] * 64
            results = await index.query_async(vector=query_vector, top_k=10, filter={"genre": "action"})

            assert len(results.matches) == 2  # The Matrix and The Dark Knight
            for match in results.matches:
                assert match.metadata["genre"] == "action"
        finally:
            # Properly disconnect async client to avoid event loop warnings
            await async_client.disconnect()

    def test_filter_with_complex_nested_conditions(self, client, test_data_setup):
        """Test filter with complex nested conditions"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test complex nested conditions
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={
                "$and": [
                    {"genre": {"$in": ["action", "sci-fi"]}},
                    {"$or": [{"year": {"$gte": 2010}}, {"rating": {"$gte": 8.9}}]},
                ]
            },
        )

        # Should match: Inception (sci-fi, 2010), The Dark Knight (action, rating 9.0), Interstellar (sci-fi, 2014)
        assert len(results.matches) >= 3
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]
            assert match.metadata["year"] >= 2010 or match.metadata["rating"] >= 8.9

    def test_filter_with_no_results(self, client, test_data_setup):
        """Test filter that returns no results"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test filter that should return no results
        results = index.query(vector=query_vector, top_k=10, filter={"genre": "horror"})

        assert len(results.matches) == 0

    def test_filter_without_filter_parameter(self, client, test_data_setup):
        """Test query without filter parameter (should return all results)"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test without filter
        results = index.query(vector=query_vector, top_k=10)

        assert len(results.matches) == 5  # All movies
        assert results.usage["read_units"] == 5

    def test_query_with_include_metadata_false(self, client, test_data_setup):
        """Test query with include_metadata=False"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=3, include_metadata=False)

        assert len(results.matches) == 3
        for match in results.matches:
            assert match.metadata == {}  # No metadata should be included
            assert match.id is not None
            assert match.score is not None

    def test_query_with_include_values_true(self, client, test_data_setup):
        """Test query with include_values=True"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=2, include_values=True)

        assert len(results.matches) == 2
        for match in results.matches:
            assert match.values is not None
            # Values might be returned as string representation of vector
            values_list = self._as_sequence(match.values)
            assert isinstance(values_list, list)
            assert len(values_list) == 64

    def test_query_with_include_metadata_and_values_false(self, client, test_data_setup):
        """Test query with both include_metadata=False and include_values=False"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=2, include_metadata=False, include_values=False)

        assert len(results.matches) == 2
        for match in results.matches:
            assert match.metadata == {}
            assert match.values is None
            assert match.id is not None
            assert match.score is not None

    def test_query_with_top_k_one(self, client, test_data_setup):
        """Test query with top_k=1"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=1)

        assert len(results.matches) == 1
        assert results.usage["read_units"] == 1

    def test_query_with_namespace_parameter(self, client, test_data_setup):
        """Test query with namespace parameter (should be ignored but not cause errors)"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=3,
            namespace="test_namespace",  # Should be ignored in MatrixOne
        )

        assert len(results.matches) == 3
        # The namespace is expected to be echoed back on the result object.
        assert results.namespace == "test_namespace"

    def test_filter_with_string_numbers(self, client, test_data_setup):
        """Test filter with string numbers"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with string year
        results = index.query(vector=query_vector, top_k=10, filter={"year": "2010"})

        assert len(results.matches) == 1  # Only Inception
        assert results.matches[0].metadata["year"] == 2010

    def test_filter_with_float_comparison(self, client, test_data_setup):
        """Test filter with float comparisons"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with float rating
        results = index.query(vector=query_vector, top_k=10, filter={"rating": {"$gt": 8.7}})

        # In the seed data Inception (8.8), Pulp Fiction (8.9) and
        # The Dark Knight (9.0) qualify; The Matrix (8.7) is excluded by the
        # strict comparison. Only a lower bound is asserted.
        assert len(results.matches) >= 2
        for match in results.matches:
            assert match.metadata["rating"] > 8.7

    def test_filter_with_boolean_like_values(self, client, test_data_setup):
        """Test an equality filter against a row inserted with raw SQL.

        Despite the name, no boolean-like value is involved: the test inserts
        row id 6 and filters on its ``director`` string. The row is not
        removed afterwards.
        """
        # First, let's add the extra test row
        client.execute(
            """
            INSERT INTO test_movies (id, title, genre, year, rating, director, embedding) VALUES
            (6, 'Test Movie 1', 'action', 2020, 7.5, 'Test Director', '[0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6]')
            """
        )

        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with string comparison
        results = index.query(vector=query_vector, top_k=10, filter={"director": "Test Director"})

        assert len(results.matches) == 1
        assert results.matches[0].metadata["director"] == "Test Director"

    def test_filter_with_special_characters(self, client, test_data_setup):
        """Test filter with special characters in values.

        Inserts row id 7 (not removed afterwards) whose director contains an
        apostrophe, then filters on that exact string.
        """
        # Add test data with special characters
        client.execute(
            """
            INSERT INTO test_movies (id, title, genre, year, rating, director, embedding) VALUES
            (7, 'Movie with "quotes"', 'drama', 2021, 8.0, 'Director with apostrophe''s name', '[0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7]')
            """
        )

        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with special characters in filter
        results = index.query(vector=query_vector, top_k=10, filter={"director": "Director with apostrophe's name"})

        assert len(results.matches) == 1
        assert results.matches[0].metadata["director"] == "Director with apostrophe's name"

    def test_filter_with_large_in_list(self, client, test_data_setup):
        """Test filter with large $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with large $in list
        large_list = [str(i) for i in range(1000)]  # Large list
        large_list.extend(["action", "sci-fi", "crime"])  # Include actual values

        results = index.query(vector=query_vector, top_k=10, filter={"genre": {"$in": large_list}})

        # Should match all movies (including any added in previous tests)
        assert len(results.matches) >= 5  # At least the original 5 movies

    def test_filter_with_empty_in_list(self, client, test_data_setup):
        """Test filter with empty $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"genre": {"$in": []}},  # Empty list should return no results
        )

        assert len(results.matches) == 0

    def test_filter_with_empty_nin_list(self, client, test_data_setup):
        """Test filter with empty $nin list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"genre": {"$nin": []}},  # Empty list should return all results
        )

        assert len(results.matches) >= 5  # All movies should match

    def test_filter_with_mixed_data_types_in_list(self, client, test_data_setup):
        """Test filter with mixed data types in $in list"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with mixed types (should work with string conversion)
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={"year": {"$in": [1999, "2010", 2008]}},  # Mixed int and string
        )

        assert len(results.matches) == 3  # The Matrix, Inception, The Dark Knight

    def test_filter_with_deeply_nested_conditions(self, client, test_data_setup):
        """Test filter with deeply nested $and and $or conditions"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test deeply nested conditions
        results = index.query(
            vector=query_vector,
            top_k=10,
            filter={
                "$and": [
                    {"$or": [{"genre": "action"}, {"genre": "sci-fi"}]},
                    {
                        "$and": [
                            {"year": {"$gte": 2000}},
                            {"$or": [{"rating": {"$gte": 8.8}}, {"director": "Christopher Nolan"}]},
                        ]
                    },
                ]
            },
        )

        # Should match: Inception (sci-fi, 2010, 8.8), The Dark Knight (action, 2008, 9.0, Christopher Nolan), Interstellar (sci-fi, 2014, Christopher Nolan)
        assert len(results.matches) >= 3
        for match in results.matches:
            assert match.metadata["genre"] in ["action", "sci-fi"]
            assert match.metadata["year"] >= 2000
            assert match.metadata["rating"] >= 8.8 or match.metadata["director"] == "Christopher Nolan"

    @pytest.mark.asyncio
    async def test_async_query_with_all_parameters(self, async_client, test_data_setup):
        """Test async query with all parameters"""
        await async_client.connect(**self._SERVER, database=self._DB_NAME)

        try:
            index = async_client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            results = await index.query_async(
                vector=query_vector,
                top_k=2,
                include_metadata=True,
                include_values=True,
                filter={"genre": "action"},
                namespace="async_test",
            )

            assert len(results.matches) >= 1  # At least one action movie
            assert results.namespace == "async_test"
            for match in results.matches:
                assert match.metadata["genre"] == "action"
                if match.values is not None:
                    # Values might be returned as string representation of vector
                    assert len(self._as_sequence(match.values)) == 64
        finally:
            await async_client.disconnect()

    @pytest.mark.asyncio
    async def test_async_query_with_complex_filter(self, async_client, test_data_setup):
        """Test async query with complex filter"""
        await async_client.connect(**self._SERVER, database=self._DB_NAME)

        try:
            index = async_client.get_pinecone_index("test_movies", "embedding")
            query_vector = [0.15] * 64

            results = await index.query_async(
                vector=query_vector,
                top_k=10,
                filter={
                    "$and": [
                        {"year": {"$gte": 2008}},
                        {"$or": [{"rating": {"$gte": 8.8}}, {"director": "Christopher Nolan"}]},
                    ]
                },
            )

            # Should match: Inception (2010, 8.8), The Dark Knight (2008, 9.0, Christopher Nolan), Interstellar (2014, Christopher Nolan)
            assert len(results.matches) >= 3
            for match in results.matches:
                assert match.metadata["year"] >= 2008
                assert match.metadata["rating"] >= 8.8 or match.metadata["director"] == "Christopher Nolan"
        finally:
            await async_client.disconnect()

    def test_edge_case_empty_table(self, client, test_data_setup):
        """Test query on empty table"""
        # Create empty table (left in place; the scratch database is dropped
        # by the test_database fixture)
        client.execute(
            """
            CREATE TABLE empty_movies (
                id INT PRIMARY KEY,
                title VARCHAR(200),
                embedding VECF32(64)
            )
            """
        )

        index = client.get_pinecone_index("empty_movies", "embedding")
        query_vector = [0.15] * 64

        results = index.query(vector=query_vector, top_k=10)

        assert len(results.matches) == 0
        assert results.usage["read_units"] == 0

    def test_edge_case_single_record(self, client, test_data_setup):
        """Test query on table with single record"""
        # Create table with single record
        client.execute(
            """
            CREATE TABLE single_movie (
                id INT PRIMARY KEY,
                title VARCHAR(200),
                embedding VECF32(64)
            )
            """
        )

        client.execute(
            """
            INSERT INTO single_movie (id, title, embedding) VALUES
            (1, 'Single Movie', '[0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5]')
            """
        )

        index = client.get_pinecone_index("single_movie", "embedding")
        query_vector = [0.5] * 64  # Exact match

        results = index.query(vector=query_vector, top_k=10)

        assert len(results.matches) == 1
        assert results.matches[0].metadata["title"] == "Single Movie"
        assert results.matches[0].score == 0.0  # Should be exact match

    def test_performance_large_top_k(self, client, test_data_setup):
        """Test query with a top_k far larger than the number of rows"""
        index = client.get_pinecone_index("test_movies", "embedding")
        query_vector = [0.15] * 64

        # Test with very large top_k
        results = index.query(vector=query_vector, top_k=10000)  # Very large number

        # Should return all available records (at least 5, possibly more from previous tests)
        assert len(results.matches) >= 5
        assert results.usage["read_units"] == len(results.matches)
|
705
|
+
|
706
|
+
|
707
|
+
if __name__ == "__main__":
    # pytest.main() returns the session's exit code; propagate it so that
    # running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__]))
|