matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,842 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Online tests for fulltext search functionality.
|
17
|
+
Tests against real MatrixOne database with actual data.
|
18
|
+
"""
|
19
|
+
|
20
|
+
import pytest
|
21
|
+
import os
|
22
|
+
import sys
|
23
|
+
import warnings
|
24
|
+
from sqlalchemy import Column, Integer, String, Text, create_engine
|
25
|
+
from sqlalchemy.orm import sessionmaker
|
26
|
+
|
27
|
+
# Add the matrixone package to the path
|
28
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
|
29
|
+
|
30
|
+
from matrixone import Client, AsyncClient
|
31
|
+
from matrixone.orm import declarative_base
|
32
|
+
from matrixone.config import get_connection_params
|
33
|
+
from matrixone.sqlalchemy_ext.fulltext_search import (
|
34
|
+
boolean_match,
|
35
|
+
natural_match,
|
36
|
+
group,
|
37
|
+
FulltextSearchMode,
|
38
|
+
)
|
39
|
+
from matrixone.sqlalchemy_ext.adapters import logical_and, logical_or, logical_not
|
40
|
+
|
41
|
+
# Declarative base class that the ORM model(s) defined in this module inherit from.
Base = declarative_base()
|
42
|
+
|
43
|
+
|
44
|
+
class Article(Base):
    """ORM mapping of the ``test_articles`` table used by the fulltext search tests.

    The test classes in this module create the table with raw SQL and query it
    through this model.
    """

    __tablename__ = 'test_articles'

    # Surrogate key, assigned by the database.
    id = Column(Integer, autoincrement=True, primary_key=True)

    # Text columns that the tests put under the fulltext index.
    title = Column(String(255), nullable=False)
    content = Column(Text, nullable=False)
    tags = Column(String(500), nullable=True)

    # Plain (non-indexed) column used for regular SQL filtering.
    category = Column(String(100), nullable=True)
|
54
|
+
|
55
|
+
|
56
|
+
class TestFulltextSearchOnline:
    """Online fulltext search tests.

    ``setup_class`` connects to the configured server, (re)creates the
    ``test_fulltext_search`` database with a ``test_articles`` table, loads a
    fixed set of articles and builds a fulltext index over
    ``title``, ``content`` and ``tags``.  Every test then runs a query through
    ``self.client.query(Article)`` and checks the returned rows.
    """

    # Rows loaded into ``test_articles``: (title, content, tags, category).
    _ARTICLE_ROWS = (
        (
            'Python Programming Tutorial',
            'Learn Python programming from basics to advanced concepts. This tutorial covers variables, functions, classes, and more.',
            'python,programming,tutorial,beginner',
            'Programming',
        ),
        (
            'Java Development Guide',
            'Complete guide to Java development including Spring framework, Hibernate, and best practices for enterprise applications.',
            'java,development,spring,enterprise',
            'Programming',
        ),
        (
            'Machine Learning with Python',
            'Introduction to machine learning using Python. Covers neural networks, deep learning, and AI algorithms.',
            'python,machine-learning,AI,neural-networks',
            'AI',
        ),
        (
            'JavaScript Frontend Development',
            'Modern JavaScript development for frontend applications. Learn React, Vue, and Angular frameworks.',
            'javascript,frontend,react,vue,angular',
            'Web Development',
        ),
        (
            'Database Design Principles',
            'Learn database design principles, normalization, indexing, and query optimization techniques.',
            'database,design,sql,optimization',
            'Database',
        ),
        (
            'Legacy Python 2.7 Migration',
            'Guide for migrating legacy Python 2.7 applications to Python 3. Deprecated features and compatibility issues.',
            'python,legacy,migration,deprecated',
            'Programming',
        ),
        (
            'Advanced Neural Networks',
            'Deep dive into advanced neural network architectures including CNNs, RNNs, and transformer models.',
            'neural-networks,deep-learning,CNN,RNN,transformer',
            'AI',
        ),
        (
            'Web Security Best Practices',
            'Security best practices for web applications. Learn about authentication, authorization, and common vulnerabilities.',
            'security,web,authentication,vulnerabilities',
            'Security',
        ),
    )

    @staticmethod
    def _searchable_text(row):
        """Return title, content and tags of *row* joined by spaces, lower-cased.

        ``tags`` may be NULL and is then treated as an empty string.
        """
        return " ".join((row.title, row.content, row.tags or "")).lower()

    @classmethod
    def _skip_class(cls, reason):
        """Close the connection opened by ``setup_class`` and skip the class.

        The connection is released here explicitly so that a skipped setup
        does not leave it open.
        """
        try:
            cls.client.disconnect()
        except Exception:
            # Best effort: the skip reason is more useful than a disconnect error.
            pass
        pytest.skip(reason)

    @classmethod
    def setup_class(cls):
        """Set up test database and data.

        Any failure while preparing the database, table, rows or index skips
        the whole class instead of failing it.
        """
        # Get connection parameters using standard config
        host, port, user, password, database = get_connection_params()

        cls.client = Client()
        cls.client.connect(host=host, port=port, user=user, password=password, database=database)

        # Create test database
        cls.test_db = "test_fulltext_search"
        try:
            cls.client.execute(f"CREATE DATABASE IF NOT EXISTS {cls.test_db}")
            cls.client.execute(f"USE {cls.test_db}")
        except Exception as e:
            cls._skip_class(f"Cannot create test database: {e}")

        # Recreate the table from scratch so every run starts with the same rows
        try:
            cls.client.execute("DROP TABLE IF EXISTS test_articles")
            cls.client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_articles (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    title VARCHAR(255) NOT NULL,
                    content TEXT NOT NULL,
                    tags VARCHAR(500),
                    category VARCHAR(100)
                )
                """
            )
        except Exception as e:
            cls._skip_class(f"Cannot create test table: {e}")

        # Insert test data
        try:
            for row in cls._ARTICLE_ROWS:
                cls.client.execute(
                    "INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)",
                    row,
                )
        except Exception as e:
            cls._skip_class(f"Cannot insert test data: {e}")

        # Create fulltext index
        try:
            # Set algorithm to BM25
            cls.client.execute('SET ft_relevancy_algorithm = "BM25"')

            # Create fulltext index
            cls.client.execute("CREATE FULLTEXT INDEX ft_articles ON test_articles(title, content, tags)")
        except Exception as e:
            cls._skip_class(f"Cannot create fulltext index: {e}")

    @classmethod
    def teardown_class(cls):
        """Clean up test database.

        Cleanup is best effort: errors are ignored, but the connection is
        always asked to disconnect even when dropping objects fails.
        """
        try:
            cls.client.execute("DROP TABLE IF EXISTS test_articles")
            cls.client.execute(f"DROP DATABASE IF EXISTS {cls.test_db}")
        except Exception:
            # Leftover objects are recreated by the next setup; do not fail teardown.
            pass
        finally:
            try:
                cls.client.disconnect()
            except Exception:
                pass

    def test_basic_must_search(self):
        """Test basic must search functionality."""
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").must("python"))

        results = query.all()
        assert len(results) >= 2  # Should find Python articles

        # Check that all results contain 'python'
        for result in results:
            assert "python" in self._searchable_text(result)

    def test_must_not_search(self):
        """Test must_not search functionality."""
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must("programming").must_not("legacy")
        )

        results = query.all()
        assert len(results) >= 1

        # Check that no results contain 'legacy'
        for result in results:
            content_lower = self._searchable_text(result)
            assert "legacy" not in content_lower
            assert "programming" in content_lower

    def test_encourage_search(self):
        """Test encourage (optional positive weight) search."""
        # Search for programming content, encourage tutorial
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must("programming").encourage("tutorial")
        )

        results = query.all()
        assert len(results) >= 1

        texts = [self._searchable_text(result) for result in results]

        # The mandatory term must be present in every row
        for content_lower in texts:
            assert "programming" in content_lower

        # At least one result should contain 'tutorial'
        # (result ordering is not verified by this test)
        assert any("tutorial" in content_lower for content_lower in texts)

    def test_discourage_search(self):
        """Test discourage (negative weight) search."""
        # Search for Python content, discourage legacy
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must("python").discourage("legacy")
        )

        results = query.all()
        assert len(results) >= 1

        # All results should contain 'python'
        for result in results:
            assert "python" in self._searchable_text(result)

    def test_group_search(self):
        """Test group search functionality."""
        # Must contain either 'python' or 'java'
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must(group().medium("python", "java"))
        )

        results = query.all()
        assert len(results) >= 2

        # Each result should contain either 'python' or 'java'
        for result in results:
            content_lower = self._searchable_text(result)
            assert "python" in content_lower or "java" in content_lower

    def test_complex_boolean_search(self):
        """Test complex boolean search with multiple conditions."""
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags")
            .must("programming")
            .encourage(group().medium("python", "java"))
            .discourage("legacy")
            .must_not("security")
        )

        results = query.all()
        assert len(results) >= 1

        # Only the hard constraints (must / must_not) are checked per row
        for result in results:
            content_lower = self._searchable_text(result)
            assert "programming" in content_lower
            assert "security" not in content_lower

    def test_phrase_search(self):
        """Test phrase search functionality."""
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").phrase("machine learning"))

        results = query.all()
        assert len(results) >= 1

        # Check that results contain the exact phrase (in title or content only;
        # tags are deliberately left out of this check)
        for result in results:
            content_lower = " ".join((result.title, result.content)).lower()
            assert "machine learning" in content_lower

    def test_prefix_search(self):
        """Test prefix search functionality."""
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").prefix("neural"))

        results = query.all()
        assert len(results) >= 1

        # Check that results contain words starting with 'neural'
        for result in results:
            # Hyphens are split so that 'neural-networks' counts as a word starting with 'neural'
            words = self._searchable_text(result).replace("-", " ").split()
            assert any(word.startswith("neural") for word in words)

    def test_element_weight_search(self):
        """Test element-level weight operators."""
        # Test high and low weight within groups
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must(group().high("python").low("tutorial"))
        )

        results = query.all()
        assert len(results) >= 1

        # Each result must contain at least one of the grouped terms
        for result in results:
            content_lower = self._searchable_text(result)
            assert "python" in content_lower or "tutorial" in content_lower

    def test_natural_language_search(self):
        """Test natural language search mode."""
        # First try with terms that exist in our test data
        query = self.client.query(Article).filter(natural_match("title", "content", "tags", query="python programming"))

        results = query.all()
        # If natural language search returns 0 results, try simpler terms
        if not results:
            query = self.client.query(Article).filter(natural_match("title", "content", "tags", query="programming"))
            results = query.all()

        # Natural language search might return fewer results than boolean search,
        # so only require that the query ran and produced a list.
        assert isinstance(results, list)

        # If we have results, verify they contain relevant terms
        programming_keywords = ["programming", "python", "java", "development", "tutorial"]
        for result in results:
            content_lower = self._searchable_text(result)
            # At least one programming keyword should be present
            assert any(keyword in content_lower for keyword in programming_keywords)

    def test_combined_with_regular_filters(self):
        """Test fulltext search combined with regular SQL filters."""
        query = (
            self.client.query(Article)
            .filter(boolean_match("title", "content", "tags").must("programming"))
            .filter(Article.category == "Programming")
        )

        results = query.all()
        assert len(results) >= 1

        for result in results:
            assert result.category == "Programming"
            assert "programming" in self._searchable_text(result)

    def test_ordering_and_limits(self):
        """Test ordering and limits with fulltext search."""
        query = (
            self.client.query(Article)
            .filter(boolean_match("title", "content", "tags").encourage("python"))
            .order_by(Article.id.desc())
            .limit(3)
        )

        results = query.all()
        assert len(results) <= 3

        # Check ordering (should be descending by ID): compare each row with its successor
        for current, following in zip(results, results[1:]):
            assert current.id >= following.id

    def test_count_with_fulltext(self):
        """Test count queries with fulltext search."""
        count = self.client.query(Article).filter(boolean_match("title", "content", "tags").must("programming")).count()

        assert count >= 1
        assert isinstance(count, int)

    def test_matrixone_style_complex_query(self):
        """Test MatrixOne-style complex query: +red -(<blue >is)."""
        # Adapt to our test data: +programming -(>legacy <deprecated)
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags").must("programming").must_not(group().high("legacy").low("deprecated"))
        )

        results = query.all()
        # Every returned row must satisfy the mandatory term; an empty result is accepted
        for result in results:
            assert "programming" in self._searchable_text(result)

    def test_multiple_must_groups(self):
        """Test multiple must groups."""
        query = self.client.query(Article).filter(
            boolean_match("title", "content", "tags")
            .must(group().medium("python", "java"))
            .must(group().medium("programming", "development"))
        )

        results = query.all()
        assert len(results) >= 1

        for result in results:
            content_lower = self._searchable_text(result)
            # Must contain at least one from each group
            assert "python" in content_lower or "java" in content_lower
            assert "programming" in content_lower or "development" in content_lower

    def test_empty_results(self):
        """Test queries that should return no results."""
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").must("nonexistent_term_xyz123"))

        results = query.all()
        assert len(results) == 0

    def test_case_insensitive_search(self):
        """Test case insensitive search."""
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").must("PYTHON"))

        results = query.all()
        assert len(results) >= 1  # Should find python articles regardless of case

    def test_special_characters_in_search(self):
        """Test search with special characters."""
        # Test hyphenated terms
        query = self.client.query(Article).filter(boolean_match("title", "content", "tags").encourage("machine-learning"))

        results = query.all()
        # May or may not find results, but the query must run and produce a list
        assert isinstance(results, list)
|
425
|
+
|
426
|
+
|
427
|
+
class TestAsyncFulltextSearch:
    """Test async fulltext search functionality.

    The fixture data is prepared with a short-lived synchronous ``Client``;
    the test itself opens its own ``AsyncClient``.
    """

    @classmethod
    def setup_class(cls):
        """Ensure test database, table, one row and the fulltext index exist for async tests."""
        # Get connection parameters using standard config
        host, port, user, password, database = get_connection_params()

        # Create sync client to set up database
        sync_client = Client()
        sync_client.connect(host=host, port=port, user=user, password=password, database=database)

        try:
            # Create test database if not exists
            sync_client.execute("CREATE DATABASE IF NOT EXISTS test_fulltext_search")
            sync_client.execute("USE test_fulltext_search")

            # Recreate the table so the test starts from a known state
            sync_client.execute("DROP TABLE IF EXISTS test_articles")
            sync_client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_articles (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    title VARCHAR(200),
                    content TEXT,
                    tags VARCHAR(500),
                    category VARCHAR(50)
                )
                """
            )

            # Insert at least one test record for async tests
            sync_client.execute(
                "INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)",
                (
                    "Async Python Tutorial",
                    "Learn async programming with Python asyncio",
                    "python,async,tutorial",
                    "Programming",
                ),
            )

            # Create fulltext index
            sync_client.execute('SET ft_relevancy_algorithm = "BM25"')
            try:
                sync_client.execute("CREATE FULLTEXT INDEX ft_articles ON test_articles(title, content, tags)")
            except Exception:
                # Index creation errors are deliberately ignored here; if the index
                # is really missing, the search test below is expected to surface it.
                pass
        finally:
            # Release the setup connection even when one of the statements above fails
            sync_client.disconnect()

    @pytest.mark.asyncio
    async def test_async_basic_search(self):
        """Test basic async fulltext search."""
        # Get connection parameters using standard config
        host, port, user, password, database = get_connection_params()

        async_client = AsyncClient()
        await async_client.connect(host=host, port=port, user=user, password=password, database=database)

        try:
            # Use the test database
            await async_client.execute("USE test_fulltext_search")

            query = async_client.query(Article).filter(boolean_match("title", "content", "tags").must("python"))

            results = await query.all()
            assert len(results) >= 1

            for result in results:
                content_lower = " ".join((result.title, result.content, result.tags or "")).lower()
                assert "python" in content_lower
        finally:
            # Always close the async client, also when an assertion above fails,
            # to avoid leaving the connection open.
            try:
                await async_client.disconnect()
            except Exception:
                pass  # Ignore disconnect errors
|
505
|
+
|
506
|
+
|
507
|
+
class TestFulltextSearchEdgeCases:
|
508
|
+
"""Test edge cases and error conditions."""
|
509
|
+
|
510
|
+
@classmethod
|
511
|
+
def setup_class(cls):
|
512
|
+
"""Set up client for edge case tests."""
|
513
|
+
# Get connection parameters using standard config
|
514
|
+
host, port, user, password, database = get_connection_params()
|
515
|
+
|
516
|
+
cls.client = Client()
|
517
|
+
cls.client.connect(host=host, port=port, user=user, password=password, database=database)
|
518
|
+
|
519
|
+
# Ensure test database and data exist
|
520
|
+
cls.client.execute("CREATE DATABASE IF NOT EXISTS test_fulltext_search")
|
521
|
+
cls.client.execute("USE test_fulltext_search")
|
522
|
+
|
523
|
+
# Create table if not exists
|
524
|
+
cls.client.execute("DROP TABLE IF EXISTS test_articles")
|
525
|
+
cls.client.execute(
|
526
|
+
"""
|
527
|
+
CREATE TABLE IF NOT EXISTS test_articles (
|
528
|
+
id INT AUTO_INCREMENT PRIMARY KEY,
|
529
|
+
title VARCHAR(200),
|
530
|
+
content TEXT,
|
531
|
+
tags VARCHAR(500),
|
532
|
+
category VARCHAR(50)
|
533
|
+
)
|
534
|
+
"""
|
535
|
+
)
|
536
|
+
|
537
|
+
# Insert test data
|
538
|
+
cls.client.execute(
|
539
|
+
"INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)",
|
540
|
+
(
|
541
|
+
"Python Programming Guide",
|
542
|
+
"Complete Python programming tutorial",
|
543
|
+
"python,programming,guide",
|
544
|
+
"Programming",
|
545
|
+
),
|
546
|
+
)
|
547
|
+
cls.client.execute(
|
548
|
+
"INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)",
|
549
|
+
(
|
550
|
+
"Java Development",
|
551
|
+
"Java application development guide",
|
552
|
+
"java,development",
|
553
|
+
"Programming",
|
554
|
+
),
|
555
|
+
)
|
556
|
+
|
557
|
+
# Create fulltext index
|
558
|
+
cls.client.execute('SET ft_relevancy_algorithm = "BM25"')
|
559
|
+
try:
|
560
|
+
cls.client.execute("CREATE FULLTEXT INDEX ft_articles ON test_articles(title, content, tags)")
|
561
|
+
except Exception:
|
562
|
+
# Index might already exist, ignore error
|
563
|
+
pass
|
564
|
+
|
565
|
+
@classmethod
|
566
|
+
def teardown_class(cls):
|
567
|
+
"""Clean up client."""
|
568
|
+
try:
|
569
|
+
cls.client.disconnect()
|
570
|
+
except:
|
571
|
+
pass
|
572
|
+
|
573
|
+
def test_single_column_index(self):
|
574
|
+
"""Test search on single column."""
|
575
|
+
# Create a single-column fulltext index for testing
|
576
|
+
try:
|
577
|
+
self.client.execute("DROP INDEX ft_title ON test_articles")
|
578
|
+
except Exception:
|
579
|
+
# Index might not exist, ignore error
|
580
|
+
pass
|
581
|
+
|
582
|
+
try:
|
583
|
+
self.client.execute("CREATE FULLTEXT INDEX ft_title ON test_articles(title)")
|
584
|
+
except Exception:
|
585
|
+
# Index might already exist, ignore error
|
586
|
+
pass
|
587
|
+
|
588
|
+
query = self.client.query(Article).filter(boolean_match("title").must("python"))
|
589
|
+
|
590
|
+
results = query.all()
|
591
|
+
assert len(results) >= 1
|
592
|
+
|
593
|
+
# Verify results contain python in title
|
594
|
+
for result in results:
|
595
|
+
assert "python" in result.title.lower()
|
596
|
+
|
597
|
+
def test_very_long_query(self):
|
598
|
+
"""Test very long fulltext query."""
|
599
|
+
long_terms = ["term" + str(i) for i in range(10)] # Reasonable number of terms
|
600
|
+
|
601
|
+
filter_obj = boolean_match("title", "content", "tags")
|
602
|
+
for term in long_terms:
|
603
|
+
filter_obj = filter_obj.encourage(term)
|
604
|
+
|
605
|
+
query = self.client.query(Article).filter(filter_obj)
|
606
|
+
results = query.all()
|
607
|
+
# Should not error, even if no results
|
608
|
+
assert isinstance(results, list)
|
609
|
+
# Long query with non-existent terms should return empty results
|
610
|
+
assert len(results) == 0
|
611
|
+
|
612
|
+
def test_case_insensitive_search(self):
|
613
|
+
"""Test case insensitive search functionality."""
|
614
|
+
# Test uppercase search
|
615
|
+
query = self.client.query(Article).filter(boolean_match("title", "content", "tags").must("PYTHON"))
|
616
|
+
|
617
|
+
results = query.all()
|
618
|
+
assert len(results) >= 1
|
619
|
+
|
620
|
+
# Verify results contain python (case insensitive)
|
621
|
+
for result in results:
|
622
|
+
content_lower = (result.title + " " + result.content + " " + (result.tags or "")).lower()
|
623
|
+
assert "python" in content_lower
|
624
|
+
|
625
|
+
def test_empty_result_handling(self):
    """A required term that matches nothing must yield an empty list."""
    no_match_filter = boolean_match("title", "content", "tags").must("nonexistent_term_xyz123")
    rows = self.client.query(Article).filter(no_match_filter).all()

    assert len(rows) == 0
    assert isinstance(rows, list)
|
632
|
+
|
633
|
+
def test_special_characters_handling(self):
    """Search for a term containing ``+`` characters and expect no error."""
    # Add a row whose text contains the special characters being searched.
    insert_sql = "INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)"
    cpp_row = ("C++ Programming", "Learn C++ programming language", "c++,programming", "Programming")
    self.client.execute(insert_sql, cpp_row)

    cpp_filter = boolean_match("title", "content", "tags").encourage("c++")
    rows = self.client.query(Article).filter(cpp_filter).all()

    # Only graceful handling is checked: whether the row is found depends
    # on how the index treats the special characters.
    assert isinstance(rows, list)
|
647
|
+
|
648
|
+
|
649
|
+
class TestLogicalAdaptersOnline:
    """Online tests for generic logical adapters with real database."""

    @classmethod
    def setup_class(cls):
        """Connect and rebuild the ``test_articles`` fixture table.

        The table is dropped, recreated, filled with five known rows and
        given a fulltext index over ``title``, ``content`` and ``tags``.

        If anything fails after the connection has been opened, the client
        is disconnected before the error propagates: pytest does not call
        ``teardown_class`` when ``setup_class`` raises, so the connection
        would otherwise leak.
        """
        import logging

        # Get connection parameters using standard config
        host, port, user, password, database = get_connection_params()

        cls.client = Client()
        cls.client.connect(host=host, port=port, user=user, password=password, database=database)

        try:
            # Ensure test database exists and is selected
            cls.client.execute("CREATE DATABASE IF NOT EXISTS test_fulltext_search")
            cls.client.execute("USE test_fulltext_search")

            # Recreate the table so every run starts from the same rows
            cls.client.execute("DROP TABLE IF EXISTS test_articles")
            cls.client.execute(
                """
                CREATE TABLE IF NOT EXISTS test_articles (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    title VARCHAR(255) NOT NULL,
                    content TEXT NOT NULL,
                    tags VARCHAR(500),
                    category VARCHAR(100)
                )
                """
            )

            # Fixture rows: two "Programming", two "AI", one "Security"
            test_data = [
                (
                    "Python Programming",
                    "Learn Python programming basics",
                    "python,programming",
                    "Programming",
                ),
                (
                    "Java Development",
                    "Java enterprise development guide",
                    "java,enterprise",
                    "Programming",
                ),
                ("Machine Learning", "Introduction to ML with Python", "python,ml,ai", "AI"),
                ("Web Security", "Security best practices for web apps", "security,web", "Security"),
                ("Data Science", "Data analysis with Python and R", "python,data,science", "AI"),
            ]

            for title, content, tags, category in test_data:
                cls.client.execute(
                    "INSERT INTO test_articles (title, content, tags, category) VALUES (%s, %s, %s, %s)",
                    (title, content, tags, category),
                )

            # Create fulltext index. This stays best-effort, but the failure
            # is logged: the table was just recreated, so an error here is
            # unlikely to be "index already exists" and would otherwise be
            # invisible while the fulltext tests quietly match nothing.
            try:
                cls.client.execute("CREATE FULLTEXT INDEX ft_articles ON test_articles(title, content, tags)")
            except Exception:
                logging.getLogger(__name__).warning(
                    "Could not create fulltext index ft_articles on test_articles",
                    exc_info=True,
                )
        except BaseException:
            cls.client.disconnect()
            raise

    @classmethod
    def teardown_class(cls):
        """Clean up after tests."""
        if hasattr(cls, 'client'):
            cls.client.disconnect()

    def test_logical_and_online(self):
        """Test logical_and with real database queries."""
        # Test: Find articles about Python programming
        fulltext_condition = boolean_match("title", "content", "tags").must("python")
        category_condition = Article.category == "Programming"

        query = self.client.query(Article).filter(logical_and(fulltext_condition, category_condition))
        results = query.all()

        assert isinstance(results, list)
        # Should find "Python Programming" article
        if results:
            assert any("Python" in r.title for r in results)

    def test_logical_or_online(self):
        """Test logical_or with real database queries."""
        # MatrixOne does NOT support OR operations with MATCH() AGAINST()
        # Test OR with regular conditions only
        programming_condition = Article.category == "Programming"
        ai_condition = Article.category == "AI"

        query = self.client.query(Article).filter(logical_or(programming_condition, ai_condition))
        results = query.all()

        assert isinstance(results, list)
        # The fixture rows include both categories and no fulltext index is
        # involved, so an empty result means the OR filter is broken.
        assert results
        for result in results:
            assert result.category in ["Programming", "AI"]

    def test_logical_not_online(self):
        """Test logical_not with real database queries."""
        # MatrixOne has limitations with NOT in fulltext context
        # Use simpler approach: test NOT with regular conditions
        category_condition = Article.category == "Programming"

        query = self.client.query(Article).filter(logical_not(category_condition))
        results = query.all()

        assert isinstance(results, list)
        # The fixture rows include "AI" and "Security" articles, so the
        # negated filter must return something, and never "Programming".
        assert results
        for result in results:
            assert result.category != "Programming"

    def test_mixed_conditions_online(self):
        """Test mixing fulltext and regular SQL conditions."""
        # Test: Find AI articles containing Python
        fulltext_condition = boolean_match("title", "content", "tags").must("python")
        category_condition = Article.category == "AI"

        query = self.client.query(Article).filter(logical_and(fulltext_condition, category_condition))
        results = query.all()

        assert isinstance(results, list)
        # Should find "Machine Learning" and "Data Science" articles if they match
        for result in results:
            assert result.category == "AI"

    def test_complex_nested_conditions_online(self):
        """Test OR-combined category conditions built ahead of the query."""
        # MatrixOne does NOT support complex OR with MATCH() AGAINST(),
        # so only regular column conditions are combined here.
        programming_cat = Article.category == "Programming"
        ai_cat = Article.category == "AI"

        final_condition = logical_or(programming_cat, ai_cat)

        query = self.client.query(Article).filter(final_condition)
        results = query.all()

        assert isinstance(results, list)
        # The fixture rows include both categories, so the result cannot be
        # empty and must contain nothing outside those two categories.
        assert results
        categories = [r.category for r in results]
        assert all(cat in ["Programming", "AI"] for cat in categories)

    def test_logical_or_with_different_fulltext_modes(self):
        """Run a natural-language fulltext match on its own.

        Despite the name, no ``logical_or`` is applied: this simplified
        version only checks that a natural-language mode query executes.
        """
        natural_condition = natural_match("title", "content", "tags", query="python")

        query = self.client.query(Article).filter(natural_condition)
        results = query.all()

        assert isinstance(results, list)

    def test_multiple_logical_operations(self):
        """Test a fulltext condition AND-ed with a regular condition."""
        fulltext_condition = boolean_match("title", "content", "tags").must("python")
        category_condition = Article.category == "Programming"

        final_condition = logical_and(fulltext_condition, category_condition)

        query = self.client.query(Article).filter(final_condition)
        results = query.all()

        assert isinstance(results, list)
        # Any Python article returned must be in the Programming category
        for result in results:
            assert result.category == "Programming"

    def test_fulltext_and_supported_online(self):
        """Test that fulltext AND regular conditions work (this is supported by MatrixOne)."""
        # This should work: MATCH() AGAINST() AND regular_condition
        fulltext_condition = boolean_match("title", "content", "tags").must("programming")
        category_condition = Article.category == "Programming"

        # Test AND combination (supported)
        combined_condition = logical_and(fulltext_condition, category_condition)

        query = self.client.query(Article).filter(combined_condition)
        results = query.all()

        # Only successful execution is checked here.
        assert isinstance(results, list)
|
838
|
+
|
839
|
+
|
840
|
+
if __name__ == "__main__":
    # Run with: python -m pytest tests/online/test_fulltext_search_online.py -v
    # Exit with pytest's return code so that running this file directly
    # reports test failures to the calling shell instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))
|