matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,1686 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Advanced Fulltext Search Builder for MatrixOne
|
17
|
+
|
18
|
+
This module provides an Elasticsearch-like query builder for MatrixOne fulltext search,
|
19
|
+
with chainable methods and comprehensive search capabilities.
|
20
|
+
|
21
|
+
## Column Matching Requirements
|
22
|
+
|
23
|
+
**CRITICAL**: The columns specified in MATCH() must exactly match the columns
|
24
|
+
defined in the FULLTEXT index. This is a MatrixOne requirement.
|
25
|
+
|
26
|
+
Examples:
|
27
|
+
- If your index is: `FULLTEXT(title, content, tags)`
|
28
|
+
- Your MATCH() must be: `MATCH(title, content, tags) AGAINST(...)`
|
29
|
+
- NOT: `MATCH(title) AGAINST(...)` or `MATCH(title, content) AGAINST(...)`
|
30
|
+
|
31
|
+
## MatrixOne Limitations
|
32
|
+
|
33
|
+
1. **Multiple MATCH() Functions**: MatrixOne does not support multiple
|
34
|
+
MATCH() functions in the same query.
|
35
|
+
|
36
|
+
❌ WRONG: `WHERE MATCH(...) AND MATCH(...)`
|
37
|
+
✅ CORRECT: Use chained filter() calls or combine terms in single MATCH()
|
38
|
+
|
39
|
+
2. **Complex Nested Groups**: Some complex nested syntaxes are not supported.
|
40
|
+
|
41
|
+
❌ WRONG: `'+learning -basic (+machine AI) (+deep neural)'`
|
42
|
+
✅ CORRECT: `'+learning -basic +machine +deep'`
|
43
|
+
|
44
|
+
## Supported Boolean Mode Operators
|
45
|
+
|
46
|
+
### Group-level operators (applied to entire groups):
|
47
|
+
- `+(group)`: Group must be present
|
48
|
+
- `-(group)`: Group must not be present
|
49
|
+
|
50
|
+
### Element-level operators:
|
51
|
+
- `+term`: Term must contain (required)
|
52
|
+
- `-term`: Term must not contain (excluded)
|
53
|
+
- `term`: Term optional (should contain)
|
54
|
+
- `"phrase"`: Exact phrase match
|
55
|
+
- `term*`: Prefix match
|
56
|
+
|
57
|
+
### Weight operators (within groups/elements):
|
58
|
+
- `term`: Optional term with normal positive weight boost
|
59
|
+
- `>term`: Higher relevance weight for term (high positive boost)
|
60
|
+
- `<term`: Lower relevance weight for term (low positive boost)
|
61
|
+
- `~term`: Reduced/suppressed relevance weight (negative or minimal boost)
|
62
|
+
|
63
|
+
### Weight Operator Comparison:
|
64
|
+
- `encourage("tutorial")` → `tutorial` : Encourages documents with "tutorial"
|
65
|
+
- `discourage("legacy")` → `~legacy` : Discourages documents with "legacy"
|
66
|
+
|
67
|
+
Both are optional (don't filter documents) but affect ranking differently.
|
68
|
+
|
69
|
+
### MatrixOne Example:
|
70
|
+
- `'+red -(<blue >is)'`: Must have 'red', must NOT have group containing 'blue' (low weight) and 'is' (high weight)
|
71
|
+
|
72
|
+
## Supported Modes
|
73
|
+
|
74
|
+
- **NATURAL LANGUAGE**: Default full-text search
|
75
|
+
- **BOOLEAN**: Advanced boolean operators
|
76
|
+
- **QUERY EXPANSION**: Automatic query expansion (limited support)
|
77
|
+
"""
|
78
|
+
|
79
|
+
from __future__ import annotations
|
80
|
+
|
81
|
+
from typing import TYPE_CHECKING, Any, List, Optional
|
82
|
+
|
83
|
+
from sqlalchemy import Boolean
|
84
|
+
from sqlalchemy.sql import and_, not_, or_, text
|
85
|
+
from sqlalchemy.sql.elements import ClauseElement
|
86
|
+
|
87
|
+
if TYPE_CHECKING:
|
88
|
+
from ..client import Client
|
89
|
+
|
90
|
+
|
91
|
+
class FulltextSearchMode:
    """Namespace holding the search-mode strings used in ``AGAINST(... IN <mode>)``.

    The members are plain ``str`` class attributes (this is not an
    ``enum.Enum``), so they can be interpolated into SQL text directly.
    """

    # Default natural-language matching.
    NATURAL_LANGUAGE = "natural language mode"
    # Boolean operators (+, -, ~, <, >, grouping) are honoured.
    BOOLEAN = "boolean mode"
    # Query-expansion matching.
    QUERY_EXPANSION = "query expansion mode"
|
97
|
+
|
98
|
+
|
99
|
+
class FulltextSearchAlgorithm:
    """Namespace holding the names of the fulltext relevance algorithms.

    The members are plain ``str`` class attributes rather than ``enum.Enum``
    members.
    """

    TF_IDF = "TF-IDF"
    BM25 = "BM25"
|
104
|
+
|
105
|
+
|
106
|
+
class FulltextElement:
    """One atom of a fulltext query: a term, quoted phrase, prefix pattern, etc.

    The three pieces are stored exactly as given and are only concatenated
    when :meth:`build` is called.
    """

    def __init__(self, content: str, operator: str = "", weight_modifier: str = ""):
        # Leading boolean operator: "+", "-" or "" when the element is optional.
        self.operator = operator
        # Relevance modifier placed after the operator: ">", "<", "~" or "".
        self.weight_modifier = weight_modifier
        # The text being searched for (already quoted / starred by the caller).
        self.content = content

    def build(self) -> str:
        """Render the element as ``<operator><weight_modifier><content>``."""
        # A falsy modifier contributes nothing to the rendered text.
        modifier = self.weight_modifier or ""
        return f"{self.operator}{modifier}{self.content}"
|
119
|
+
|
120
|
+
|
121
|
+
class FulltextGroup:
    """A collection of fulltext elements and nested sub-groups.

    Groups are the building block for nested boolean-mode queries. A group
    does not render its own surrounding parentheses or leading operator;
    the parent that renders it wraps it in ``(...)`` and picks the prefix
    (``+``, ``-``, ``~`` or none) from the child's ``group_type`` and
    ``is_tilde`` attributes.

    Element-level Methods (within groups):
        - medium(): Add terms with medium weight (no operators)
        - high(): Add terms with high weight (>term)
        - low(): Add terms with low weight (<term)
        - phrase(): Add exact phrase matches ("phrase")
        - prefix(): Add prefix matches (term*)

    Group Types:
        - "or": OR semantics (default) - rendered by the parent as ``(...)``
        - "and": AND semantics - rendered by the parent as ``+(...)``
        - "not": NOT semantics - rendered by the parent as ``-(...)``

    Usage with Group-level Operators:
        query.must(group().medium("java", "kotlin"))          # +(java kotlin)
        query.encourage(group().medium("tutorial", "guide"))  # (tutorial guide)
        query.discourage(group().medium("old", "outdated"))   # ~(old outdated)
        query.must_not(group().medium("spam", "junk"))        # -(spam junk)

    Element-level Weight Operators (inside groups):
        # MatrixOne syntax: '+red -(<blue >is)'
        group().low("blue").high("is")
        # Used as: query.must("red").must_not(group().low("blue").high("is"))

    Important Notes:
        - Use medium() for normal terms inside groups (no operators)
        - Use high()/low() for element-level weight control
        - Group-level operators (+, -, ~) are applied by the parent query builder
    """

    def __init__(self, group_type: str = "or"):
        self.group_type = group_type  # one of "or", "and", "not"
        self.is_tilde = False  # True once the group is registered via add_tilde_group()
        self.elements: List[FulltextElement] = []
        self.groups: List["FulltextGroup"] = []

    def must(self, *terms: str) -> "FulltextGroup":
        """Add required terms.

        Only a group whose type is neither "or" nor "not" (i.e. the builder's
        top-level "and" group) prefixes the terms with ``+``; inside "or"/"not"
        groups the terms are added bare, exactly like :meth:`medium`.
        """
        sign = "" if self.group_type in ["or", "not"] else "+"
        self.elements.extend(FulltextElement(word, sign) for word in terms)
        return self

    def must_not(self, *terms: str) -> "FulltextGroup":
        """Add excluded terms.

        Only a group whose type is neither "or" nor "not" prefixes the terms
        with ``-``; inside "or"/"not" groups the terms are added bare, exactly
        like :meth:`medium`.
        """
        sign = "" if self.group_type in ["or", "not"] else "-"
        self.elements.extend(FulltextElement(word, sign) for word in terms)
        return self

    def encourage(self, *terms: str) -> "FulltextGroup":
        """Add optional terms carrying the normal positive weight.

        The terms are rendered without any operator, so documents lacking them
        can still match.

        Args::

            *terms: Terms to add with normal positive weight

        Example::

            group.encourage("python")  # Generates: python
        """
        self.elements.extend(FulltextElement(word, "") for word in terms)
        return self

    def medium(self, *terms: str) -> "FulltextGroup":
        """Add bare terms (no operator, no weight modifier)."""
        self.elements.extend(FulltextElement(word, "") for word in terms)
        return self

    def phrase(self, phrase: str) -> "FulltextGroup":
        """Add an exact-phrase element, wrapping *phrase* in double quotes."""
        quoted = f'"{phrase}"'
        self.elements.append(FulltextElement(quoted, ""))
        return self

    def prefix(self, prefix: str) -> "FulltextGroup":
        """Add a prefix-match element by appending ``*`` to *prefix*."""
        starred = f"{prefix}*"
        self.elements.append(FulltextElement(starred, ""))
        return self

    def boost(self, term: str, weight: float) -> "FulltextGroup":
        """Add an element rendered as ``term^weight``."""
        boosted = f"{term}^{weight}"
        self.elements.append(FulltextElement(boosted, ""))
        return self

    def high(self, *terms: str) -> "FulltextGroup":
        """Add terms with the ``>`` (higher relevance) modifier."""
        self.elements.extend(FulltextElement(word, "", ">") for word in terms)
        return self

    def low(self, *terms: str) -> "FulltextGroup":
        """Add terms with the ``<`` (lower relevance) modifier."""
        self.elements.extend(FulltextElement(word, "", "<") for word in terms)
        return self

    def add_group(self, *groups: "FulltextGroup") -> "FulltextGroup":
        """Register one or more nested groups, in the order given."""
        self.groups.extend(groups)
        return self

    def add_tilde_group(self, group: "FulltextGroup") -> "FulltextGroup":
        """Register a nested group that is rendered as ``~(...)``.

        The passed group object is flagged in place (``is_tilde = True``).
        """
        group.is_tilde = True
        self.groups.append(group)
        return self

    @staticmethod
    def _prefix_for(child: "FulltextGroup") -> str:
        """Return the operator a parent puts in front of *child*'s parentheses."""
        # The tilde flag wins over the group type.
        if child.is_tilde:
            return "~"
        if child.group_type == "not":
            return "-"
        if child.group_type == "and":
            return "+"
        return ""  # "or" (default): bare parentheses

    def build(self) -> str:
        """Render this group's content as a space-separated string.

        Own elements come first, followed by every nested group whose own
        rendering is non-empty, wrapped in parentheses and prefixed according
        to its ``is_tilde`` / ``group_type``.
        """
        rendered = [element.build() for element in self.elements]
        for child in self.groups:
            inner = child.build()
            if not inner:
                # Empty nested groups contribute nothing, not even "()".
                continue
            rendered.append(f"{self._prefix_for(child)}({inner})")
        return " ".join(rendered)
|
278
|
+
|
279
|
+
|
280
|
+
class FulltextQueryBuilder:
|
281
|
+
"""Builder for constructing fulltext boolean queries.
|
282
|
+
|
283
|
+
This class provides a chainable API for building complex fulltext search queries
|
284
|
+
that are compatible with MatrixOne's MATCH() AGAINST() syntax.
|
285
|
+
|
286
|
+
Core Methods:
|
287
|
+
- must(): Required terms/groups (+ operator)
|
288
|
+
- must_not(): Excluded terms/groups (- operator)
|
289
|
+
- encourage(): Optional terms/groups with normal weight (no prefix)
|
290
|
+
- discourage(): Optional terms/groups with reduced weight (~ operator)
|
291
|
+
|
292
|
+
Examples::
|
293
|
+
|
294
|
+
# Basic usage
|
295
|
+
query.must("python") # +python
|
296
|
+
query.encourage("tutorial") # tutorial
|
297
|
+
query.discourage("legacy") # ~legacy
|
298
|
+
query.must_not("deprecated") # -deprecated
|
299
|
+
|
300
|
+
# Group usage
|
301
|
+
query.must(group().medium("java", "kotlin")) # +(java kotlin)
|
302
|
+
query.encourage(group().medium("tutorial", "guide")) # (tutorial guide)
|
303
|
+
query.must_not(group().medium("spam", "junk")) # -(spam junk)
|
304
|
+
|
305
|
+
Note: Group-level operators (+, -, ~) applied to entire groups. Element-level operators (>, <)
|
306
|
+
applied within groups using high(), low()
|
307
|
+
"""
|
308
|
+
|
309
|
+
def __init__(self):
|
310
|
+
self.main_group = FulltextGroup("and") # Main group with AND semantics
|
311
|
+
|
312
|
+
def must(self, *items) -> "FulltextQueryBuilder":
|
313
|
+
"""Add required terms or groups (+ operator at group level).
|
314
|
+
|
315
|
+
Documents MUST contain these terms/groups to match. This is equivalent
|
316
|
+
to the '+' operator in MatrixOne's boolean mode syntax.
|
317
|
+
|
318
|
+
Args::
|
319
|
+
|
320
|
+
*items: Can be strings (terms) or FulltextGroup objects
|
321
|
+
|
322
|
+
Examples::
|
323
|
+
|
324
|
+
# Required term - documents must contain 'python'
|
325
|
+
query.must("python") # Generates: +python
|
326
|
+
|
327
|
+
# Required group - documents must contain either 'java' OR 'kotlin'
|
328
|
+
query.must(group().medium("java", "kotlin")) # Generates: +(java kotlin)
|
329
|
+
|
330
|
+
# Multiple required terms
|
331
|
+
query.must("python", "programming") # Generates: +python +programming
|
332
|
+
|
333
|
+
# Unpack list to search multiple terms
|
334
|
+
words = ["python", "programming"]
|
335
|
+
query.must(*words) # Correct: unpacks the list
|
336
|
+
|
337
|
+
Raises::
|
338
|
+
|
339
|
+
TypeError: If a list or tuple is passed directly without unpacking
|
340
|
+
|
341
|
+
Returns::
|
342
|
+
|
343
|
+
FulltextQueryBuilder: Self for method chaining
|
344
|
+
"""
|
345
|
+
for item in items:
|
346
|
+
# Validate parameter types and provide friendly error messages
|
347
|
+
if isinstance(item, (list, tuple)):
|
348
|
+
raise TypeError(
|
349
|
+
f"must() received a {type(item).__name__} object, but expected individual terms. "
|
350
|
+
f"To search multiple terms, use the unpacking operator: must(*terms) instead of must(terms). "
|
351
|
+
f"Example: must(*{list(item)[:3]}) or must({', '.join(repr(str(t)) for t in list(item)[:3])})"
|
352
|
+
)
|
353
|
+
if isinstance(item, FulltextGroup):
|
354
|
+
item.group_type = "and" # Force group to be required
|
355
|
+
self.main_group.add_group(item)
|
356
|
+
else:
|
357
|
+
self.main_group.must(item)
|
358
|
+
return self
|
359
|
+
|
360
|
+
def must_not(self, *items) -> "FulltextQueryBuilder":
|
361
|
+
"""Add excluded terms or groups (- operator at group level).
|
362
|
+
|
363
|
+
Documents MUST NOT contain these terms/groups to match. This is equivalent
|
364
|
+
to the '-' operator in MatrixOne's boolean mode syntax.
|
365
|
+
|
366
|
+
Args::
|
367
|
+
|
368
|
+
*items: Can be strings (terms) or FulltextGroup objects
|
369
|
+
|
370
|
+
Examples::
|
371
|
+
|
372
|
+
# Excluded term - documents must not contain 'deprecated'
|
373
|
+
query.must_not("deprecated") # Generates: -deprecated
|
374
|
+
|
375
|
+
# Excluded group - documents must not contain 'spam' OR 'junk'
|
376
|
+
query.must_not(group().medium("spam", "junk")) # Generates: -(spam junk)
|
377
|
+
|
378
|
+
# Multiple excluded terms
|
379
|
+
query.must_not("spam", "junk") # Generates: -spam -junk
|
380
|
+
|
381
|
+
# Unpack list to exclude multiple terms
|
382
|
+
words = ["spam", "junk"]
|
383
|
+
query.must_not(*words) # Correct: unpacks the list
|
384
|
+
|
385
|
+
Raises::
|
386
|
+
|
387
|
+
TypeError: If a list or tuple is passed directly without unpacking
|
388
|
+
|
389
|
+
Returns::
|
390
|
+
|
391
|
+
FulltextQueryBuilder: Self for method chaining
|
392
|
+
"""
|
393
|
+
for item in items:
|
394
|
+
# Validate parameter types and provide friendly error messages
|
395
|
+
if isinstance(item, (list, tuple)):
|
396
|
+
raise TypeError(
|
397
|
+
f"must_not() received a {type(item).__name__} object, but expected individual terms. "
|
398
|
+
f"To exclude multiple terms, use the unpacking operator: must_not(*terms) instead of must_not(terms). "
|
399
|
+
f"Example: must_not(*{list(item)[:3]}) or must_not({', '.join(repr(str(t)) for t in list(item)[:3])})"
|
400
|
+
)
|
401
|
+
if isinstance(item, FulltextGroup):
|
402
|
+
item.group_type = "not" # Force group to be excluded
|
403
|
+
self.main_group.add_group(item)
|
404
|
+
else:
|
405
|
+
self.main_group.must_not(item)
|
406
|
+
return self
|
407
|
+
|
408
|
+
def encourage(self, *items) -> "FulltextQueryBuilder":
|
409
|
+
"""Add terms or groups that should be encouraged (normal positive weight).
|
410
|
+
|
411
|
+
Documents can match without these terms, but containing them will
|
412
|
+
INCREASE the relevance score. This provides normal positive weight boost.
|
413
|
+
|
414
|
+
Args::
|
415
|
+
|
416
|
+
*items: Can be strings (terms) or FulltextGroup objects
|
417
|
+
|
418
|
+
Examples::
|
419
|
+
|
420
|
+
# Encourage documents with 'tutorial'
|
421
|
+
query.encourage("tutorial") # Generates: tutorial
|
422
|
+
|
423
|
+
# Encourage documents with 'beginner' OR 'intro'
|
424
|
+
query.encourage(group().medium("beginner", "intro")) # Generates: (beginner intro)
|
425
|
+
|
426
|
+
# Multiple encouraged terms
|
427
|
+
query.encourage("tutorial", "guide") # Generates: tutorial guide
|
428
|
+
|
429
|
+
# Unpack list to encourage multiple terms
|
430
|
+
words = ["tutorial", "guide"]
|
431
|
+
query.encourage(*words) # Correct: unpacks the list
|
432
|
+
|
433
|
+
Weight Comparison:
|
434
|
+
- encourage("term"): Normal positive boost (encourages term)
|
435
|
+
- discourage("term"): Reduced/negative boost (discourages term)
|
436
|
+
|
437
|
+
Raises::
|
438
|
+
|
439
|
+
TypeError: If a list or tuple is passed directly without unpacking
|
440
|
+
|
441
|
+
Returns::
|
442
|
+
|
443
|
+
FulltextQueryBuilder: Self for method chaining
|
444
|
+
"""
|
445
|
+
for item in items:
|
446
|
+
# Validate parameter types and provide friendly error messages
|
447
|
+
if isinstance(item, (list, tuple)):
|
448
|
+
raise TypeError(
|
449
|
+
f"encourage() received a {type(item).__name__} object, but expected individual terms. "
|
450
|
+
f"To encourage multiple terms, use the unpacking operator: encourage(*terms) "
|
451
|
+
f"instead of encourage(terms). "
|
452
|
+
f"Example: encourage(*{list(item)[:3]}) or "
|
453
|
+
f"encourage({', '.join(repr(str(t)) for t in list(item)[:3])})"
|
454
|
+
)
|
455
|
+
if isinstance(item, FulltextGroup):
|
456
|
+
item.group_type = "or" # Force group to be optional
|
457
|
+
self.main_group.add_group(item)
|
458
|
+
else:
|
459
|
+
self.main_group.encourage(item)
|
460
|
+
return self
|
461
|
+
|
462
|
+
def discourage(self, *items) -> "FulltextQueryBuilder":
|
463
|
+
"""Add terms or groups that should be discouraged (~ operator at group level).
|
464
|
+
|
465
|
+
Documents can match without these terms, but containing them will
|
466
|
+
DECREASE the relevance score. This provides reduced or negative weight boost,
|
467
|
+
effectively discouraging documents that contain these terms.
|
468
|
+
|
469
|
+
Args::
|
470
|
+
|
471
|
+
*items: Can be strings (terms) or FulltextGroup objects
|
472
|
+
|
473
|
+
Examples::
|
474
|
+
|
475
|
+
# Discourage documents with 'legacy'
|
476
|
+
query.discourage("legacy") # Generates: ~legacy
|
477
|
+
|
478
|
+
# Discourage documents with 'old' OR 'outdated'
|
479
|
+
query.discourage(group().medium("old", "outdated")) # Generates: ~(old outdated)
|
480
|
+
|
481
|
+
# Multiple discouraged terms
|
482
|
+
query.discourage("legacy", "deprecated") # Generates: ~legacy ~deprecated
|
483
|
+
|
484
|
+
# Unpack list to discourage multiple terms
|
485
|
+
words = ["legacy", "deprecated"]
|
486
|
+
query.discourage(*words) # Correct: unpacks the list
|
487
|
+
|
488
|
+
Weight Comparison:
|
489
|
+
- encourage("term"): Normal positive boost (encourages term)
|
490
|
+
- discourage("term"): Reduced/negative boost (discourages term)
|
491
|
+
|
492
|
+
Use Cases:
|
493
|
+
# Search Python content, but discourage legacy versions
|
494
|
+
query.must("python").encourage("3.11").discourage("2.7")
|
495
|
+
|
496
|
+
# Find tutorials, but avoid outdated content
|
497
|
+
query.must("tutorial").discourage(group().medium("old", "deprecated"))
|
498
|
+
|
499
|
+
Raises::
|
500
|
+
|
501
|
+
TypeError: If a list or tuple is passed directly without unpacking
|
502
|
+
|
503
|
+
Returns::
|
504
|
+
|
505
|
+
FulltextQueryBuilder: Self for method chaining
|
506
|
+
"""
|
507
|
+
for item in items:
|
508
|
+
# Validate parameter types and provide friendly error messages
|
509
|
+
if isinstance(item, (list, tuple)):
|
510
|
+
raise TypeError(
|
511
|
+
f"discourage() received a {type(item).__name__} object, but expected individual terms. "
|
512
|
+
f"To discourage multiple terms, use the unpacking operator: discourage(*terms) "
|
513
|
+
f"instead of discourage(terms). "
|
514
|
+
f"Example: discourage(*{list(item)[:3]}) or "
|
515
|
+
f"discourage({', '.join(repr(str(t)) for t in list(item)[:3])})"
|
516
|
+
)
|
517
|
+
if isinstance(item, FulltextGroup):
|
518
|
+
# Apply tilde to the entire group
|
519
|
+
self.main_group.add_tilde_group(item)
|
520
|
+
else:
|
521
|
+
# Apply tilde to individual term
|
522
|
+
self.main_group.elements.append(FulltextElement(item, "", "~"))
|
523
|
+
return self
|
524
|
+
|
525
|
+
def phrase(self, phrase: str) -> "FulltextQueryBuilder":
|
526
|
+
"""Add a phrase search to the main group."""
|
527
|
+
self.main_group.phrase(phrase)
|
528
|
+
return self
|
529
|
+
|
530
|
+
def prefix(self, prefix: str) -> "FulltextQueryBuilder":
|
531
|
+
"""Add a prefix search to the main group."""
|
532
|
+
self.main_group.prefix(prefix)
|
533
|
+
return self
|
534
|
+
|
535
|
+
def boost(self, term: str, weight: float) -> "FulltextQueryBuilder":
|
536
|
+
"""Add a boosted term to the main group."""
|
537
|
+
self.main_group.boost(term, weight)
|
538
|
+
return self
|
539
|
+
|
540
|
+
def group(self, *builders: "FulltextQueryBuilder") -> "FulltextQueryBuilder":
|
541
|
+
"""Add nested query builders as groups (OR semantics)."""
|
542
|
+
for builder in builders:
|
543
|
+
# Convert builder to group and add to main group
|
544
|
+
group = FulltextGroup("or")
|
545
|
+
# Add all elements from the builder's main group
|
546
|
+
group.elements.extend(builder.main_group.elements)
|
547
|
+
group.groups.extend(builder.main_group.groups)
|
548
|
+
self.main_group.groups.append(group)
|
549
|
+
return self
|
550
|
+
|
551
|
+
def build(self) -> str:
|
552
|
+
"""Build the final query string."""
|
553
|
+
return self.main_group.build()
|
554
|
+
|
555
|
+
def as_sql(
|
556
|
+
self,
|
557
|
+
table: str,
|
558
|
+
columns: List[str],
|
559
|
+
mode: str = FulltextSearchMode.BOOLEAN,
|
560
|
+
include_score: bool = False,
|
561
|
+
select_columns: Optional[List[str]] = None,
|
562
|
+
where_conditions: Optional[List[str]] = None,
|
563
|
+
order_by: Optional[str] = None,
|
564
|
+
limit: Optional[int] = None,
|
565
|
+
offset: Optional[int] = None,
|
566
|
+
) -> str:
|
567
|
+
"""Build a complete SQL query with optional AS score support.
|
568
|
+
|
569
|
+
This method generates a full SQL query similar to FulltextSearchBuilder but using
|
570
|
+
the query built by FulltextQueryBuilder.
|
571
|
+
|
572
|
+
Args::
|
573
|
+
|
574
|
+
table: Table name to search in
|
575
|
+
columns: List of columns to search in (must match FULLTEXT index)
|
576
|
+
mode: Search mode (BOOLEAN, NATURAL_LANGUAGE, etc.)
|
577
|
+
include_score: Whether to include relevance score in results
|
578
|
+
select_columns: Columns to select (default: all columns "*")
|
579
|
+
where_conditions: Additional WHERE conditions
|
580
|
+
order_by: ORDER BY clause (e.g., "score DESC")
|
581
|
+
limit: LIMIT value
|
582
|
+
offset: OFFSET value
|
583
|
+
|
584
|
+
Returns::
|
585
|
+
|
586
|
+
str: Complete SQL query
|
587
|
+
|
588
|
+
Examples::
|
589
|
+
|
590
|
+
.. code-block:: python
|
591
|
+
|
592
|
+
# Basic query with score
|
593
|
+
query = FulltextQueryBuilder().must("python").encourage("tutorial")
|
594
|
+
sql = query.as_sql("articles", ["title", "content"], include_score=True)
|
595
|
+
# SELECT *, MATCH(title, content) AGAINST('+python tutorial' IN boolean mode) AS score
|
596
|
+
# FROM articles WHERE MATCH(title, content) AGAINST('+python tutorial' IN boolean mode)
|
597
|
+
|
598
|
+
# Query with custom columns and ORDER BY score
|
599
|
+
sql = query.as_sql("articles", ["title", "content"],
|
600
|
+
select_columns=["id", "title"], include_score=True,
|
601
|
+
order_by="score DESC", limit=10)
|
602
|
+
"""
|
603
|
+
query_string = self.build()
|
604
|
+
if not query_string:
|
605
|
+
raise ValueError("Query is required - add at least one search term")
|
606
|
+
|
607
|
+
if not table:
|
608
|
+
raise ValueError("Table name is required")
|
609
|
+
|
610
|
+
if not columns:
|
611
|
+
raise ValueError("Search columns are required")
|
612
|
+
|
613
|
+
# Build columns string for MATCH()
|
614
|
+
columns_str = ", ".join(columns)
|
615
|
+
|
616
|
+
# Build SELECT clause
|
617
|
+
if select_columns:
|
618
|
+
select_parts = select_columns.copy()
|
619
|
+
else:
|
620
|
+
select_parts = ["*"]
|
621
|
+
|
622
|
+
if include_score:
|
623
|
+
score_expr = f"MATCH({columns_str}) AGAINST('{query_string}' IN {mode}) AS score"
|
624
|
+
select_parts.append(score_expr)
|
625
|
+
|
626
|
+
select_clause = f"SELECT {', '.join(select_parts)}"
|
627
|
+
|
628
|
+
# Build FROM clause
|
629
|
+
from_clause = f"FROM {table}"
|
630
|
+
|
631
|
+
# Build WHERE clause
|
632
|
+
where_parts = []
|
633
|
+
|
634
|
+
# Add fulltext search condition
|
635
|
+
fulltext_condition = f"MATCH({columns_str}) AGAINST('{query_string}' IN {mode})"
|
636
|
+
where_parts.append(fulltext_condition)
|
637
|
+
|
638
|
+
# Add additional WHERE conditions
|
639
|
+
if where_conditions:
|
640
|
+
where_parts.extend(where_conditions)
|
641
|
+
|
642
|
+
where_clause = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""
|
643
|
+
|
644
|
+
# Build ORDER BY clause
|
645
|
+
order_clause = f"ORDER BY {order_by}" if order_by else ""
|
646
|
+
|
647
|
+
# Build LIMIT clause
|
648
|
+
limit_clause = f"LIMIT {limit}" if limit else ""
|
649
|
+
|
650
|
+
# Build OFFSET clause
|
651
|
+
offset_clause = f"OFFSET {offset}" if offset else ""
|
652
|
+
|
653
|
+
# Combine all clauses
|
654
|
+
sql_parts = [
|
655
|
+
select_clause,
|
656
|
+
from_clause,
|
657
|
+
where_clause,
|
658
|
+
order_clause,
|
659
|
+
limit_clause,
|
660
|
+
offset_clause,
|
661
|
+
]
|
662
|
+
return " ".join(filter(None, sql_parts))
|
663
|
+
|
664
|
+
def as_score_sql(self, table: str, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN) -> str:
    """Build the search SQL with the relevance score column included.

    Thin wrapper that forwards its arguments to ``as_sql()`` and forces
    ``include_score=True``.

    Args::

        table: Table name to search in
        columns: List of columns to search in
        mode: Search mode

    Returns::

        str: Complete SQL query with AS score

    Example::

        query = FulltextQueryBuilder().must("python").encourage("tutorial")
        sql = query.as_score_sql("articles", ["title", "content"])
        # Generates SQL with AS score automatically included
    """
    scored_sql = self.as_sql(table, columns, mode, include_score=True)
    return scored_sql
|
686
|
+
|
687
|
+
|
688
|
+
class FulltextFilter(ClauseElement):
    """Advanced fulltext filter for integrating fulltext search with ORM queries.

    This class wraps FulltextQueryBuilder to provide seamless integration
    with MatrixOne ORM's filter() method, allowing fulltext search to be
    combined with other SQL conditions.

    Core Methods (Group-level operators):
        - must(): Required terms/groups (+ operator)
        - must_not(): Excluded terms/groups (- operator)
        - encourage(): Optional terms/groups with normal weight (no prefix)
        - discourage(): Optional terms/groups with reduced weight (~ operator)

    Parameter Types:
        - str: Single term (e.g., "python")
        - FulltextGroup: Group of terms (e.g., group().medium("java", "kotlin"))

    Usage with ORM:

    .. code-block:: python

        # Basic fulltext filter
        results = client.query(Article).filter(
            boolean_match("title", "content").must("python").encourage("tutorial")
        ).all()

        # Combined with other conditions
        results = client.query(Article).filter(
            boolean_match("title", "content").must("python")
        ).filter(
            Article.category == "Programming"
        ).all()

        # Complex fulltext with groups
        results = client.query(Article).filter(
            boolean_match("title", "content", "tags")
            .must("programming")
            .must(group().medium("python", "java"))
            .discourage(group().medium("legacy", "deprecated"))
        ).all()

    Weight Operator Examples

    .. code-block:: python

        # Encourage tutorials, discourage legacy content
        boolean_match("title", "content")
            .must("python")
            .encourage("tutorial")   # Boost documents with 'tutorial'
            .discourage("legacy")    # Lower ranking for 'legacy' documents

    Supported MatrixOne Boolean Mode Operators:
        Group-level: +, -, ~, (no prefix) - applied to entire groups/terms
        Element-level: >, < - applied within groups using high(), low()
        Other: "phrase", term* - exact phrases and prefix matching
        Complex: +red -(<blue >is) - nested groups with mixed operators

    Important MatrixOne Requirements:
        **Column Matching**: The columns specified must exactly match
        the columns defined in the FULLTEXT index. If your index is
        `FULLTEXT(title, content, tags)`, you must include all three columns.

        **Limitations**:
        - Only one MATCH() function per query is supported
        - Complex nested groups may have syntax restrictions
        - Use fulltext_and/fulltext_or for combining with other conditions
    """

    def __init__(self, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN):
        """Create a filter over ``columns`` using the given search ``mode``."""
        super().__init__()
        # NOTE: this instance attribute shadows the ``columns()`` method defined
        # below, so ``instance.columns(...)`` is not callable on instances built
        # through ``__init__``; read/assign the attribute directly instead.
        self.columns = columns
        self.mode = mode
        self.query_builder = FulltextQueryBuilder()
        self._natural_query = None  # Store natural language query separately
        # Set SQLAlchemy type info for compatibility
        self.type = Boolean()

    def columns(self, *columns: str) -> "FulltextFilter":
        """Set the columns to search in.

        NOTE: shadowed on instances by the ``columns`` attribute assigned in
        ``__init__``; kept so the class-level interface is unchanged.
        """
        self.columns = list(columns)
        return self

    def must(self, *items) -> "FulltextFilter":
        """Add required terms or groups (+ operator at group level)."""
        self.query_builder.must(*items)
        return self

    def must_not(self, *items) -> "FulltextFilter":
        """Add excluded terms or groups (- operator at group level)."""
        self.query_builder.must_not(*items)
        return self

    def encourage(self, *items) -> "FulltextFilter":
        """Add terms or groups that should be encouraged (normal positive weight)."""
        self.query_builder.encourage(*items)
        return self

    def phrase(self, *phrases: str) -> "FulltextFilter":
        """Add exact phrases - equivalent to "phrase"."""
        self.query_builder.phrase(*phrases)
        return self

    def prefix(self, *terms: str) -> "FulltextFilter":
        """Add prefix terms - equivalent to term*."""
        self.query_builder.prefix(*terms)
        return self

    def boost(self, term: str, weight: float) -> "FulltextFilter":
        """Add a boosted term (term^weight)."""
        self.query_builder.boost(term, weight)
        return self

    def discourage(self, *items) -> "FulltextFilter":
        """Add terms or groups that should be discouraged (~ operator at group level)."""
        self.query_builder.discourage(*items)
        return self

    def set_natural_query(self, query: str) -> "FulltextFilter":
        """Set natural language query string (used for NATURAL_LANGUAGE mode)."""
        self._natural_query = query
        return self

    def group(self, *filters: "FulltextFilter") -> "FulltextFilter":
        """Add nested query groups (OR semantics)."""
        builders = [f.query_builder for f in filters]
        self.query_builder.group(*builders)
        return self

    def natural_language(self) -> "FulltextFilter":
        """Set to natural language mode."""
        self.mode = FulltextSearchMode.NATURAL_LANGUAGE
        return self

    def boolean_mode(self) -> "FulltextFilter":
        """Set to boolean mode."""
        self.mode = FulltextSearchMode.BOOLEAN
        return self

    def query_expansion(self) -> "FulltextFilter":
        """Set to query expansion mode."""
        self.mode = FulltextSearchMode.QUERY_EXPANSION
        return self

    @staticmethod
    def _escape_sql_literal(value) -> str:
        """Escape ``value`` for embedding inside a single-quoted SQL string.

        Backslashes are doubled first (so a trailing ``\\`` cannot swallow the
        closing quote), then single quotes are doubled.
        """
        return str(value).replace("\\", "\\\\").replace("'", "''")

    def compile(self, compile_kwargs=None):
        """Compile to SQL expression for use in filter() method.

        Args::

            compile_kwargs: Accepted for call compatibility; not used here.

        Returns::

            str: A ``MATCH(...) AGAINST('...' <modifier>)`` expression.

        Raises::

            ValueError: If no columns are set or the resulting query is empty.
        """
        if not self.columns:
            raise ValueError("Columns must be specified")

        columns_str = ", ".join(self.columns)

        # For natural language mode, use the stored natural query if available
        if self.mode == FulltextSearchMode.NATURAL_LANGUAGE and self._natural_query:
            query_string = self._natural_query
        else:
            query_string = self.query_builder.build()

        if not query_string:
            raise ValueError("Query cannot be empty")

        # The query may come straight from end users (see natural_match), so it
        # must not be able to terminate the surrounding SQL string literal.
        literal = self._escape_sql_literal(query_string)

        # Natural language mode and any unrecognised mode use no modifier.
        if self.mode == FulltextSearchMode.BOOLEAN:
            modifier = " IN BOOLEAN MODE"
        elif self.mode == FulltextSearchMode.QUERY_EXPANSION:
            modifier = " WITH QUERY EXPANSION"
        else:
            modifier = ""

        return f"MATCH({columns_str}) AGAINST('{literal}'{modifier})"

    def _compiler_dispatch(self, visitor, **kw):
        """SQLAlchemy compiler dispatch method for complete compatibility."""
        # Generate the MATCH() AGAINST() SQL
        sql_text = self.compile()
        # Return a text clause that SQLAlchemy can handle
        return visitor.process(text(sql_text), **kw)

    def label(self, name: str):
        """Create a labeled version for use in SELECT clauses.

        This allows using fulltext expressions as selectable columns with aliases:

        Args::

            name: The alias name for the column

        Returns::

            A lightweight labeled-expression object exposing ``__str__``,
            ``compile()`` and ``_compiler_dispatch()``

        Examples::

            .. code-block:: python

                # Use as a SELECT column with score
                query(Article, Article.id,
                      boolean_match("title", "content").must("python").label("score"))

                # Multiple fulltext scores
                query(Article, Article.id,
                      boolean_match("title", "content").must("python").label("relevance"),
                      boolean_match("tags").must("programming").label("tag_score"))

        Generated SQL:

            .. code-block:: sql

                SELECT articles.id,
                       MATCH(title, content) AGAINST('+python' IN BOOLEAN MODE) AS score
                FROM articles
        """
        # Create a text expression that can be labeled
        sql_text = self.compile()
        text_expr = text(sql_text)

        # Create a custom labeled expression
        class FulltextLabel:
            def __init__(self, text_expr, name):
                self.text_expr = text_expr
                self.name = name

            def __str__(self):
                # For ORM integration, return only the expression without AS
                # The ORM will add the AS alias part
                return sql_text

            # Make it compatible with SQLAlchemy's compilation
            def compile(self, compile_kwargs=None):
                # For standalone use, include AS
                return f"{sql_text} AS {self.name}"

            def _compiler_dispatch(self, visitor, **kw):
                # For SQLAlchemy integration, return only the expression
                # SQLAlchemy will handle the AS alias
                return sql_text

        return FulltextLabel(text_expr, name)

    def __str__(self):
        """String representation for debugging."""
        return f"FulltextFilter({self.columns}, mode={self.mode})"

    def __repr__(self):
        """Detailed representation for debugging."""
        return f"FulltextFilter(columns={self.columns}, mode='{self.mode}', query='{self.query_builder.build()}')"

    def as_text(self):
        """Convert to SQLAlchemy text() object for compatibility with and_(), or_(), etc."""
        return text(self.compile())

    @classmethod
    def _create_and(cls, *conditions):
        """Helper to create AND expressions with FulltextFilter support."""
        # Filters of this class are converted to text() clauses; everything
        # else is passed through untouched.
        processed_conditions = [
            condition.as_text() if isinstance(condition, cls) else condition for condition in conditions
        ]
        return and_(*processed_conditions)

    @classmethod
    def _create_or(cls, *conditions):
        """Helper to create OR expressions with FulltextFilter support."""
        processed_conditions = [
            condition.as_text() if isinstance(condition, cls) else condition for condition in conditions
        ]
        return or_(*processed_conditions)
|
957
|
+
|
958
|
+
|
959
|
+
# Convenience functions for common use cases
|
960
|
+
|
961
|
+
|
962
|
+
def boolean_match(*columns) -> FulltextFilter:
    """Create a boolean mode fulltext filter for specified columns.

    This is the main entry point for creating fulltext search queries that integrate
    seamlessly with MatrixOne ORM's filter() method.

    Args::

        *columns: Column names or SQLAlchemy Column objects to search against

    Returns::

        FulltextFilter: A chainable filter object

    Examples::

        # Basic search - must contain 'python'
        boolean_match("title", "content").must("python")

        # Multiple conditions
        boolean_match("title", "content")
            .must("python")
            .encourage("tutorial")
            .discourage("legacy")

        # Group search - either 'python' or 'java'
        boolean_match("title", "content").must(group().medium("python", "java"))

        # Using SQLAlchemy Column objects
        boolean_match(Article.title, Article.content).must("python")

    Note: The columns specified must exactly match the FULLTEXT index columns. For example, if your
    index is FULLTEXT(title, content, tags), you must use boolean_match("title", "content", "tags")
    """
    # Anything exposing ``.name`` (Column objects, model attributes) contributes
    # that name; everything else (plain strings) is stringified. The former
    # separate "model attribute" branch also required ``.name`` and so could
    # never be reached after this check.
    column_names = [col.name if hasattr(col, 'name') else str(col) for col in columns]

    return FulltextFilter(column_names, FulltextSearchMode.BOOLEAN)
|
1010
|
+
|
1011
|
+
|
1012
|
+
def natural_match(*columns, query: str) -> FulltextFilter:
    """
    Create a natural language mode fulltext filter for specified columns.

    Natural language mode provides user-friendly search with automatic processing:
    - Stopword removal (e.g., 'the', 'a', 'an')
    - Stemming and variations
    - Relevance scoring based on TF-IDF or BM25 algorithm
    - Best for end-user search interfaces

    Args:
        *columns: Column names or SQLAlchemy Column objects to search against
            - Must exactly match the columns in your fulltext index
            - Can be strings or Column objects
        query: Natural language query string
            - User-friendly search terms
            - Automatically processed for best results
            - Multi-word queries are supported

    Important - Column Matching:
        The columns specified in MATCH() must exactly match the columns defined in
        the FULLTEXT index. Mismatches will cause errors.

        Examples:
            If index is: FULLTEXT(title, content)
            - ✅ natural_match("title", "content", query="...") - Correct
            - ❌ natural_match("title", query="...") - Error (partial)
            - ❌ natural_match("content", query="...") - Error (partial)

            If index is: FULLTEXT(content)
            - ✅ natural_match("content", query="...") - Correct
            - ❌ natural_match("title", "content", query="...") - Error (extra column)

    Parser Compatibility:
        Works with all parser types:
        - Default parser: Standard text tokenization
        - JSON parser: Searches JSON values within documents
        - NGRAM parser: Chinese and Asian language tokenization

    Returns:
        FulltextFilter: A fulltext filter object for use in queries

    Examples::

        # Basic natural language search
        result = client.query("articles.id", "articles.title", "articles.content").filter(
            natural_match("title", "content", query="machine learning")
        ).execute()

        # Using with ORM models
        result = client.query(Article).filter(
            natural_match(Article.title, Article.content, query="artificial intelligence")
        ).execute()

        # Single column search
        result = client.query(Article).filter(
            natural_match(Article.content, query="python programming")
        ).execute()

        # With relevance scoring
        result = client.query(
            Article.id,
            Article.title,
            Article.content,
            natural_match(Article.content, query="deep learning").label("score")
        ).execute()

        # JSON parser - searching within JSON documents
        result = client.query(Product).filter(
            natural_match(Product.details, query="Dell laptop")
        ).execute()

        # NGRAM parser - Chinese content search
        result = client.query(ChineseArticle).filter(
            natural_match(ChineseArticle.title, ChineseArticle.body, query="神雕侠侣")
        ).execute()

        # Combined with SQL filters
        result = client.query(Article).filter(
            natural_match(Article.content, query="programming tutorial")
        ).filter(Article.category == "Education").execute()
    """
    # Anything exposing ``.name`` (Column objects, model attributes) contributes
    # that name; everything else (plain strings) is stringified. The former
    # separate "model attribute" branch also required ``.name`` and so could
    # never be reached after this check.
    column_names = [col.name if hasattr(col, 'name') else str(col) for col in columns]

    natural_filter = FulltextFilter(column_names, FulltextSearchMode.NATURAL_LANGUAGE)
    return natural_filter.set_natural_query(query)
|
1108
|
+
|
1109
|
+
|
1110
|
+
def group() -> FulltextGroup:
    """Return a fresh ``FulltextGroup`` constructed with the ``"or"`` type.

    Elements inside the group are OR-ed together. How the group as a whole
    is treated (required, excluded, optional, reduced weight) depends on the
    method it is handed to:
    - must(group()) → +(...) - group is required
    - must_not(group()) → -(...) - group is excluded
    - encourage(group()) → (...) - group is optional with normal weight
    - discourage(group()) → ~(...) - group is optional with reduced weight

    Element-level Methods (use inside groups):
    - medium(): Add terms with medium weight (no operators)
    - high(): Add terms with high weight (>term)
    - low(): Add terms with low weight (<term)
    - phrase(): Add exact phrase matches ("phrase")
    - prefix(): Add prefix matches (term*)

    IMPORTANT: Inside groups, do NOT use must()/must_not() as they add +/- operators.
    Use medium() for plain terms or high()/low() for element-level weight control.

    Examples
        # Required group - must contain 'java' OR 'kotlin'
        query.must(group().medium("java", "kotlin"))  # +(java kotlin)

        # Excluded group - must not contain 'spam' OR 'junk'
        query.must_not(group().medium("spam", "junk"))  # -(spam junk)

        # Optional group with normal weight
        query.encourage(group().medium("tutorial", "guide"))  # (tutorial guide)

        # Optional group with reduced weight
        query.discourage(group().medium("old", "outdated"))  # ~(old outdated)

        # Complex MatrixOne style with element-level weights
        query.must("red").must_not(group().low("blue").high("is"))
        # Generates: '+red -(<blue >is)'
    """
    or_group = FulltextGroup("or")
    return or_group
|
1148
|
+
|
1149
|
+
|
1150
|
+
# Import generic logical adapters at the end to avoid circular imports
|
1151
|
+
try:
    from .adapters import logical_and, logical_not, logical_or
except ImportError:
    # The adapters module could not be imported: define local stand-ins.
    # Any condition exposing a callable ``compile`` is rendered through it and
    # wrapped in a parenthesised text() clause; other conditions pass through.
    def logical_and(*conditions):
        """Combine ``conditions`` with AND, compiling compile()-capable ones to text."""
        return and_(
            *[
                text(f"({cond.compile()})") if callable(getattr(cond, 'compile', None)) else cond
                for cond in conditions
            ]
        )

    def logical_or(*conditions):
        """Combine ``conditions`` with OR, compiling compile()-capable ones to text."""
        return or_(
            *[
                text(f"({cond.compile()})") if callable(getattr(cond, 'compile', None)) else cond
                for cond in conditions
            ]
        )

    def logical_not(condition):
        """Negate ``condition``; compile()-capable ones become ``NOT (...)`` text."""
        if not callable(getattr(condition, 'compile', None)):
            return not_(condition)
        return text(f"NOT ({condition.compile()})")
|
1178
|
+
|
1179
|
+
|
1180
|
+
# Remove old FulltextTerm and FulltextQuery classes as they are replaced by FulltextQueryBuilder
|
1181
|
+
|
1182
|
+
|
1183
|
+
class FulltextSearchBuilder:
|
1184
|
+
"""
|
1185
|
+
Elasticsearch-like fulltext search builder for MatrixOne.
|
1186
|
+
|
1187
|
+
Provides a chainable interface for building complex fulltext queries
|
1188
|
+
with support for various search modes, filters, and sorting.
|
1189
|
+
|
1190
|
+
Boolean Mode Operators:
|
1191
|
+
- ``+word`` : Required term (must contain)
|
1192
|
+
- ``-word`` : Excluded term (must not contain)
|
1193
|
+
- ``~word`` : Lower weight term (reduces relevance score)
|
1194
|
+
- ``<word`` : Lower weight term (reduces relevance score)
|
1195
|
+
- ``>word`` : Higher weight term (increases relevance score)
|
1196
|
+
- word : Optional term (may contain)
|
1197
|
+
- ``"phrase"`` : Exact phrase match
|
1198
|
+
- ``word*`` : Wildcard prefix match
|
1199
|
+
- (word1 word2) : Grouping (contains any of the words)
|
1200
|
+
|
1201
|
+
Note: MatrixOne supports all boolean mode operators.
|
1202
|
+
|
1203
|
+
Search Modes:
|
1204
|
+
- NATURAL_LANGUAGE: Automatic stopword removal, stemming, relevance scoring
|
1205
|
+
- BOOLEAN: Full control with operators, no automatic processing
|
1206
|
+
- QUERY_EXPANSION: Not supported in MatrixOne
|
1207
|
+
|
1208
|
+
Examples::
|
1209
|
+
|
1210
|
+
# Natural language search
|
1211
|
+
results = client.fulltext_search()
|
1212
|
+
.table("articles")
|
1213
|
+
.columns(["title", "content"])
|
1214
|
+
.with_mode(FulltextSearchMode.NATURAL_LANGUAGE)
|
1215
|
+
.query("machine learning")
|
1216
|
+
.with_score()
|
1217
|
+
.limit(10)
|
1218
|
+
.execute()
|
1219
|
+
|
1220
|
+
# Boolean search with complex terms
|
1221
|
+
results = client.fulltext_search()
|
1222
|
+
.table("articles")
|
1223
|
+
.columns(["title", "content"])
|
1224
|
+
.with_mode(FulltextSearchMode.BOOLEAN)
|
1225
|
+
.add_term("machine", required=True)
|
1226
|
+
.add_term("learning", required=True)
|
1227
|
+
.where("category = 'AI'")
|
1228
|
+
.order_by("score", "DESC")
|
1229
|
+
.limit(20)
|
1230
|
+
.execute()
|
1231
|
+
"""
|
1232
|
+
|
1233
|
+
def __init__(self, client: "Client"):
    """Initialise an empty search bound to ``client``."""
    self.client = client

    # Search target: nothing selected until table()/columns() are called.
    self._table_name: Optional[str] = None
    self._columns: List[str] = []

    # Search behaviour defaults.
    self._search_mode = FulltextSearchMode.NATURAL_LANGUAGE
    self._algorithm = FulltextSearchAlgorithm.BM25
    self._query_obj = FulltextQueryBuilder()
    self._include_score = False

    # Optional clauses of the final statement.
    self._where_conditions: List[str] = []
    self._order_by: Optional[str] = None
    self._limit_value: Optional[int] = None
    self._offset_value: Optional[int] = None

    # Projection defaults to every column.
    self._select_columns: List[str] = ["*"]
|
1246
|
+
|
1247
|
+
def table(self, table_name: str) -> "FulltextSearchBuilder":
|
1248
|
+
"""
|
1249
|
+
Set the target table for the search.
|
1250
|
+
|
1251
|
+
Args::
|
1252
|
+
|
1253
|
+
table_name: Name of the table to search
|
1254
|
+
|
1255
|
+
Returns::
|
1256
|
+
|
1257
|
+
FulltextSearchBuilder: Self for chaining
|
1258
|
+
"""
|
1259
|
+
self._table_name = table_name
|
1260
|
+
return self
|
1261
|
+
|
1262
|
+
def columns(self, columns: List[str]) -> "FulltextSearchBuilder":
|
1263
|
+
"""
|
1264
|
+
Set the columns to search in.
|
1265
|
+
|
1266
|
+
Args::
|
1267
|
+
|
1268
|
+
columns: List of column names to search
|
1269
|
+
|
1270
|
+
Returns::
|
1271
|
+
|
1272
|
+
FulltextSearchBuilder: Self for chaining
|
1273
|
+
"""
|
1274
|
+
self._columns = columns
|
1275
|
+
return self
|
1276
|
+
|
1277
|
+
def with_mode(self, mode: str) -> "FulltextSearchBuilder":
|
1278
|
+
"""
|
1279
|
+
Set the search mode.
|
1280
|
+
|
1281
|
+
Args::
|
1282
|
+
|
1283
|
+
mode: Search mode
|
1284
|
+
- FulltextSearchMode.NATURAL_LANGUAGE: Automatic processing, user-friendly
|
1285
|
+
- FulltextSearchMode.BOOLEAN: Full control with operators
|
1286
|
+
- FulltextSearchMode.QUERY_EXPANSION: Not supported in MatrixOne
|
1287
|
+
|
1288
|
+
Returns::
|
1289
|
+
|
1290
|
+
FulltextSearchBuilder: Self for chaining
|
1291
|
+
|
1292
|
+
Examples::
|
1293
|
+
|
1294
|
+
# Natural language mode (default)
|
1295
|
+
.with_mode(FulltextSearchMode.NATURAL_LANGUAGE)
|
1296
|
+
|
1297
|
+
# Boolean mode for complex queries
|
1298
|
+
.with_mode(FulltextSearchMode.BOOLEAN)
|
1299
|
+
"""
|
1300
|
+
self._search_mode = mode
|
1301
|
+
return self
|
1302
|
+
|
1303
|
+
def with_algorithm(self, algorithm: str) -> "FulltextSearchBuilder":
|
1304
|
+
"""
|
1305
|
+
Set the search algorithm.
|
1306
|
+
|
1307
|
+
Args::
|
1308
|
+
|
1309
|
+
algorithm: Search algorithm
|
1310
|
+
- FulltextSearchAlgorithm.TF_IDF: Traditional TF-IDF scoring
|
1311
|
+
- FulltextSearchAlgorithm.BM25: Modern BM25 scoring (recommended)
|
1312
|
+
|
1313
|
+
Returns::
|
1314
|
+
|
1315
|
+
FulltextSearchBuilder: Self for chaining
|
1316
|
+
|
1317
|
+
Examples::
|
1318
|
+
|
1319
|
+
# Use BM25 algorithm (recommended)
|
1320
|
+
.with_algorithm(FulltextSearchAlgorithm.BM25)
|
1321
|
+
|
1322
|
+
# Use TF-IDF algorithm
|
1323
|
+
.with_algorithm(FulltextSearchAlgorithm.TF_IDF)
|
1324
|
+
"""
|
1325
|
+
self._algorithm = algorithm
|
1326
|
+
return self
|
1327
|
+
|
1328
|
+
def query(self, query_string: str) -> "FulltextSearchBuilder":
    """
    Replace the current query with a single query string.

    Args::

        query_string: The search query (natural language or boolean syntax)

    Returns::

        FulltextSearchBuilder: Self for chaining

    Examples::

        # Natural language query
        .query("machine learning algorithms")

        # Boolean query
        .query("+machine +learning -java")

    Note: This method resets any previously added terms, phrases, or wildcards.
    """
    # Start from a brand-new builder so earlier terms are discarded, then
    # register the string as one unprefixed ("encourage") entry.
    fresh_builder = FulltextQueryBuilder()
    self._query_obj = fresh_builder
    fresh_builder.encourage(query_string)
    return self
|
1353
|
+
|
1354
|
+
def add_term(
|
1355
|
+
self,
|
1356
|
+
term: str,
|
1357
|
+
required: bool = False,
|
1358
|
+
excluded: bool = False,
|
1359
|
+
proximity: Optional[int] = None,
|
1360
|
+
) -> "FulltextSearchBuilder":
|
1361
|
+
"""
|
1362
|
+
Add a search term to the query.
|
1363
|
+
|
1364
|
+
Args::
|
1365
|
+
|
1366
|
+
term: The search term
|
1367
|
+
required: Whether the term is required (+) - must contain this term
|
1368
|
+
excluded: Whether the term is excluded (-) - must not contain this term
|
1369
|
+
proximity: Proximity modifier for boolean mode (not supported in MatrixOne)
|
1370
|
+
|
1371
|
+
Returns::
|
1372
|
+
|
1373
|
+
FulltextSearchBuilder: Self for chaining
|
1374
|
+
|
1375
|
+
Examples::
|
1376
|
+
|
1377
|
+
# Required term: +machine
|
1378
|
+
.add_term("machine", required=True)
|
1379
|
+
|
1380
|
+
# Excluded term: -java
|
1381
|
+
.add_term("java", excluded=True)
|
1382
|
+
|
1383
|
+
# Optional term: learning
|
1384
|
+
.add_term("learning")
|
1385
|
+
|
1386
|
+
# Complex query: +machine +learning -java
|
1387
|
+
.add_term("machine", required=True)
|
1388
|
+
.add_term("learning", required=True)
|
1389
|
+
.add_term("java", excluded=True)
|
1390
|
+
"""
|
1391
|
+
if required:
|
1392
|
+
self._query_obj.must(term)
|
1393
|
+
elif excluded:
|
1394
|
+
self._query_obj.must_not(term)
|
1395
|
+
else:
|
1396
|
+
self._query_obj.encourage(term)
|
1397
|
+
return self
|
1398
|
+
|
1399
|
+
def add_phrase(self, phrase: str) -> "FulltextSearchBuilder":
|
1400
|
+
"""
|
1401
|
+
Add an exact phrase to the query.
|
1402
|
+
|
1403
|
+
Args::
|
1404
|
+
|
1405
|
+
phrase: The exact phrase to search for (wrapped in double quotes)
|
1406
|
+
|
1407
|
+
Returns::
|
1408
|
+
|
1409
|
+
FulltextSearchBuilder: Self for chaining
|
1410
|
+
|
1411
|
+
Examples::
|
1412
|
+
|
1413
|
+
# Exact phrase: "machine learning"
|
1414
|
+
.add_phrase("machine learning")
|
1415
|
+
|
1416
|
+
# Multiple phrases
|
1417
|
+
.add_phrase("deep learning")
|
1418
|
+
.add_phrase("neural networks")
|
1419
|
+
"""
|
1420
|
+
self._query_obj.phrase(phrase)
|
1421
|
+
return self
|
1422
|
+
|
1423
|
+
def add_wildcard(self, pattern: str) -> "FulltextSearchBuilder":
|
1424
|
+
"""
|
1425
|
+
Add a wildcard pattern to the query.
|
1426
|
+
|
1427
|
+
Args::
|
1428
|
+
|
1429
|
+
pattern: Wildcard pattern with * suffix (e.g., "test*", "neural*")
|
1430
|
+
|
1431
|
+
Returns::
|
1432
|
+
|
1433
|
+
FulltextSearchBuilder: Self for chaining
|
1434
|
+
|
1435
|
+
Examples::
|
1436
|
+
|
1437
|
+
# Prefix match: neural*
|
1438
|
+
.add_wildcard("neural*")
|
1439
|
+
|
1440
|
+
# Multiple wildcards
|
1441
|
+
.add_wildcard("machine*")
|
1442
|
+
.add_wildcard("learn*")
|
1443
|
+
"""
|
1444
|
+
self._query_obj.prefix(pattern.rstrip('*'))
|
1445
|
+
return self
|
1446
|
+
|
1447
|
+
def with_score(self, include: bool = True) -> "FulltextSearchBuilder":
|
1448
|
+
"""
|
1449
|
+
Include relevance score in results.
|
1450
|
+
|
1451
|
+
Args::
|
1452
|
+
|
1453
|
+
include: Whether to include the score
|
1454
|
+
|
1455
|
+
Returns::
|
1456
|
+
|
1457
|
+
FulltextSearchBuilder: Self for chaining
|
1458
|
+
"""
|
1459
|
+
self._include_score = include
|
1460
|
+
return self
|
1461
|
+
|
1462
|
+
def select(self, columns: List[str]) -> "FulltextSearchBuilder":
|
1463
|
+
"""
|
1464
|
+
Set the columns to select in the result.
|
1465
|
+
|
1466
|
+
Args::
|
1467
|
+
|
1468
|
+
columns: List of column names to select
|
1469
|
+
|
1470
|
+
Returns::
|
1471
|
+
|
1472
|
+
FulltextSearchBuilder: Self for chaining
|
1473
|
+
"""
|
1474
|
+
self._select_columns = columns
|
1475
|
+
return self
|
1476
|
+
|
1477
|
+
def where(self, condition: str) -> "FulltextSearchBuilder":
|
1478
|
+
"""
|
1479
|
+
Add a WHERE condition.
|
1480
|
+
|
1481
|
+
Args::
|
1482
|
+
|
1483
|
+
condition: WHERE condition
|
1484
|
+
|
1485
|
+
Returns::
|
1486
|
+
|
1487
|
+
FulltextSearchBuilder: Self for chaining
|
1488
|
+
"""
|
1489
|
+
self._where_conditions.append(condition)
|
1490
|
+
return self
|
1491
|
+
|
1492
|
+
def order_by(self, column: str, direction: str = "DESC") -> "FulltextSearchBuilder":
|
1493
|
+
"""
|
1494
|
+
Set ORDER BY clause.
|
1495
|
+
|
1496
|
+
Args::
|
1497
|
+
|
1498
|
+
column: Column to order by
|
1499
|
+
direction: Order direction (ASC/DESC)
|
1500
|
+
|
1501
|
+
Returns::
|
1502
|
+
|
1503
|
+
FulltextSearchBuilder: Self for chaining
|
1504
|
+
"""
|
1505
|
+
self._order_by = f"{column} {direction}"
|
1506
|
+
return self
|
1507
|
+
|
1508
|
+
def limit(self, count: int) -> "FulltextSearchBuilder":
|
1509
|
+
"""
|
1510
|
+
Set LIMIT clause.
|
1511
|
+
|
1512
|
+
Args::
|
1513
|
+
|
1514
|
+
count: Number of results to return
|
1515
|
+
|
1516
|
+
Returns::
|
1517
|
+
|
1518
|
+
FulltextSearchBuilder: Self for chaining
|
1519
|
+
"""
|
1520
|
+
self._limit_value = count
|
1521
|
+
return self
|
1522
|
+
|
1523
|
+
def offset(self, count: int) -> "FulltextSearchBuilder":
|
1524
|
+
"""
|
1525
|
+
Set OFFSET clause.
|
1526
|
+
|
1527
|
+
Args::
|
1528
|
+
|
1529
|
+
count: Number of results to skip
|
1530
|
+
|
1531
|
+
Returns::
|
1532
|
+
|
1533
|
+
FulltextSearchBuilder: Self for chaining
|
1534
|
+
"""
|
1535
|
+
self._offset_value = count
|
1536
|
+
return self
|
1537
|
+
|
1538
|
+
def _build_sql(self) -> str:
|
1539
|
+
"""Build the final SQL query."""
|
1540
|
+
if not self._table_name:
|
1541
|
+
raise ValueError("Table name is required")
|
1542
|
+
|
1543
|
+
if not self._columns:
|
1544
|
+
raise ValueError("Search columns are required")
|
1545
|
+
|
1546
|
+
query_string = self._query_obj.build()
|
1547
|
+
if not query_string:
|
1548
|
+
raise ValueError("Query is required")
|
1549
|
+
|
1550
|
+
# Build SELECT clause
|
1551
|
+
select_parts = self._select_columns.copy()
|
1552
|
+
if self._include_score:
|
1553
|
+
columns_str = ", ".join(self._columns)
|
1554
|
+
select_parts.append(f"MATCH({columns_str}) AGAINST('{query_string}' IN {self._search_mode}) AS score")
|
1555
|
+
|
1556
|
+
select_clause = f"SELECT {', '.join(select_parts)}"
|
1557
|
+
|
1558
|
+
# Build FROM clause
|
1559
|
+
from_clause = f"FROM {self._table_name}"
|
1560
|
+
|
1561
|
+
# Build WHERE clause
|
1562
|
+
where_parts = []
|
1563
|
+
|
1564
|
+
# Add fulltext search condition
|
1565
|
+
columns_str = ", ".join(self._columns)
|
1566
|
+
fulltext_condition = f"MATCH({columns_str}) AGAINST('{query_string}' IN {self._search_mode})"
|
1567
|
+
where_parts.append(fulltext_condition)
|
1568
|
+
|
1569
|
+
# Add additional WHERE conditions
|
1570
|
+
where_parts.extend(self._where_conditions)
|
1571
|
+
|
1572
|
+
where_clause = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""
|
1573
|
+
|
1574
|
+
# Build ORDER BY clause
|
1575
|
+
order_clause = f"ORDER BY {self._order_by}" if self._order_by else ""
|
1576
|
+
|
1577
|
+
# Build LIMIT clause
|
1578
|
+
limit_clause = f"LIMIT {self._limit_value}" if self._limit_value else ""
|
1579
|
+
|
1580
|
+
# Build OFFSET clause
|
1581
|
+
offset_clause = f"OFFSET {self._offset_value}" if self._offset_value else ""
|
1582
|
+
|
1583
|
+
# Combine all clauses
|
1584
|
+
sql_parts = [
|
1585
|
+
select_clause,
|
1586
|
+
from_clause,
|
1587
|
+
where_clause,
|
1588
|
+
order_clause,
|
1589
|
+
limit_clause,
|
1590
|
+
offset_clause,
|
1591
|
+
]
|
1592
|
+
return " ".join(filter(None, sql_parts))
|
1593
|
+
|
1594
|
+
def execute(self) -> Any:
|
1595
|
+
"""
|
1596
|
+
Execute the fulltext search query.
|
1597
|
+
|
1598
|
+
Returns::
|
1599
|
+
|
1600
|
+
Query results
|
1601
|
+
"""
|
1602
|
+
sql = self._build_sql()
|
1603
|
+
return self.client.execute(sql)
|
1604
|
+
|
1605
|
+
def explain(self) -> str:
|
1606
|
+
"""
|
1607
|
+
Get the SQL query that would be executed.
|
1608
|
+
|
1609
|
+
Returns::
|
1610
|
+
|
1611
|
+
SQL query string
|
1612
|
+
"""
|
1613
|
+
return self._build_sql()
|
1614
|
+
|
1615
|
+
|
1616
|
+
class FulltextIndexManager:
    """Create and drop fulltext indexes, and start fulltext searches, via a client."""

    def __init__(self, client: "Client"):
        # Client used to execute SQL and to obtain the SQLAlchemy engine.
        self.client = client

    def create(
        self,
        table_name: str,
        name: str,
        columns: List[str],
        algorithm: str = FulltextSearchAlgorithm.BM25,
    ) -> bool:
        """
        Select the relevancy algorithm, then create a fulltext index.

        Args::

            table_name: Table that receives the index
            name: Name of the new index
            columns: Columns covered by the index
            algorithm: Relevancy algorithm; sent in a
                ``SET ft_relevancy_algorithm`` statement and passed on to
                ``FulltextIndex.create_index``

        Returns::

            bool: The result reported by ``FulltextIndex.create_index``
        """
        from .fulltext_index import FulltextIndex

        # NOTE(review): the SET runs through client.execute while the index is
        # created through the SQLAlchemy engine - confirm both share a session.
        self.client.execute(f'SET ft_relevancy_algorithm = "{algorithm}"')

        return FulltextIndex.create_index(
            engine=self.client.get_sqlalchemy_engine(),
            table_name=table_name,
            name=name,
            columns=columns,
            algorithm=algorithm,
        )

    def drop(self, table_name: str, name: str) -> bool:
        """
        Remove a fulltext index from a table.

        Args::

            table_name: Table that owns the index
            name: Name of the index to drop

        Returns::

            bool: The result reported by ``FulltextIndex.drop_index``
        """
        from .fulltext_index import FulltextIndex

        return FulltextIndex.drop_index(
            engine=self.client.get_sqlalchemy_engine(),
            table_name=table_name,
            name=name,
        )

    def search(self) -> "FulltextSearchBuilder":
        """
        Start a new fulltext search bound to this manager's client.

        Returns::

            FulltextSearchBuilder: A fresh builder instance
        """
        return FulltextSearchBuilder(self.client)
|