matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,1686 @@
1
+ # Copyright 2021 - 2022 Matrix Origin
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Advanced Fulltext Search Builder for MatrixOne
17
+
18
+ This module provides an Elasticsearch-like query builder for MatrixOne fulltext search,
19
+ with chainable methods and comprehensive search capabilities.
20
+
21
+ ## Column Matching Requirements
22
+
23
+ **CRITICAL**: The columns specified in MATCH() must exactly match the columns
24
+ defined in the FULLTEXT index. This is a MatrixOne requirement.
25
+
26
+ Examples:
27
+ - If your index is: `FULLTEXT(title, content, tags)`
28
+ - Your MATCH() must be: `MATCH(title, content, tags) AGAINST(...)`
29
+ - NOT: `MATCH(title) AGAINST(...)` or `MATCH(title, content) AGAINST(...)`
30
+
31
+ ## MatrixOne Limitations
32
+
33
+ 1. **Multiple MATCH() Functions**: MatrixOne does not support multiple
34
+ MATCH() functions in the same query.
35
+
36
+ ❌ WRONG: `WHERE MATCH(...) AND MATCH(...)`
37
+ ✅ CORRECT: Use chained filter() calls or combine terms in single MATCH()
38
+
39
+ 2. **Complex Nested Groups**: Some complex nested syntaxes are not supported.
40
+
41
+ ❌ WRONG: `'+learning -basic (+machine AI) (+deep neural)'`
42
+ ✅ CORRECT: `'+learning -basic +machine +deep'`
43
+
44
+ ## Supported Boolean Mode Operators
45
+
46
+ ### Group-level operators (applied to entire groups):
47
+ - `+(group)`: Group must be present
48
+ - `-(group)`: Group must not be present
49
+
50
+ ### Element-level operators:
51
+ - `+term`: Term must contain (required)
52
+ - `-term`: Term must not contain (excluded)
53
+ - `term`: Term optional (should contain)
54
+ - `"phrase"`: Exact phrase match
55
+ - `term*`: Prefix match
56
+
57
+ ### Weight operators (within groups/elements):
58
+ - `term`: Optional term with normal positive weight boost
59
+ - `>term`: Higher relevance weight for term (high positive boost)
60
+ - `<term`: Lower relevance weight for term (low positive boost)
61
+ - `~term`: Reduced/suppressed relevance weight (negative or minimal boost)
62
+
63
+ ### Weight Operator Comparison:
64
+ - `encourage("tutorial")` → `tutorial` : Encourages documents with "tutorial"
65
+ - `discourage("legacy")` → `~legacy` : Discourages documents with "legacy"
66
+
67
+ Both are optional (don't filter documents) but affect ranking differently.
68
+
69
+ ### MatrixOne Example:
70
+ - `'+red -(<blue >is)'`: Must have 'red', must NOT have group containing 'blue' (low weight) and 'is' (high weight)
71
+
72
+ ## Supported Modes
73
+
74
+ - **NATURAL LANGUAGE**: Default full-text search
75
+ - **BOOLEAN**: Advanced boolean operators
76
+ - **QUERY EXPANSION**: Automatic query expansion (limited support)
77
+ """
78
+
79
+ from __future__ import annotations
80
+
81
+ from typing import TYPE_CHECKING, Any, List, Optional
82
+
83
+ from sqlalchemy import Boolean
84
+ from sqlalchemy.sql import and_, not_, or_, text
85
+ from sqlalchemy.sql.elements import ClauseElement
86
+
87
+ if TYPE_CHECKING:
88
+ from ..client import Client
89
+
90
+
91
class FulltextSearchMode:
    """Namespace of the search-mode strings placed after ``IN`` in ``AGAINST(... IN <mode>)``.

    The members are plain ``str`` class attributes rather than ``enum.Enum``
    values, so they can be interpolated into SQL text without conversion.
    """

    # Default relevance-ranked search.
    NATURAL_LANGUAGE = "natural language mode"

    # Search with boolean operators (+, -, ~, >, <, grouping).
    BOOLEAN = "boolean mode"

    # Automatic query expansion.
    QUERY_EXPANSION = "query expansion mode"
97
+
98
+
99
class FulltextSearchAlgorithm:
    """Namespace of the names of the fulltext relevance-scoring algorithms.

    The members are plain ``str`` class attributes, not ``enum.Enum`` values.
    """

    # Term frequency / inverse document frequency scoring.
    TF_IDF = "TF-IDF"

    # Okapi BM25 scoring.
    BM25 = "BM25"
104
+
105
+
106
class FulltextElement:
    """One atom of a boolean-mode query: a term, a quoted phrase or a prefix pattern.

    An element is rendered as ``<operator><weight_modifier><content>``, where
    both prefixes are optional.

    Args::

        content: The text of the element, already formatted by the caller
            (for example ``python``, ``"exact phrase"`` or ``pre*``)
        operator: Presence operator placed first ("+", "-" or "" for none)
        weight_modifier: Relevance modifier placed after the operator
            (">", "<", "~" or "" for none)
    """

    def __init__(self, content: str, operator: str = "", weight_modifier: str = ""):
        self.operator = operator
        self.weight_modifier = weight_modifier
        self.content = content

    def build(self) -> str:
        """Return the element as it appears inside the AGAINST() string."""
        # A falsy modifier contributes nothing, so one format string covers
        # both the weighted and the unweighted form.
        modifier = self.weight_modifier or ""
        return f"{self.operator}{modifier}{self.content}"
119
+
120
+
121
class FulltextGroup:
    """A parenthesised collection of fulltext elements and nested groups.

    A group stores its own elements plus any child groups. When a parent
    group is built, each non-empty child is wrapped in parentheses and given
    a prefix chosen from the child's state:

    - ``is_tilde`` set: ``~(...)``
    - ``group_type == "not"``: ``-(...)``
    - ``group_type == "and"``: ``+(...)``
    - anything else (default "or"): ``(...)``

    Adding terms (all methods return ``self`` for chaining):
    - medium() / encourage(): plain terms, no operator
    - high(): terms with the ``>`` weight modifier
    - low(): terms with the ``<`` weight modifier
    - phrase(): one exact phrase wrapped in double quotes
    - prefix(): one prefix match (``term*``)
    - boost(): one term rendered as ``term^weight``
    - must() / must_not(): ``+term`` / ``-term`` only when this group's type
      is neither "or" nor "not" (FulltextQueryBuilder creates its main group
      with type "and"); otherwise the terms are added without an operator

    Examples::

        # MatrixOne syntax: '+red -(<blue >is)'
        query.must("red").must_not(group().low("blue").high("is"))

        query.must(group().medium("java", "kotlin"))          # +(java kotlin)
        query.encourage(group().medium("tutorial", "guide"))  # (tutorial guide)
        query.discourage(group().medium("old", "outdated"))   # ~(old outdated)
        query.must_not(group().medium("spam", "junk"))        # -(spam junk)
    """

    def __init__(self, group_type: str = "or"):
        self.group_type = group_type  # "or", "and", "not"
        self.is_tilde = False  # True once the group was added via add_tilde_group()
        self.elements: List[FulltextElement] = []
        self.groups: List["FulltextGroup"] = []

    def _add_terms(self, terms, operator: str = "", weight_modifier: str = "") -> "FulltextGroup":
        """Append one element per term with the given prefixes and return self."""
        for term in terms:
            self.elements.append(FulltextElement(term, operator, weight_modifier))
        return self

    def must(self, *terms: str) -> "FulltextGroup":
        """Add required terms.

        Inside an "or" or "not" group, elements carry no +/- operator because
        the operator belongs to the group as a whole. For any other group
        type the terms get the ``+`` operator.
        """
        operator = "" if self.group_type in ["or", "not"] else "+"
        return self._add_terms(terms, operator)

    def must_not(self, *terms: str) -> "FulltextGroup":
        """Add excluded terms.

        Inside an "or" or "not" group, elements carry no +/- operator because
        the operator belongs to the group as a whole. For any other group
        type the terms get the ``-`` operator.
        """
        operator = "" if self.group_type in ["or", "not"] else "-"
        return self._add_terms(terms, operator)

    def encourage(self, *terms: str) -> "FulltextGroup":
        """Add optional terms with normal positive weight (no operator).

        Documents lacking these terms can still match; documents containing
        them receive the normal scoring boost.

        Args::

            *terms: Terms to add without any operator

        Example::

            group.encourage("python")  # Generates: python
        """
        return self._add_terms(terms)

    def medium(self, *terms: str) -> "FulltextGroup":
        """Add plain terms with medium/normal weight (no operator)."""
        return self._add_terms(terms)

    def phrase(self, phrase: str) -> "FulltextGroup":
        """Add an exact-phrase match, rendered inside double quotes."""
        return self._add_terms((f'"{phrase}"',))

    def prefix(self, prefix: str) -> "FulltextGroup":
        """Add a prefix match, rendered as ``prefix*``."""
        return self._add_terms((f"{prefix}*",))

    def boost(self, term: str, weight: float) -> "FulltextGroup":
        """Add a boosted term, rendered as ``term^weight``."""
        return self._add_terms((f"{term}^{weight}",))

    def high(self, *terms: str) -> "FulltextGroup":
        """Add terms with high weight (``>term``)."""
        return self._add_terms(terms, "", ">")

    def low(self, *terms: str) -> "FulltextGroup":
        """Add terms with low weight (``<term``)."""
        return self._add_terms(terms, "", "<")

    def add_group(self, *groups: "FulltextGroup") -> "FulltextGroup":
        """Attach child groups; each child's own state decides its prefix."""
        self.groups.extend(groups)
        return self

    def add_tilde_group(self, group: "FulltextGroup") -> "FulltextGroup":
        """Attach a child group that is rendered as ``~(...)``.

        The child is flagged in place (``is_tilde = True``).
        """
        group.is_tilde = True
        self.groups.append(group)
        return self

    def build(self) -> str:
        """Render own elements, then child groups, joined by single spaces.

        Child groups that render to an empty string are omitted.
        """
        rendered = [element.build() for element in self.elements]

        for child in self.groups:
            inner = child.build()
            if not inner:
                continue

            # The tilde flag takes precedence over the child's group type.
            if child.is_tilde:
                prefix = "~"
            elif child.group_type == "not":
                prefix = "-"
            elif child.group_type == "and":
                prefix = "+"
            else:  # "or" (default): no prefix
                prefix = ""
            rendered.append(f"{prefix}({inner})")

        return " ".join(rendered)
278
+
279
+
280
class FulltextQueryBuilder:
    """Builder for constructing fulltext boolean queries.

    This class provides a chainable API for building fulltext search strings
    that are compatible with MatrixOne's MATCH() AGAINST() syntax, and for
    rendering them into a complete SELECT statement.

    Core Methods:
    - must(): Required terms/groups (+ operator)
    - must_not(): Excluded terms/groups (- operator)
    - encourage(): Optional terms/groups with normal weight (no prefix)
    - discourage(): Optional terms/groups with reduced weight (~ operator)

    Examples::

        # Basic usage
        query.must("python")            # +python
        query.encourage("tutorial")     # tutorial
        query.discourage("legacy")      # ~legacy
        query.must_not("deprecated")    # -deprecated

        # Group usage
        query.must(group().medium("java", "kotlin"))          # +(java kotlin)
        query.encourage(group().medium("tutorial", "guide"))  # (tutorial guide)
        query.must_not(group().medium("spam", "junk"))        # -(spam junk)

    Note: Group-level operators (+, -, ~) are applied to entire groups.
    Element-level operators (>, <) are applied within groups using high(), low().

    A FulltextGroup passed to must()/must_not()/encourage()/discourage() is
    modified in place (its ``group_type`` / ``is_tilde`` are overwritten), so
    the most recent call decides how that group object is rendered.
    """

    def __init__(self):
        # Type "and" makes FulltextGroup.must()/must_not() emit +term / -term.
        self.main_group = FulltextGroup("and")

    @staticmethod
    def _reject_sequence(method: str, verb: str, item) -> None:
        """Raise a TypeError with an unpacking hint if *item* is a list or tuple."""
        if not isinstance(item, (list, tuple)):
            return
        sample = list(item)[:3]
        quoted = ", ".join(repr(str(t)) for t in sample)
        raise TypeError(
            f"{method}() received a {type(item).__name__} object, but expected individual terms. "
            f"To {verb} multiple terms, use the unpacking operator: {method}(*terms) "
            f"instead of {method}(terms). "
            f"Example: {method}(*{sample}) or {method}({quoted})"
        )

    def must(self, *items) -> "FulltextQueryBuilder":
        """Add required terms or groups (+ operator).

        Documents MUST contain these terms/groups to match.

        Args::

            *items: Can be strings (terms) or FulltextGroup objects

        Examples::

            query.must("python")                          # Generates: +python
            query.must(group().medium("java", "kotlin"))  # Generates: +(java kotlin)
            query.must("python", "programming")           # Generates: +python +programming

            words = ["python", "programming"]
            query.must(*words)                            # Correct: unpacks the list

        Raises::

            TypeError: If a list or tuple is passed directly without unpacking

        Returns::

            FulltextQueryBuilder: Self for method chaining
        """
        for item in items:
            self._reject_sequence("must", "search", item)
            if isinstance(item, FulltextGroup):
                item.group_type = "and"  # Force group to be required
                # A leftover tilde flag would win over group_type when rendering.
                item.is_tilde = False
                self.main_group.add_group(item)
            else:
                self.main_group.must(item)
        return self

    def must_not(self, *items) -> "FulltextQueryBuilder":
        """Add excluded terms or groups (- operator).

        Documents MUST NOT contain these terms/groups to match.

        Args::

            *items: Can be strings (terms) or FulltextGroup objects

        Examples::

            query.must_not("deprecated")                    # Generates: -deprecated
            query.must_not(group().medium("spam", "junk"))  # Generates: -(spam junk)
            query.must_not("spam", "junk")                  # Generates: -spam -junk

            words = ["spam", "junk"]
            query.must_not(*words)                          # Correct: unpacks the list

        Raises::

            TypeError: If a list or tuple is passed directly without unpacking

        Returns::

            FulltextQueryBuilder: Self for method chaining
        """
        for item in items:
            self._reject_sequence("must_not", "exclude", item)
            if isinstance(item, FulltextGroup):
                item.group_type = "not"  # Force group to be excluded
                # A leftover tilde flag would win over group_type when rendering.
                item.is_tilde = False
                self.main_group.add_group(item)
            else:
                self.main_group.must_not(item)
        return self

    def encourage(self, *items) -> "FulltextQueryBuilder":
        """Add optional terms or groups with normal positive weight (no prefix).

        Documents can match without these terms, but containing them will
        INCREASE the relevance score.

        Args::

            *items: Can be strings (terms) or FulltextGroup objects

        Examples::

            query.encourage("tutorial")                           # Generates: tutorial
            query.encourage(group().medium("beginner", "intro"))  # Generates: (beginner intro)
            query.encourage("tutorial", "guide")                  # Generates: tutorial guide

            words = ["tutorial", "guide"]
            query.encourage(*words)                               # Correct: unpacks the list

        Weight Comparison:
        - encourage("term"): Normal positive boost (encourages term)
        - discourage("term"): Reduced/negative boost (discourages term)

        Raises::

            TypeError: If a list or tuple is passed directly without unpacking

        Returns::

            FulltextQueryBuilder: Self for method chaining
        """
        for item in items:
            self._reject_sequence("encourage", "encourage", item)
            if isinstance(item, FulltextGroup):
                item.group_type = "or"  # Force group to be optional
                # A leftover tilde flag would win over group_type when rendering.
                item.is_tilde = False
                self.main_group.add_group(item)
            else:
                self.main_group.encourage(item)
        return self

    def discourage(self, *items) -> "FulltextQueryBuilder":
        """Add optional terms or groups with reduced weight (~ operator).

        Documents can match without these terms, but containing them will
        DECREASE the relevance score.

        Args::

            *items: Can be strings (terms) or FulltextGroup objects

        Examples::

            query.discourage("legacy")                           # Generates: ~legacy
            query.discourage(group().medium("old", "outdated"))  # Generates: ~(old outdated)
            query.discourage("legacy", "deprecated")             # Generates: ~legacy ~deprecated

            words = ["legacy", "deprecated"]
            query.discourage(*words)                             # Correct: unpacks the list

            # Search Python content, but discourage legacy versions
            query.must("python").encourage("3.11").discourage("2.7")

        Weight Comparison:
        - encourage("term"): Normal positive boost (encourages term)
        - discourage("term"): Reduced/negative boost (discourages term)

        Raises::

            TypeError: If a list or tuple is passed directly without unpacking

        Returns::

            FulltextQueryBuilder: Self for method chaining
        """
        for item in items:
            self._reject_sequence("discourage", "discourage", item)
            if isinstance(item, FulltextGroup):
                # Apply tilde to the entire group
                self.main_group.add_tilde_group(item)
            else:
                # Apply tilde to individual term
                self.main_group.elements.append(FulltextElement(item, "", "~"))
        return self

    def phrase(self, phrase: str) -> "FulltextQueryBuilder":
        """Add an exact-phrase match ("phrase") to the main group."""
        self.main_group.phrase(phrase)
        return self

    def prefix(self, prefix: str) -> "FulltextQueryBuilder":
        """Add a prefix match (prefix*) to the main group."""
        self.main_group.prefix(prefix)
        return self

    def boost(self, term: str, weight: float) -> "FulltextQueryBuilder":
        """Add a boosted term (term^weight) to the main group."""
        self.main_group.boost(term, weight)
        return self

    def group(self, *builders: "FulltextQueryBuilder") -> "FulltextQueryBuilder":
        """Add nested query builders as optional (OR) groups.

        Each builder's current elements and child groups are copied into a
        new "or" group; the element and group objects themselves are shared
        with the source builder.
        """
        for builder in builders:
            nested = FulltextGroup("or")
            nested.elements.extend(builder.main_group.elements)
            nested.groups.extend(builder.main_group.groups)
            self.main_group.groups.append(nested)
        return self

    def build(self) -> str:
        """Build the search string that goes inside AGAINST('...')."""
        return self.main_group.build()

    def as_sql(
        self,
        table: str,
        columns: List[str],
        mode: str = FulltextSearchMode.BOOLEAN,
        include_score: bool = False,
        select_columns: Optional[List[str]] = None,
        where_conditions: Optional[List[str]] = None,
        order_by: Optional[str] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
    ) -> str:
        """Build a complete SELECT statement around this fulltext query.

        Single quotes in the search string are doubled so that a term such as
        ``don't`` cannot terminate the SQL string literal. All other
        arguments (table, columns, mode, select_columns, where_conditions,
        order_by) are inserted into the statement verbatim and must come from
        trusted code, not from end-user input.

        Args::

            table: Table name to search in
            columns: List of columns to search in (must match FULLTEXT index)
            mode: Search mode (BOOLEAN, NATURAL_LANGUAGE, etc.)
            include_score: Whether to append the MATCH expression AS score
            select_columns: Columns to select (default: all columns "*")
            where_conditions: Additional WHERE conditions, AND-ed after MATCH
            order_by: ORDER BY clause (e.g., "score DESC")
            limit: LIMIT value; 0 is honoured and emitted as LIMIT 0
            offset: OFFSET value; 0 or None emits no OFFSET clause

        Raises::

            ValueError: If no search term was added, or table/columns is empty

        Returns::

            str: Complete SQL query

        Examples::

            .. code-block:: python

                # Basic query with score
                query = FulltextQueryBuilder().must("python").encourage("tutorial")
                sql = query.as_sql("articles", ["title", "content"], include_score=True)
                # SELECT *, MATCH(title, content) AGAINST('+python tutorial' IN boolean mode) AS score
                # FROM articles WHERE MATCH(title, content) AGAINST('+python tutorial' IN boolean mode)

                # Query with custom columns and ORDER BY score
                sql = query.as_sql("articles", ["title", "content"],
                                   select_columns=["id", "title"], include_score=True,
                                   order_by="score DESC", limit=10)
        """
        query_string = self.build()
        if not query_string:
            raise ValueError("Query is required - add at least one search term")

        if not table:
            raise ValueError("Table name is required")

        if not columns:
            raise ValueError("Search columns are required")

        # Double embedded single quotes: the search text sits inside a
        # single-quoted SQL literal and typically originates from user input.
        escaped_query = query_string.replace("'", "''")
        match_expr = f"MATCH({', '.join(columns)}) AGAINST('{escaped_query}' IN {mode})"

        # Copy so the caller's list is never mutated by the score column.
        select_parts = list(select_columns) if select_columns else ["*"]
        if include_score:
            select_parts.append(f"{match_expr} AS score")

        # The MATCH condition always comes first; extra conditions are AND-ed.
        where_parts = [match_expr]
        if where_conditions:
            where_parts.extend(where_conditions)

        sql_parts = [
            f"SELECT {', '.join(select_parts)}",
            f"FROM {table}",
            f"WHERE {' AND '.join(where_parts)}",
            f"ORDER BY {order_by}" if order_by else "",
            # "is not None" so that an explicit limit=0 is not silently dropped.
            f"LIMIT {limit}" if limit is not None else "",
            # OFFSET 0 is a no-op and is left out on purpose.
            f"OFFSET {offset}" if offset else "",
        ]
        return " ".join(filter(None, sql_parts))

    def as_score_sql(self, table: str, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN) -> str:
        """Convenient method to generate SQL with score included.

        This is equivalent to calling as_sql() with include_score=True.

        Args::

            table: Table name to search in
            columns: List of columns to search in
            mode: Search mode

        Returns::

            str: Complete SQL query with AS score

        Example::

            query = FulltextQueryBuilder().must("python").encourage("tutorial")
            sql = query.as_score_sql("articles", ["title", "content"])
            # Generates SQL with AS score automatically included
        """
        return self.as_sql(table, columns, mode, include_score=True)
686
+
687
+
688
class FulltextFilter(ClauseElement):
    """Advanced fulltext filter for integrating fulltext search with ORM queries.

    This class wraps FulltextQueryBuilder to provide seamless integration
    with MatrixOne ORM's filter() method, allowing fulltext search to be
    combined with other SQL conditions.

    Core Methods (Group-level operators):
        - must(): Required terms/groups (+ operator)
        - must_not(): Excluded terms/groups (- operator)
        - encourage(): Optional terms/groups with normal weight (no prefix)
        - discourage(): Optional terms/groups with reduced weight (~ operator)

    Parameter Types:
        - str: Single term (e.g., "python")
        - FulltextGroup: Group of terms (e.g., group().medium("java", "kotlin"))

    Usage with ORM:

    .. code-block:: python

        # Basic fulltext filter
        results = client.query(Article).filter(
            boolean_match("title", "content").must("python").encourage("tutorial")
        ).all()

        # Combined with other conditions
        results = client.query(Article).filter(
            boolean_match("title", "content").must("python")
        ).filter(
            Article.category == "Programming"
        ).all()

        # Complex fulltext with groups
        results = client.query(Article).filter(
            boolean_match("title", "content", "tags")
            .must("programming")
            .must(group().medium("python", "java"))
            .discourage(group().medium("legacy", "deprecated"))
        ).all()

    Weight Operator Examples

    .. code-block:: python

        # Encourage tutorials, discourage legacy content
        boolean_match("title", "content")
            .must("python")
            .encourage("tutorial")   # Boost documents with 'tutorial'
            .discourage("legacy")    # Lower ranking for 'legacy' documents

    Supported MatrixOne Boolean Mode Operators:
        Group-level: +, -, ~, (no prefix) - applied to entire groups/terms
        Element-level: >, < - applied within groups using high(), low()
        Other: "phrase", term* - exact phrases and prefix matching
        Complex: +red -(<blue >is) - nested groups with mixed operators

    Important MatrixOne Requirements:
        **Column Matching**: The columns specified must exactly match
        the columns defined in the FULLTEXT index. If your index is
        `FULLTEXT(title, content, tags)`, you must include all three columns.

        **Limitations**:
        - Only one MATCH() function per query is supported
        - Complex nested groups may have syntax restrictions
        - Use fulltext_and/fulltext_or for combining with other conditions

    Quoting:
        The natural-language query stored via ``set_natural_query()`` is
        escaped (backslashes and single quotes) before it is embedded in the
        generated SQL literal. The string produced by the wrapped
        ``FulltextQueryBuilder`` is embedded as returned by ``build()``.
    """

    def __init__(self, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN):
        """Create a filter over ``columns`` using search ``mode``.

        Args::

            columns: Column names passed to MATCH()
            mode: One of the FulltextSearchMode constants
        """
        super().__init__()
        # NOTE(review): this instance attribute shadows the ``columns()``
        # method defined below, so ``filter.columns("a", "b")`` fails with
        # "'list' object is not callable". Left as is because ``columns`` is
        # a public attribute; renaming either one would change the interface.
        self.columns = columns
        self.mode = mode
        self.query_builder = FulltextQueryBuilder()
        self._natural_query = None  # Raw natural language query, kept apart from the builder
        # Set SQLAlchemy type info for compatibility
        self.type = Boolean()

    def columns(self, *columns: str) -> "FulltextFilter":
        """Set the columns to search in."""
        self.columns = list(columns)
        return self

    def must(self, *items) -> "FulltextFilter":
        """Add required terms or groups (+ operator at group level)."""
        self.query_builder.must(*items)
        return self

    def must_not(self, *items) -> "FulltextFilter":
        """Add excluded terms or groups (- operator at group level)."""
        self.query_builder.must_not(*items)
        return self

    def encourage(self, *items) -> "FulltextFilter":
        """Add terms or groups that should be encouraged (normal positive weight)."""
        self.query_builder.encourage(*items)
        return self

    def phrase(self, *phrases: str) -> "FulltextFilter":
        """Add exact phrases - equivalent to "phrase"."""
        self.query_builder.phrase(*phrases)
        return self

    def prefix(self, *terms: str) -> "FulltextFilter":
        """Add prefix terms - equivalent to term*."""
        self.query_builder.prefix(*terms)
        return self

    def boost(self, term: str, weight: float) -> "FulltextFilter":
        """Add a boosted term (term^weight)."""
        self.query_builder.boost(term, weight)
        return self

    def discourage(self, *items) -> "FulltextFilter":
        """Add terms or groups that should be discouraged (~ operator at group level)."""
        self.query_builder.discourage(*items)
        return self

    def set_natural_query(self, query: str) -> "FulltextFilter":
        """Set natural language query string (used for NATURAL_LANGUAGE mode)."""
        self._natural_query = query
        return self

    def group(self, *filters: "FulltextFilter") -> "FulltextFilter":
        """Add nested query groups (OR semantics)."""
        self.query_builder.group(*(nested.query_builder for nested in filters))
        return self

    def natural_language(self) -> "FulltextFilter":
        """Set to natural language mode."""
        self.mode = FulltextSearchMode.NATURAL_LANGUAGE
        return self

    def boolean_mode(self) -> "FulltextFilter":
        """Set to boolean mode."""
        self.mode = FulltextSearchMode.BOOLEAN
        return self

    def query_expansion(self) -> "FulltextFilter":
        """Set to query expansion mode."""
        self.mode = FulltextSearchMode.QUERY_EXPANSION
        return self

    def compile(self, compile_kwargs=None):
        """Compile to a ``MATCH(...) AGAINST(...)`` SQL string for use in filter().

        Args::

            compile_kwargs: Accepted for call compatibility; not used

        Returns::

            str: The MATCH/AGAINST expression for the current mode

        Raises::

            ValueError: If no columns are set or the query string is empty
        """
        if not self.columns:
            raise ValueError("Columns must be specified")

        columns_str = ", ".join(self.columns)

        # For natural language mode, use the stored natural query if available
        if self.mode == FulltextSearchMode.NATURAL_LANGUAGE and self._natural_query:
            # This text comes straight from the caller (often end-user input)
            # and lands inside a single-quoted SQL literal, so neutralise the
            # two characters that can terminate or alter that literal.
            # Backslash first, so the quote escaping is not itself re-escaped.
            # NOTE(review): assumes MySQL-style backslash escapes are active
            # on the server -- confirm for MatrixOne.
            query_string = str(self._natural_query).replace("\\", "\\\\").replace("'", "''")
        else:
            # NOTE(review): embedded exactly as build() returns it; whether
            # build() escapes quotes is not visible from here -- verify.
            query_string = self.query_builder.build()

        if not query_string:
            raise ValueError("Query cannot be empty")

        # Natural language (and any unrecognised mode) uses no modifier.
        if self.mode == FulltextSearchMode.NATURAL_LANGUAGE:
            modifier = ""
        elif self.mode == FulltextSearchMode.BOOLEAN:
            modifier = " IN BOOLEAN MODE"
        elif self.mode == FulltextSearchMode.QUERY_EXPANSION:
            modifier = " WITH QUERY EXPANSION"
        else:
            modifier = ""
        return f"MATCH({columns_str}) AGAINST('{query_string}'{modifier})"

    def _compiler_dispatch(self, visitor, **kw):
        """SQLAlchemy compiler dispatch method for complete compatibility."""
        # Hand the generated MATCH() AGAINST() SQL to the visitor as a text clause
        return visitor.process(text(self.compile()), **kw)

    def label(self, name: str):
        """Create a labeled version for use in SELECT clauses.

        This allows using fulltext expressions as selectable columns with aliases:

        Args::

            name: The alias name for the column

        Returns::

            A lightweight label object: ``str()`` and ``_compiler_dispatch()``
            give the bare expression, ``compile()`` gives ``<expression> AS <name>``

        Examples::

            .. code-block:: python

                # Use as a SELECT column with score
                query(Article, Article.id,
                      boolean_match("title", "content").must("python").label("score"))

                # Multiple fulltext scores
                query(Article, Article.id,
                      boolean_match("title", "content").must("python").label("relevance"),
                      boolean_match("tags").must("programming").label("tag_score"))

        Generated SQL:

            .. code-block:: sql

                SELECT articles.id,
                       MATCH(title, content) AGAINST('+python' IN BOOLEAN MODE) AS score
                FROM articles
        """
        # The expression is compiled once, now; later changes to this filter
        # do not affect an already created label.
        sql_text = self.compile()
        text_expr = text(sql_text)

        # Create a custom labeled expression
        class FulltextLabel:
            def __init__(self, text_expr, name):
                self.text_expr = text_expr
                self.name = name

            def __str__(self):
                # For ORM integration, return only the expression without AS
                # The ORM will add the AS alias part
                return sql_text

            # Make it compatible with SQLAlchemy's compilation
            def compile(self, compile_kwargs=None):
                # For standalone use, include AS
                return f"{sql_text} AS {self.name}"

            def _compiler_dispatch(self, visitor, **kw):
                # For SQLAlchemy integration, return only the expression
                # SQLAlchemy will handle the AS alias
                return sql_text

        return FulltextLabel(text_expr, name)

    def __str__(self):
        """String representation for debugging."""
        return f"FulltextFilter({self.columns}, mode={self.mode})"

    def __repr__(self):
        """Detailed representation for debugging."""
        return f"FulltextFilter(columns={self.columns}, mode='{self.mode}', query='{self.query_builder.build()}')"

    def as_text(self):
        """Convert to SQLAlchemy text() object for compatibility with and_(), or_(), etc."""
        return text(self.compile())

    @classmethod
    def _create_and(cls, *conditions):
        """Helper to create AND expressions with FulltextFilter support."""
        # Filters of this class are converted to text(); everything else passes through
        return and_(*(c.as_text() if isinstance(c, cls) else c for c in conditions))

    @classmethod
    def _create_or(cls, *conditions):
        """Helper to create OR expressions with FulltextFilter support."""
        # Filters of this class are converted to text(); everything else passes through
        return or_(*(c.as_text() if isinstance(c, cls) else c for c in conditions))
957
+
958
+
959
+ # Convenience functions for common use cases
960
+
961
+
962
def boolean_match(*columns) -> FulltextFilter:
    """Create a boolean mode fulltext filter for specified columns.

    This is the main entry point for creating fulltext search queries that integrate
    seamlessly with MatrixOne ORM's filter() method.

    Args::

        *columns: Column names or SQLAlchemy Column objects to search against.
            Any object exposing a ``name`` attribute contributes that name;
            everything else is converted with ``str()``.

    Returns::

        FulltextFilter: A chainable filter object in BOOLEAN mode

    Examples::

        # Basic search - must contain 'python'
        boolean_match("title", "content").must("python")

        # Multiple conditions
        boolean_match("title", "content")
            .must("python")
            .encourage("tutorial")
            .discourage("legacy")

        # Group search - either 'python' or 'java'
        boolean_match("title", "content").must(group().medium("python", "java"))

        # Using SQLAlchemy Column objects
        boolean_match(Article.title, Article.content).must("python")

    Note: The columns specified must exactly match the FULLTEXT index columns. For example, if your
    index is FULLTEXT(title, content, tags), you must use boolean_match("title", "content", "tags")
    """
    # Column objects and model attributes both expose ``name``; plain strings
    # do not, so they fall through to str().
    column_names = [col.name if hasattr(col, 'name') else str(col) for col in columns]
    return FulltextFilter(column_names, FulltextSearchMode.BOOLEAN)
1010
+
1011
+
1012
def natural_match(*columns, query: str) -> FulltextFilter:
    """
    Create a natural language mode fulltext filter for specified columns.

    Natural language mode provides user-friendly search with automatic processing:
    - Stopword removal (e.g., 'the', 'a', 'an')
    - Stemming and variations
    - Relevance scoring based on TF-IDF or BM25 algorithm
    - Best for end-user search interfaces

    Args:
        *columns: Column names or SQLAlchemy Column objects to search against
            - Must exactly match the columns in your fulltext index
            - Can be strings or Column objects (anything exposing ``name``
              contributes that name; other values are converted with ``str()``)
        query: Natural language query string (keyword-only)
            - User-friendly search terms
            - Automatically processed for best results
            - Multi-word queries are supported

    Important - Column Matching:
        The columns specified in MATCH() must exactly match the columns defined in
        the FULLTEXT index. Mismatches will cause errors.

    Examples:
        If index is: FULLTEXT(title, content)
        - ✅ natural_match("title", "content", query="...") - Correct
        - ❌ natural_match("title", query="...") - Error (partial)
        - ❌ natural_match("content", query="...") - Error (partial)

        If index is: FULLTEXT(content)
        - ✅ natural_match("content", query="...") - Correct
        - ❌ natural_match("title", "content", query="...") - Error (extra column)

    Parser Compatibility:
        Works with all parser types:
        - Default parser: Standard text tokenization
        - JSON parser: Searches JSON values within documents
        - NGRAM parser: Chinese and Asian language tokenization

    Returns:
        FulltextFilter: A fulltext filter object for use in queries

    Examples::

        # Basic natural language search
        result = client.query("articles.id", "articles.title", "articles.content").filter(
            natural_match("title", "content", query="machine learning")
        ).execute()

        # Using with ORM models
        result = client.query(Article).filter(
            natural_match(Article.title, Article.content, query="artificial intelligence")
        ).execute()

        # Single column search
        result = client.query(Article).filter(
            natural_match(Article.content, query="python programming")
        ).execute()

        # With relevance scoring
        result = client.query(
            Article.id,
            Article.title,
            Article.content,
            natural_match(Article.content, query="deep learning").label("score")
        ).execute()

        # JSON parser - searching within JSON documents
        result = client.query(Product).filter(
            natural_match(Product.details, query="Dell laptop")
        ).execute()

        # NGRAM parser - Chinese content search
        result = client.query(ChineseArticle).filter(
            natural_match(ChineseArticle.title, ChineseArticle.body, query="神雕侠侣")
        ).execute()

        # Combined with SQL filters
        result = client.query(Article).filter(
            natural_match(Article.content, query="programming tutorial")
        ).filter(Article.category == "Education").execute()
    """
    # Column objects and model attributes both expose ``name``; plain strings
    # do not, so they fall through to str().
    column_names = [col.name if hasattr(col, 'name') else str(col) for col in columns]
    return FulltextFilter(column_names, FulltextSearchMode.NATURAL_LANGUAGE).set_natural_query(query)
1108
+
1109
+
1110
def group() -> FulltextGroup:
    """Start a new query group whose elements are combined with OR semantics.

    The group itself carries no required/excluded/optional meaning; that is
    decided by the method it is handed to:
        - must(group()) → +(...) - group is required
        - must_not(group()) → -(...) - group is excluded
        - encourage(group()) → (...) - group is optional with normal weight
        - discourage(group()) → ~(...) - group is optional with reduced weight

    Element-level Methods (use inside groups):
        - medium(): Add terms with medium weight (no operators)
        - high(): Add terms with high weight (>term)
        - low(): Add terms with low weight (<term)
        - phrase(): Add exact phrase matches ("phrase")
        - prefix(): Add prefix matches (term*)

    IMPORTANT: Inside groups, do NOT use must()/must_not() as they add +/- operators.
    Use medium() for plain terms or high()/low() for element-level weight control.

    Returns::

        FulltextGroup: A new group constructed as ``FulltextGroup("or")``

    Examples::

        # Required group - must contain 'java' OR 'kotlin'
        query.must(group().medium("java", "kotlin"))  # +(java kotlin)

        # Excluded group - must not contain 'spam' OR 'junk'
        query.must_not(group().medium("spam", "junk"))  # -(spam junk)

        # Optional group with normal weight
        query.encourage(group().medium("tutorial", "guide"))  # (tutorial guide)

        # Optional group with reduced weight
        query.discourage(group().medium("old", "outdated"))  # ~(old outdated)

        # Complex MatrixOne style with element-level weights
        query.must("red").must_not(group().low("blue").high("is"))
        # Generates: '+red -(<blue >is)'
    """
    or_group = FulltextGroup("or")
    return or_group
1148
+
1149
+
1150
+ # Import generic logical adapters at the end to avoid circular imports
1151
try:
    from .adapters import logical_and, logical_not, logical_or
except ImportError:
    # The adapters module could not be imported: define local stand-ins.
    # Each one treats any object with a callable ``compile`` attribute as
    # "compilable", wraps its compiled form in a parenthesised text() clause,
    # and passes every other condition through untouched.

    def logical_and(*conditions):
        """AND the conditions together, wrapping compilable ones in text()."""
        prepared = [
            text(f"({item.compile()})") if callable(getattr(item, 'compile', None)) else item
            for item in conditions
        ]
        return and_(*prepared)

    def logical_or(*conditions):
        """OR the conditions together, wrapping compilable ones in text()."""
        prepared = [
            text(f"({item.compile()})") if callable(getattr(item, 'compile', None)) else item
            for item in conditions
        ]
        return or_(*prepared)

    def logical_not(condition):
        """Negate a condition: text("NOT (...)") if compilable, else not_()."""
        if not callable(getattr(condition, 'compile', None)):
            return not_(condition)
        return text(f"NOT ({condition.compile()})")
1178
+
1179
+
1180
+ # NOTE: The old FulltextTerm and FulltextQuery classes have been removed; FulltextQueryBuilder replaces them.
1181
+
1182
+
1183
class FulltextSearchBuilder:
    """
    Elasticsearch-like fulltext search builder for MatrixOne.

    Provides a chainable interface for building complex fulltext queries
    with support for various search modes, filters, and sorting.

    Boolean Mode Operators:
        - ``+word`` : Required term (must contain)
        - ``-word`` : Excluded term (must not contain)
        - ``~word`` : Lower weight term (reduces relevance score)
        - ``<word`` : Lower weight term (reduces relevance score)
        - ``>word`` : Higher weight term (increases relevance score)
        - word : Optional term (may contain)
        - ``"phrase"`` : Exact phrase match
        - ``word*`` : Wildcard prefix match
        - (word1 word2) : Grouping (contains any of the words)

    Note: MatrixOne supports all boolean mode operators.

    Search Modes:
        - NATURAL_LANGUAGE: Automatic stopword removal, stemming, relevance scoring
        - BOOLEAN: Full control with operators, no automatic processing
        - QUERY_EXPANSION: Not supported in MatrixOne

    Examples::

        # Natural language search
        results = client.fulltext_search()
            .table("articles")
            .columns(["title", "content"])
            .with_mode(FulltextSearchMode.NATURAL_LANGUAGE)
            .query("machine learning")
            .with_score()
            .limit(10)
            .execute()

        # Boolean search with complex terms
        results = client.fulltext_search()
            .table("articles")
            .columns(["title", "content"])
            .with_mode(FulltextSearchMode.BOOLEAN)
            .add_term("machine", required=True)
            .add_term("learning", required=True)
            .where("category = 'AI'")
            .order_by("score", "DESC")
            .limit(20)
            .execute()
    """

    def __init__(self, client: "Client"):
        """Create an empty builder bound to ``client``.

        Args::

            client: Object whose ``execute(sql)`` runs the generated statement
        """
        self.client = client
        self._table_name: Optional[str] = None
        self._columns: List[str] = []
        self._search_mode = FulltextSearchMode.NATURAL_LANGUAGE
        # Stored by with_algorithm(); not read when the SQL is built.
        self._algorithm = FulltextSearchAlgorithm.BM25
        self._query_obj = FulltextQueryBuilder()
        self._include_score = False
        self._where_conditions: List[str] = []
        self._order_by: Optional[str] = None
        self._limit_value: Optional[int] = None
        self._offset_value: Optional[int] = None
        self._select_columns: List[str] = ["*"]

    def table(self, table_name: str) -> "FulltextSearchBuilder":
        """
        Set the target table for the search.

        Args::

            table_name: Name of the table to search

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._table_name = table_name
        return self

    def columns(self, columns: List[str]) -> "FulltextSearchBuilder":
        """
        Set the columns to search in.

        Args::

            columns: List of column names to search

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._columns = columns
        return self

    def with_mode(self, mode: str) -> "FulltextSearchBuilder":
        """
        Set the search mode.

        Args::

            mode: Search mode
                - FulltextSearchMode.NATURAL_LANGUAGE: Automatic processing, user-friendly
                - FulltextSearchMode.BOOLEAN: Full control with operators
                - FulltextSearchMode.QUERY_EXPANSION: Not supported in MatrixOne

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Natural language mode (default)
            .with_mode(FulltextSearchMode.NATURAL_LANGUAGE)

            # Boolean mode for complex queries
            .with_mode(FulltextSearchMode.BOOLEAN)
        """
        self._search_mode = mode
        return self

    def with_algorithm(self, algorithm: str) -> "FulltextSearchBuilder":
        """
        Set the search algorithm.

        Args::

            algorithm: Search algorithm
                - FulltextSearchAlgorithm.TF_IDF: Traditional TF-IDF scoring
                - FulltextSearchAlgorithm.BM25: Modern BM25 scoring (recommended)

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Use BM25 algorithm (recommended)
            .with_algorithm(FulltextSearchAlgorithm.BM25)

            # Use TF-IDF algorithm
            .with_algorithm(FulltextSearchAlgorithm.TF_IDF)
        """
        self._algorithm = algorithm
        return self

    def query(self, query_string: str) -> "FulltextSearchBuilder":
        """
        Set a simple query string (resets previous terms).

        Args::

            query_string: The search query (natural language or boolean syntax)

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Natural language query
            .query("machine learning algorithms")

            # Boolean query
            .query("+machine +learning -java")

        Note: This method resets any previously added terms, phrases, or wildcards.
        """
        # Start from a fresh builder so earlier terms are discarded
        self._query_obj = FulltextQueryBuilder()
        self._query_obj.encourage(query_string)
        return self

    def add_term(
        self,
        term: str,
        required: bool = False,
        excluded: bool = False,
        proximity: Optional[int] = None,
    ) -> "FulltextSearchBuilder":
        """
        Add a search term to the query.

        Args::

            term: The search term
            required: Whether the term is required (+) - must contain this term
            excluded: Whether the term is excluded (-) - must not contain this term
            proximity: Proximity modifier for boolean mode (not supported in MatrixOne);
                accepted but ignored

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Required term: +machine
            .add_term("machine", required=True)

            # Excluded term: -java
            .add_term("java", excluded=True)

            # Optional term: learning
            .add_term("learning")

            # Complex query: +machine +learning -java
            .add_term("machine", required=True)
            .add_term("learning", required=True)
            .add_term("java", excluded=True)
        """
        # ``required`` takes precedence when both flags are set
        if required:
            self._query_obj.must(term)
        elif excluded:
            self._query_obj.must_not(term)
        else:
            self._query_obj.encourage(term)
        return self

    def add_phrase(self, phrase: str) -> "FulltextSearchBuilder":
        """
        Add an exact phrase to the query.

        Args::

            phrase: The exact phrase to search for (wrapped in double quotes)

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Exact phrase: "machine learning"
            .add_phrase("machine learning")

            # Multiple phrases
            .add_phrase("deep learning")
            .add_phrase("neural networks")
        """
        self._query_obj.phrase(phrase)
        return self

    def add_wildcard(self, pattern: str) -> "FulltextSearchBuilder":
        """
        Add a wildcard pattern to the query.

        Args::

            pattern: Wildcard pattern with * suffix (e.g., "test*", "neural*")

        Returns::

            FulltextSearchBuilder: Self for chaining

        Examples::

            # Prefix match: neural*
            .add_wildcard("neural*")

            # Multiple wildcards
            .add_wildcard("machine*")
            .add_wildcard("learn*")
        """
        # Trailing '*' characters are stripped before the stem goes to prefix()
        self._query_obj.prefix(pattern.rstrip('*'))
        return self

    def with_score(self, include: bool = True) -> "FulltextSearchBuilder":
        """
        Include relevance score in results.

        Args::

            include: Whether to include the score

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._include_score = include
        return self

    def select(self, columns: List[str]) -> "FulltextSearchBuilder":
        """
        Set the columns to select in the result.

        Args::

            columns: List of column names to select

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._select_columns = columns
        return self

    def where(self, condition: str) -> "FulltextSearchBuilder":
        """
        Add a WHERE condition.

        Args::

            condition: WHERE condition (raw SQL, ANDed with the fulltext match)

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._where_conditions.append(condition)
        return self

    def order_by(self, column: str, direction: str = "DESC") -> "FulltextSearchBuilder":
        """
        Set ORDER BY clause.

        Args::

            column: Column to order by
            direction: Order direction (ASC/DESC)

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._order_by = f"{column} {direction}"
        return self

    def limit(self, count: int) -> "FulltextSearchBuilder":
        """
        Set LIMIT clause.

        Args::

            count: Number of results to return (``0`` yields ``LIMIT 0``)

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._limit_value = count
        return self

    def offset(self, count: int) -> "FulltextSearchBuilder":
        """
        Set OFFSET clause.

        Args::

            count: Number of results to skip

        Returns::

            FulltextSearchBuilder: Self for chaining
        """
        self._offset_value = count
        return self

    def _build_sql(self) -> str:
        """Build the final SQL query.

        Returns::

            str: ``SELECT ... FROM ... WHERE MATCH(...) AGAINST(...)`` followed by
            the optional ORDER BY / LIMIT / OFFSET clauses

        Raises::

            ValueError: If the table, the search columns or the query is missing
        """
        if not self._table_name:
            raise ValueError("Table name is required")

        if not self._columns:
            raise ValueError("Search columns are required")

        query_string = self._query_obj.build()
        if not query_string:
            raise ValueError("Query is required")

        # The same MATCH expression serves as the WHERE predicate and, when
        # requested, as the score column -- build it once.
        columns_str = ", ".join(self._columns)
        match_expr = f"MATCH({columns_str}) AGAINST('{query_string}' IN {self._search_mode})"

        select_parts = self._select_columns.copy()
        if self._include_score:
            select_parts.append(f"{match_expr} AS score")

        # The fulltext predicate always comes first, then the caller's conditions
        where_parts = [match_expr, *self._where_conditions]

        sql_parts = [
            f"SELECT {', '.join(select_parts)}",
            f"FROM {self._table_name}",
            f"WHERE {' AND '.join(where_parts)}",
        ]
        if self._order_by:
            sql_parts.append(f"ORDER BY {self._order_by}")
        # Compare against None: limit(0) is a real request for zero rows and
        # must not be confused with "no limit set".
        if self._limit_value is not None:
            sql_parts.append(f"LIMIT {self._limit_value}")
        # OFFSET 0 is a no-op, so a falsy offset is simply omitted.
        if self._offset_value:
            sql_parts.append(f"OFFSET {self._offset_value}")
        return " ".join(sql_parts)

    def execute(self) -> Any:
        """
        Execute the fulltext search query.

        Returns::

            Query results, as returned by ``client.execute()``
        """
        return self.client.execute(self._build_sql())

    def explain(self) -> str:
        """
        Get the SQL query that would be executed.

        Returns::

            SQL query string
        """
        return self._build_sql()
1614
+
1615
+
1616
class FulltextIndexManager:
    """Manager for fulltext index operations."""

    def __init__(self, client: "Client"):
        """Bind the manager to the client used for every index operation."""
        self.client = client

    def create(
        self,
        table_name: str,
        name: str,
        columns: List[str],
        algorithm: str = FulltextSearchAlgorithm.BM25,
    ) -> bool:
        """
        Create a fulltext index.

        First issues ``SET ft_relevancy_algorithm`` through the client, then
        delegates to ``FulltextIndex.create_index`` using the client's
        SQLAlchemy engine.

        Args::

            table_name: Name of the table
            name: Name of the index
            columns: List of columns to index
            algorithm: Search algorithm to use

        Returns::

            bool: The result of ``FulltextIndex.create_index``
        """
        # Imported here rather than at module level
        from .fulltext_index import FulltextIndex

        # Select the relevancy algorithm before the index is created
        self.client.execute(f'SET ft_relevancy_algorithm = "{algorithm}"')

        return FulltextIndex.create_index(
            engine=self.client.get_sqlalchemy_engine(),
            table_name=table_name,
            name=name,
            columns=columns,
            algorithm=algorithm,
        )

    def drop(self, table_name: str, name: str) -> bool:
        """
        Drop a fulltext index.

        Args::

            table_name: Name of the table
            name: Name of the index

        Returns::

            bool: The result of ``FulltextIndex.drop_index``
        """
        # Imported here rather than at module level
        from .fulltext_index import FulltextIndex

        return FulltextIndex.drop_index(
            engine=self.client.get_sqlalchemy_engine(),
            table_name=table_name,
            name=name,
        )

    def search(self) -> "FulltextSearchBuilder":
        """
        Create a new fulltext search builder.

        Returns::

            FulltextSearchBuilder: New search builder instance bound to this manager's client
        """
        search_builder = FulltextSearchBuilder(self.client)
        return search_builder