matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,895 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Fulltext index support for SQLAlchemy integration with MatrixOne.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from typing import Any, List, Union
|
20
|
+
|
21
|
+
from sqlalchemy import Index, text
|
22
|
+
from sqlalchemy.ext.compiler import compiles
|
23
|
+
from sqlalchemy.schema import CreateIndex
|
24
|
+
|
25
|
+
|
26
|
+
def _exec_sql_safe(connection, sql: str):
|
27
|
+
"""
|
28
|
+
Execute SQL safely, bypassing SQLAlchemy's bind parameter parsing.
|
29
|
+
|
30
|
+
This prevents JSON strings like {"a":1} from being incorrectly parsed as :1 bind params.
|
31
|
+
Uses exec_driver_sql() when available, falls back to text() for testing/compatibility.
|
32
|
+
"""
|
33
|
+
if hasattr(connection, 'exec_driver_sql'):
|
34
|
+
# Escape % to %% for pymysql's format string handling
|
35
|
+
escaped_sql = sql.replace('%', '%%')
|
36
|
+
return connection.exec_driver_sql(escaped_sql)
|
37
|
+
else:
|
38
|
+
# Fallback for testing or older SQLAlchemy versions
|
39
|
+
return connection.execute(text(sql))
|
40
|
+
|
41
|
+
|
42
|
+
class FulltextAlgorithmType:
    """
    Namespace of string constants naming the fulltext relevancy algorithms.

    MatrixOne supports two main fulltext relevancy algorithms:

    Attributes:
        TF_IDF (str): Term Frequency-Inverse Document Frequency

            * Traditional information retrieval algorithm
            * Formula: TF(term) × IDF(term)
            * Proven reliability for specific use cases
            * Use case: Academic search, technical documentation

        BM25 (str): Best Matching 25 (Okapi BM25)

            * Modern probabilistic ranking algorithm
            * Generally superior to TF-IDF for diverse content
            * Better document length normalization
            * Use case: General-purpose search, modern applications
            * Recommended as default for new applications

    Note:
        The algorithm is selected at runtime with a SQL command; it is not
        part of the index DDL.

    Examples::

        # Set algorithm to BM25
        client.execute('SET ft_relevancy_algorithm = "BM25"')

        # Create index with BM25 reference
        index = FulltextIndex("ftidx_content", ["title", "content"],
                              algorithm=FulltextAlgorithmType.BM25)

        # Perform searches with BM25 scoring
        result = client.query(Article).filter(
            boolean_match(Article.content).must("search term")
        ).execute()
    """

    # Values are plain strings so they can be compared with raw literals.
    TF_IDF = "TF-IDF"
    BM25 = "BM25"
|
84
|
+
|
85
|
+
|
86
|
+
class FulltextParserType:
    """
    Namespace of string constants naming the fulltext parsers.

    MatrixOne supports specialized parsers for different content types. The
    constant values are the parser names placed after ``WITH PARSER`` in the
    index DDL.

    Attributes:
        JSON (str): Parser for JSON documents

            * Indexes JSON values (not keys)
            * Suitable for text/varchar/json columns containing JSON data
            * Use case: Product details, user profiles, metadata
            * Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER json

        NGRAM (str): Parser for Chinese and Asian languages

            * N-gram based tokenization for languages without word delimiters
            * Better word segmentation for Chinese, Japanese, Korean, etc.
            * Use case: Chinese articles, mixed language content
            * Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER ngram

    Examples::

        # Using JSON parser in ORM
        class Product(Base):
            __tablename__ = "products"
            details = Column(Text)
            __table_args__ = (
                FulltextIndex("ftidx_json", "details",
                              parser=FulltextParserType.JSON),
            )

        # Using NGRAM parser for Chinese content
        class ChineseArticle(Base):
            __tablename__ = "chinese_articles"
            title = Column(String(200))
            body = Column(Text)
            __table_args__ = (
                FulltextIndex("ftidx_chinese", ["title", "body"],
                              parser=FulltextParserType.NGRAM),
            )

        # Using parser in create_index method
        FulltextIndex.create_index(
            engine, 'products', 'ftidx_json', 'details',
            parser=FulltextParserType.JSON
        )
    """

    # Lower-case parser names, inserted verbatim into the DDL.
    JSON = "json"
    NGRAM = "ngram"
|
137
|
+
|
138
|
+
|
139
|
+
class FulltextModeType:
    """
    Namespace of string constants naming the fulltext search modes.

    The values are plain lower-case strings, so a mode can also be given as
    the equivalent raw literal.
    """

    NATURAL_LANGUAGE = "natural language mode"
    BOOLEAN = "boolean mode"
    QUERY_EXPANSION = "query expansion mode"
|
145
|
+
|
146
|
+
|
147
|
+
class FulltextIndex(Index):
    """
    SQLAlchemy Index for fulltext columns with MatrixOne-specific syntax.

    Specialized class for fulltext indexes with type safety and clear API.
    Besides acting as a regular ``Index`` (so it can be listed in
    ``__table_args__``), it offers helpers that emit ``CREATE FULLTEXT INDEX``
    and ``DROP INDEX`` statements directly. Every helper returns ``True`` on
    success and ``False`` on failure; failures are reported with ``print``
    rather than raised.

    Usage Examples

    1. Class Methods (Recommended for one-time operations)::

        # Create index using class method
        success = FulltextIndex.create_index(
            engine=engine,
            table_name='my_table',
            name='ftidx_content',
            columns=['title', 'content'],
            algorithm=FulltextAlgorithmType.BM25
        )

        # Drop index using class method
        success = FulltextIndex.drop_index(
            engine=engine,
            table_name='my_table',
            name='ftidx_content'
        )

        # Create index within existing transaction
        with engine.begin() as conn:
            success = FulltextIndex.create_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='ftidx_content',
                columns=['title', 'content']
            )

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = FulltextIndex.drop_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='ftidx_content'
            )

    2. Instance Methods (Useful for reusable index configurations)::

        # Create index object
        index = FulltextIndex('ftidx_content', ['title', 'content'], algorithm=FulltextAlgorithmType.BM25)

        # Create index using instance method
        success = index.create(engine, 'my_table')

        # Drop index using instance method
        success = index.drop(engine, 'my_table')

        # Create index within existing transaction
        with engine.begin() as conn:
            success = index.create_in_transaction(conn, 'my_table')

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = index.drop_in_transaction(conn, 'my_table')

    3. SQLAlchemy ORM Integration::

        # In table definition
        class Document(Base):
            __tablename__ = 'documents'
            id = Column(Integer, primary_key=True)
            title = Column(String)
            content = Column(Text)

            # Define fulltext index in table
            __table_args__ = (FulltextIndex('ftidx_doc', ['title', 'content']),)

        # Or create index separately
        FulltextIndex.create_index(engine, 'documents', 'ftidx_doc', ['title', 'content'])

    4. Using client.fulltext_index.create() method::

        # Using client.fulltext_index.create() method
        client.fulltext_index.create(
            'my_table', 'ftidx_content', ['title', 'content'],
            algorithm=FulltextAlgorithmType.BM25
        )

        # Using client.fulltext_index.create_in_transaction() method
        with client.transaction() as tx:
            client.fulltext_index.create_in_transaction(
                tx, 'my_table', 'ftidx_content', ['title', 'content']
            )
    """

    def __init__(
        self,
        name: str,
        columns: Union[str, List[str]],
        algorithm: str = FulltextAlgorithmType.TF_IDF,
        parser: Union[str, None] = None,
    ):
        """
        Initialize FulltextIndex.

        Args:
            name (str): Index name (e.g., 'ftidx_content', 'idx_search')

            columns (str or sequence of str): Column(s) to index

                * Single column: "content" or ["content"]
                * Multiple columns: ["title", "content"]

            algorithm (str): Fulltext algorithm type (stored on the instance
                but not part of the DDL)

                * FulltextAlgorithmType.TF_IDF (default): Traditional TF-IDF
                * FulltextAlgorithmType.BM25: Modern BM25 ranking
                * Note: Set via SET ft_relevancy_algorithm at runtime

            parser (str, optional): Parser type for specialized content

                * None (default): Standard text parser
                * FulltextParserType.JSON: Parse JSON documents
                * FulltextParserType.NGRAM: N-gram for Chinese/Asian languages

        Examples::

            # Basic fulltext index (no parser)
            index = FulltextIndex("ftidx_content", "content")

            # Multiple columns with BM25
            index = FulltextIndex("ftidx_search", ["title", "content"],
                                  algorithm=FulltextAlgorithmType.BM25)

            # JSON parser for JSON content
            index = FulltextIndex("ftidx_json", "json_data",
                                  parser=FulltextParserType.JSON)

            # NGRAM parser for Chinese content
            index = FulltextIndex("ftidx_chinese", ["title", "body"],
                                  parser=FulltextParserType.NGRAM)

            # Combined: Multiple columns with JSON parser
            index = FulltextIndex("ftidx_multi_json", ["json1", "json2"],
                                  parser=FulltextParserType.JSON)
        """
        if isinstance(columns, str):
            columns = [columns]

        # list() rather than .copy(): tuples and other sequences have no
        # .copy() method, and the private list must not alias the caller's.
        column_names = list(columns)

        self.algorithm = algorithm
        self.parser = parser
        self._column_names = column_names  # Store column names for easy access
        super().__init__(name, *column_names)

    def get_columns(self):
        """Return a fresh list holding the indexed column names."""
        return list(self._column_names)

    @staticmethod
    def _build_create_sql(name, table_name, columns, parser=None) -> str:
        """Build the CREATE FULLTEXT INDEX statement shared by all create helpers."""
        if isinstance(columns, str):
            columns = [columns]

        sql = f"CREATE FULLTEXT INDEX {name} ON {table_name} ({', '.join(columns)})"
        if parser:
            sql += f" WITH PARSER {parser}"
        return sql

    @staticmethod
    def _build_drop_sql(name, table_name) -> str:
        """Build the DROP INDEX statement shared by all drop helpers."""
        return f"DROP INDEX {name} ON {table_name}"

    def _create_index_sql(self, table_name: str) -> str:
        """Generate the CREATE INDEX SQL for this fulltext index on ``table_name``."""
        return self._build_create_sql(self.name, table_name, self._column_names, self.parser)

    @classmethod
    def create_index(
        cls,
        engine,
        table_name: str,
        name: str,
        columns: Union[str, List[str]],
        algorithm: str = FulltextAlgorithmType.TF_IDF,
        parser: Union[str, None] = None,
    ) -> bool:
        """
        Create a fulltext index using class method.

        The statement is executed inside ``engine.begin()``. An optional
        parser supports specialized content types (JSON, Chinese text, etc.).

        Args:
            engine: SQLAlchemy engine instance
            table_name (str): Target table name (e.g., 'articles', 'documents')
            name (str): Index name (e.g., 'ftidx_content', 'idx_search')
            columns (str or list): Column(s) to index

                * Single: "content" or ["content"]
                * Multiple: ["title", "content", "summary"]

            algorithm (str): Accepted for API symmetry only; this method
                neither stores it nor includes it in the DDL

                * FulltextAlgorithmType.TF_IDF (default)
                * FulltextAlgorithmType.BM25
                * Set via: SET ft_relevancy_algorithm = "BM25"

            parser (str, optional): Parser type for specialized content

                * None (default): Standard parser
                * FulltextParserType.JSON: For JSON documents
                * FulltextParserType.NGRAM: For Chinese/Asian languages

        Returns:
            bool: True if succeeded, False otherwise (the error is printed)

        Examples::

            # Basic fulltext index
            FulltextIndex.create_index(
                engine, 'articles', 'ftidx_content', 'content'
            )

            # Multiple columns with BM25
            FulltextIndex.create_index(
                engine, 'articles', 'ftidx_search', ['title', 'content'],
                algorithm=FulltextAlgorithmType.BM25
            )

            # JSON parser
            FulltextIndex.create_index(
                engine, 'products', 'ftidx_json', 'details',
                parser=FulltextParserType.JSON
            )

            # NGRAM parser
            FulltextIndex.create_index(
                engine, 'chinese_articles', 'ftidx_chinese', ['title', 'body'],
                parser=FulltextParserType.NGRAM
            )
        """
        try:
            sql = cls._build_create_sql(name, table_name, columns, parser)

            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)

            return True
        except Exception as e:
            print(f"Failed to create fulltext index: {e}")
            return False

    @classmethod
    def create_index_in_transaction(
        cls,
        connection,
        table_name: str,
        name: str,
        columns: Union[str, List[str]],
        algorithm: str = FulltextAlgorithmType.TF_IDF,
        parser: Union[str, None] = None,
    ) -> bool:
        """
        Create a fulltext index within an existing transaction.

        The statement is executed on the supplied connection; committing or
        rolling back is left to the caller.

        Args:
            connection: Active SQLAlchemy connection within a transaction
            table_name (str): Target table name
            name (str): Index name
            columns (str or list): Column(s) to index

                * Single: "content" or ["content"]
                * Multiple: ["title", "content"]

            algorithm (str): Accepted for API symmetry only; not used here

                * FulltextAlgorithmType.TF_IDF (default)
                * FulltextAlgorithmType.BM25

            parser (str, optional): Parser type

                * None (default): Standard parser
                * FulltextParserType.JSON: For JSON documents
                * FulltextParserType.NGRAM: For Chinese/Asian languages

        Returns:
            bool: True if succeeded, False otherwise (the error is printed)

        Examples::

            # Basic usage within transaction
            with engine.begin() as conn:
                FulltextIndex.create_index_in_transaction(
                    conn, 'articles', 'ftidx_content', 'content'
                )

            # With JSON parser
            with engine.begin() as conn:
                FulltextIndex.create_index_in_transaction(
                    conn, 'products', 'ftidx_json', 'details',
                    parser=FulltextParserType.JSON
                )

            # With NGRAM parser
            with engine.begin() as conn:
                FulltextIndex.create_index_in_transaction(
                    conn, 'chinese_docs', 'ftidx_chinese', ['title', 'body'],
                    parser=FulltextParserType.NGRAM
                )
        """
        try:
            sql = cls._build_create_sql(name, table_name, columns, parser)
            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to create fulltext index in transaction: {e}")
            return False

    @classmethod
    def drop_index(cls, engine, table_name: str, name: str) -> bool:
        """
        Drop a fulltext index using ORM-style method.

        Args:

            engine: SQLAlchemy engine
            table_name: Target table name
            name: Index name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            sql = cls._build_drop_sql(name, table_name)

            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)

            return True
        except Exception as e:
            print(f"Failed to drop fulltext index: {e}")
            return False

    @classmethod
    def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
        """
        Drop a fulltext index within an existing transaction.

        Args:

            connection: SQLAlchemy connection
            table_name: Target table name
            name: Index name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            _exec_sql_safe(connection, cls._build_drop_sql(name, table_name))
            return True
        except Exception as e:
            print(f"Failed to drop fulltext index in transaction: {e}")
            return False

    def create(self, engine, table_name: str) -> bool:
        """
        Create this fulltext index using ORM-style method.

        Args:

            engine: SQLAlchemy engine
            table_name: Target table name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            sql = self._create_index_sql(table_name)

            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)

            return True
        except Exception as e:
            print(f"Failed to create fulltext index: {e}")
            return False

    def drop(self, engine, table_name: str) -> bool:
        """
        Drop this fulltext index using ORM-style method.

        Args:

            engine: SQLAlchemy engine
            table_name: Target table name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            sql = self._build_drop_sql(self.name, table_name)

            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)

            return True
        except Exception as e:
            print(f"Failed to drop fulltext index: {e}")
            return False

    def create_in_transaction(self, connection, table_name: str) -> bool:
        """
        Create this fulltext index within an existing transaction.

        Args:

            connection: SQLAlchemy connection
            table_name: Target table name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            _exec_sql_safe(connection, self._create_index_sql(table_name))
            return True
        except Exception as e:
            print(f"Failed to create fulltext index in transaction: {e}")
            return False

    def drop_in_transaction(self, connection, table_name: str) -> bool:
        """
        Drop this fulltext index within an existing transaction.

        Args:

            connection: SQLAlchemy connection
            table_name: Target table name

        Returns:

            bool: True if successful, False otherwise (the error is printed)
        """
        try:
            _exec_sql_safe(connection, self._build_drop_sql(self.name, table_name))
            return True
        except Exception as e:
            print(f"Failed to drop fulltext index in transaction: {e}")
            return False
|
606
|
+
|
607
|
+
|
608
|
+
class FulltextSearchBuilder:
|
609
|
+
"""
|
610
|
+
Builder class for fulltext search queries.
|
611
|
+
|
612
|
+
Provides a fluent interface for building MATCH...AGAINST queries.
|
613
|
+
"""
|
614
|
+
|
615
|
+
def __init__(self, table_name: str, columns: Union[str, List[str]]):
    """
    Set up an empty search description for one table.

    Args:

        table_name: Table to search in
        columns: Column(s) to search in; a single name is wrapped in a list
    """
    self.table_name = table_name
    self.columns = [columns] if isinstance(columns, str) else columns

    # Query state, filled in later through the fluent setters.
    self.search_term = None
    self.search_mode = FulltextModeType.NATURAL_LANGUAGE
    self.include_score = False
    self.where_conditions = []
    self.order_clause = None
    self.limit_value = None
    self.offset_value = None
|
635
|
+
|
636
|
+
@property
def with_score(self):
    """Backward-compatible read-only alias for ``include_score``."""
    return self.include_score
|
640
|
+
|
641
|
+
@property
def mode(self):
    """Backward-compatible read-only alias for ``search_mode``."""
    return self.search_mode
|
645
|
+
|
646
|
+
@property
def order_by(self):
    """Backward-compatible read-only alias for ``order_clause``."""
    return self.order_clause
|
650
|
+
|
651
|
+
def search(self, term: str) -> "FulltextSearchBuilder":
    """
    Record the text to search for.

    Args:

        term: Search term

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.search_term = term
    return self
|
665
|
+
|
666
|
+
def set_mode(self, mode: str) -> "FulltextSearchBuilder":
    """
    Record the search mode.

    Args:

        mode: Search mode (natural language, boolean, query expansion)

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.search_mode = mode
    return self
|
680
|
+
|
681
|
+
def set_with_score(self, include_score: bool = True) -> "FulltextSearchBuilder":
    """
    Choose whether the relevance score is part of the results.

    Args:

        include_score: Whether to include score (defaults to True)

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.include_score = include_score
    return self
|
695
|
+
|
696
|
+
def where(self, condition: str) -> "FulltextSearchBuilder":
    """
    Append one extra WHERE condition.

    Conditions accumulate: each call adds to ``where_conditions`` instead of
    replacing what was recorded before.

    Args:

        condition: WHERE condition

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.where_conditions.append(condition)
    return self
|
710
|
+
|
711
|
+
def set_order_by(self, column: str, direction: str = "DESC") -> "FulltextSearchBuilder":
    """
    Record the ORDER BY clause as ``"<column> <direction>"``.

    Args:

        column: Column to order by
        direction: Order direction (ASC/DESC); defaults to "DESC"

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.order_clause = "{} {}".format(column, direction)
    return self
|
726
|
+
|
727
|
+
def limit(self, count: int) -> "FulltextSearchBuilder":
    """
    Record the LIMIT value.

    Args:

        count: Number of rows to limit

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.limit_value = count
    return self
|
741
|
+
|
742
|
+
def offset(self, count: int) -> "FulltextSearchBuilder":
    """
    Record the OFFSET value.

    Args:

        count: Number of rows to offset

    Returns:

        FulltextSearchBuilder: This builder, so calls can be chained
    """
    self.offset_value = count
    return self
|
756
|
+
|
757
|
+
def build_sql(self) -> str:
    """
    Build the SQL query using unified SQL builder.

    The query selects every column (plus ``MATCH ... AGAINST ... AS score``
    when scoring is enabled), filters on the MATCH clause and any extra
    WHERE conditions, then applies ORDER BY / LIMIT / OFFSET as configured.
    With scoring enabled and no explicit ordering, rows are ordered by
    ``score DESC``.

    Returns:

        str: SQL query string

    Raises:

        ValueError: If no (non-empty) search term has been set
    """
    if not self.search_term:
        raise ValueError("Search term is required")

    from ..sql_builder import MatrixOneSQLBuilder

    builder = MatrixOneSQLBuilder()

    # The term ends up inside a single-quoted literal. Escape backslashes
    # first, then quotes, so user text can neither terminate the literal
    # early nor smuggle in an escape sequence.
    # NOTE(review): assumes MySQL-style string literal rules - confirm for MatrixOne.
    escaped_term = str(self.search_term).replace("\\", "\\\\").replace("'", "''")

    # Natural-language mode and any unknown mode use the bare AGAINST('...')
    # form: MatrixOne doesn't support "IN NATURAL_LANGUAGE" syntax.
    if self.search_mode == FulltextModeType.BOOLEAN:
        mode_suffix = " IN BOOLEAN MODE"
    elif self.search_mode == FulltextModeType.QUERY_EXPANSION:
        mode_suffix = " WITH QUERY EXPANSION"
    else:
        mode_suffix = ""

    columns_str = ", ".join(self.columns)
    match_clause = f"MATCH({columns_str}) AGAINST('{escaped_term}'{mode_suffix})"

    # Build SELECT clause
    if self.include_score:
        builder.select("*", f"{match_clause} AS score")
    else:
        builder.select_all()

    # Build FROM clause
    builder.from_table(self.table_name)

    # Build WHERE clause: the MATCH predicate first, then caller conditions
    builder.where(match_clause)
    for condition in self.where_conditions:
        builder.where(condition)

    # Add ORDER BY clause
    if self.order_clause:
        builder.order_by(self.order_clause)
    elif self.include_score:
        builder.order_by("score DESC")

    # Add LIMIT/OFFSET clause. LIMIT 0 is a legitimate request for zero rows,
    # so only an unset limit (None) is skipped. OFFSET stays truthiness-gated:
    # an offset of 0 is a no-op and need not be emitted.
    if self.limit_value is not None:
        builder.limit(self.limit_value)
    if self.offset_value:
        builder.offset(self.offset_value)

    return builder.build_with_parameter_substitution()
|
813
|
+
|
814
|
+
def execute(self, connection) -> Any:
    """
    Build the search query and run it on the given connection.

    The SQL comes from ``build_sql`` and is handed to ``_exec_sql_safe``
    together with the connection.

    Args:

        connection: Database connection

    Returns:

        Whatever ``_exec_sql_safe`` returns for the query
    """
    statement = self.build_sql()
    outcome = _exec_sql_safe(connection, statement)
    return outcome
|
828
|
+
|
829
|
+
|
830
|
+
# Convenience functions
|
831
|
+
def create_fulltext_index(
    engine,
    table_name: str,
    name: str,
    columns: Union[str, List[str]],
    algorithm: str = FulltextAlgorithmType.TF_IDF,
    parser: str = None,
) -> bool:
    """
    Create a fulltext index by delegating to ``FulltextIndex.create_index``.

    All arguments are forwarded unchanged, in the same order.

    Args:

        engine: SQLAlchemy engine
        table_name: Target table name
        name: Index name
        columns: Column(s) to index
        algorithm: Fulltext algorithm type
        parser: Parser type for fulltext index (json, ngram, or None)

    Returns:

        bool: Result reported by ``FulltextIndex.create_index``
    """
    created = FulltextIndex.create_index(engine, table_name, name, columns, algorithm, parser)
    return created
|
856
|
+
|
857
|
+
|
858
|
+
def fulltext_search_builder(table_name: str, columns: Union[str, List[str]]) -> FulltextSearchBuilder:
    """
    Create a ``FulltextSearchBuilder`` for the given table and columns.

    Args:

        table_name: Table to search in
        columns: Column(s) to search in

    Returns:

        FulltextSearchBuilder: Newly constructed search builder
    """
    search_builder = FulltextSearchBuilder(table_name, columns)
    return search_builder
|
872
|
+
|
873
|
+
|
874
|
+
# Register SQLAlchemy compiler for FulltextIndex to generate FULLTEXT DDL
|
875
|
+
@compiles(CreateIndex)
def compile_create_index(element, compiler, **kw):
    """
    Compile CREATE INDEX, emitting FULLTEXT DDL for FulltextIndex instances.

    For a ``FulltextIndex`` the statement is
    ``CREATE FULLTEXT INDEX <name> ON <table> (<columns>)``, with
    `` WITH PARSER <parser>`` appended when the index carries a truthy
    ``parser`` attribute. Every other index is handed to the compiler's own
    ``visit_create_index``.
    """
    index = element.element

    # Regular indexes keep the default behavior
    if not isinstance(index, FulltextIndex):
        return compiler.visit_create_index(element, **kw)

    # FulltextIndex: assemble the FULLTEXT DDL piece by piece
    column_list = ", ".join(col.name for col in index.columns)
    pieces = [f"CREATE FULLTEXT INDEX {index.name} ON {index.table.name} ({column_list})"]

    # The parser clause is optional; a missing attribute counts as "no parser"
    parser = getattr(index, "parser", None)
    if parser:
        pieces.append(f" WITH PARSER {parser}")

    return "".join(pieces)
|