matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,1721 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Vector index support for SQLAlchemy integration with MatrixOne.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from typing import List, Optional, Union
|
20
|
+
|
21
|
+
from sqlalchemy import Column, Index, text
|
22
|
+
from sqlalchemy.ext.compiler import compiles
|
23
|
+
from sqlalchemy.schema import DDLElement
|
24
|
+
from sqlalchemy.sql.ddl import CreateIndex as SQLAlchemyCreateIndex
|
25
|
+
|
26
|
+
|
27
|
+
def _exec_sql_safe(connection, sql: str):
|
28
|
+
"""
|
29
|
+
Execute SQL safely, bypassing SQLAlchemy's bind parameter parsing.
|
30
|
+
|
31
|
+
This prevents JSON strings like {"a":1} from being incorrectly parsed as :1 bind params.
|
32
|
+
Uses exec_driver_sql() when available, falls back to text() for testing/compatibility.
|
33
|
+
"""
|
34
|
+
if hasattr(connection, 'exec_driver_sql'):
|
35
|
+
# Escape % to %% for pymysql's format string handling
|
36
|
+
escaped_sql = sql.replace('%', '%%')
|
37
|
+
return connection.exec_driver_sql(escaped_sql)
|
38
|
+
else:
|
39
|
+
# Fallback for testing or older SQLAlchemy versions
|
40
|
+
return connection.execute(text(sql))
|
41
|
+
|
42
|
+
|
43
|
+
class VectorIndexType:
    """Namespace of string constants naming the supported vector index kinds."""

    # Inverted-file flat index; used by IVFVectorIndex
    IVFFLAT = "ivfflat"
    # Future support
    HNSW = "hnsw"
|
48
|
+
|
49
|
+
|
50
|
+
class VectorOpType:
    """Namespace of string constants for the ``op_type`` clause of a vector index."""

    # These values are interpolated verbatim into the generated CREATE INDEX SQL.
    VECTOR_L2_OPS = "vector_l2_ops"
    VECTOR_IP_OPS = "vector_ip_ops"
    VECTOR_COSINE_OPS = "vector_cosine_ops"
|
56
|
+
|
57
|
+
|
58
|
+
class IVFVectorIndex(Index):
    """
    SQLAlchemy Index for IVFFLAT vector columns with MatrixOne-specific syntax.

    Specialized class for IVFFLAT vector indexes with type safety and clear API.

    Usage Examples

    1. Class Methods (Recommended for one-time operations):

        # Create index using class method
        success = IVFVectorIndex.create_index(
            engine=engine,
            table_name='my_table',
            name='idx_embedding',
            column='embedding',
            lists=100,
            op_type=VectorOpType.VECTOR_L2_OPS
        )

        # Drop index using class method
        success = IVFVectorIndex.drop_index(
            engine=engine,
            table_name='my_table',
            name='idx_embedding'
        )

        # Create index within existing transaction
        with engine.begin() as conn:
            success = IVFVectorIndex.create_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='idx_embedding',
                column='embedding',
                lists=100
            )

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = IVFVectorIndex.drop_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='idx_embedding'
            )

    2. Instance Methods (Useful for reusable index configurations):

        # Create index object
        index = IVFVectorIndex('idx_embedding', 'embedding', lists=100)

        # Create index using instance method
        success = index.create(engine, 'my_table')

        # Drop index using instance method
        success = index.drop(engine, 'my_table')

        # Create index within existing transaction
        with engine.begin() as conn:
            success = index.create_in_transaction(conn, 'my_table')

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = index.drop_in_transaction(conn, 'my_table')

    3. SQLAlchemy ORM Integration:

        # In table definition
        class Document(Base):
            __tablename__ = 'documents'
            id = Column(Integer, primary_key=True)
            embedding = create_vector_column(128, "f32")

            # Note: For ORM integration, create table first, then create index separately
            # __table_args__ = (IVFVectorIndex('idx_embedding', 'embedding', lists=100),)

        # Create table first
        Base.metadata.create_all(engine)

        # Then create index separately
        IVFVectorIndex.create_index(engine, 'documents', 'idx_embedding', 'embedding', lists=100)

    4. Client Chain Operations:

        # Using client.vector_index.create_ivf() method
        client.vector_index.create_ivf('my_table', 'idx_embedding', 'embedding', lists=100)

        # Using client.vector_index.create_ivf_in_transaction() method
        with client.transaction() as tx:
            client.vector_index.create_ivf_in_transaction(
                'my_table', 'idx_embedding', 'embedding', tx.connection, lists=100
            )

    Parameters:
        name (str): Index name
        column (Union[str, Column]): Vector column to index
        lists (int): Number of lists for IVFFLAT (default: 100)
        op_type (str): Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Note:

        - MatrixOne supports only ONE index per vector column
        - Enable IVF indexing before creating IVFFLAT indexes: SET experimental_ivf_index = 1
          (the create methods of this class issue this statement themselves)
        - Set probe limit for search: SET probe_limit = 1
          (also issued by the create methods of this class)
        - The create/drop helpers catch every exception, print a message and
          return False instead of raising
    """

    def __init__(
        self,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ):
        """
        Initialize IVFVectorIndex.

        Args::

            name: Index name
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters
        """
        self.index_type = VectorIndexType.IVFFLAT
        self.lists = lists
        self.op_type = op_type

        # Store the bare column name for SQL generation. str() on a Column
        # that is already attached to a Table renders as "table.column", which
        # would end up inside the index column list, so prefer Column.name and
        # only fall back to str() when no name is available.
        if isinstance(column, str):
            self._column_name = column
        else:
            self._column_name = getattr(column, "name", None) or str(column)

        # The attributes above are set before delegating to Index.__init__
        super().__init__(name, column, **kwargs)

        # Set dialect options after initialization to bind to matrixone dialect
        self.dialect_options["matrixone"] = {"length": None, "using": None}
        # Also provide mysql fallback for compatibility
        self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}

    def _create_index_sql(self, table_name: str) -> str:
        """Generate the CREATE INDEX SQL for IVFFLAT vector index."""
        # Identifiers and options are interpolated verbatim (no quoting), so
        # callers must pass trusted names.
        return (
            f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({self._column_name})"
            f" LISTS {self.lists}"
            f" op_type '{self.op_type}'"
        )

    def create_sql(self, table_name: str) -> str:
        """Generate CREATE INDEX SQL for the given table name."""
        return self._create_index_sql(table_name)

    def drop_sql(self, table_name: str) -> str:
        """Generate DROP INDEX SQL for the given table name."""
        return f"DROP INDEX {self.name} ON {table_name}"

    @staticmethod
    def _execute_create(connection, sql: str) -> None:
        """Enable IVF indexing on ``connection`` and run the CREATE INDEX statement."""
        _exec_sql_safe(connection, "SET experimental_ivf_index = 1")
        _exec_sql_safe(connection, "SET probe_limit = 1")
        _exec_sql_safe(connection, sql)

    @classmethod
    def create_index(
        cls,
        engine,
        table_name: str,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ) -> bool:
        """
        Create an IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            index = cls(name, column, lists, op_type, **kwargs)
            sql = index.create_sql(table_name)

            with engine.begin() as conn:
                cls._execute_create(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index: {e}")
            return False

    @classmethod
    def create_index_in_transaction(
        cls,
        connection,
        table_name: str,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ) -> bool:
        """
        Create an IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            index = cls(name, column, lists, op_type, **kwargs)
            sql = index.create_sql(table_name)

            cls._execute_create(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index in transaction: {e}")
            return False

    @classmethod
    def drop_index(cls, engine, table_name: str, name: str) -> bool:
        """
        Drop an IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index: {e}")
            return False

    @classmethod
    def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
        """
        Drop an IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index in transaction: {e}")
            return False

    def create(self, engine, table_name: str) -> bool:
        """
        Create this IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.create_sql(table_name)

            with engine.begin() as conn:
                self._execute_create(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index: {e}")
            return False

    def drop(self, engine, table_name: str) -> bool:
        """
        Drop this IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.drop_sql(table_name)
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index: {e}")
            return False

    def create_in_transaction(self, connection, table_name: str) -> bool:
        """
        Create this IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.create_sql(table_name)

            self._execute_create(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index in transaction: {e}")
            return False

    def drop_in_transaction(self, connection, table_name: str) -> bool:
        """
        Drop this IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.drop_sql(table_name)
            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index in transaction: {e}")
            return False
|
437
|
+
|
438
|
+
|
439
|
+
class HnswVectorIndex(Index):
|
440
|
+
"""
|
441
|
+
SQLAlchemy Index for HNSW vector columns with MatrixOne-specific syntax.
|
442
|
+
|
443
|
+
Specialized class for HNSW vector indexes with type safety and clear API.
|
444
|
+
|
445
|
+
Usage Examples
|
446
|
+
|
447
|
+
1. Class Methods (Recommended for one-time operations):
|
448
|
+
|
449
|
+
# Create index using class method
|
450
|
+
success = HnswVectorIndex.create_index(
|
451
|
+
engine=engine,
|
452
|
+
table_name='my_table',
|
453
|
+
name='idx_embedding',
|
454
|
+
column='embedding',
|
455
|
+
m=16,
|
456
|
+
ef_construction=200,
|
457
|
+
ef_search=50,
|
458
|
+
op_type=VectorOpType.VECTOR_L2_OPS
|
459
|
+
)
|
460
|
+
|
461
|
+
# Drop index using class method
|
462
|
+
success = HnswVectorIndex.drop_index(
|
463
|
+
engine=engine,
|
464
|
+
table_name='my_table',
|
465
|
+
name='idx_embedding'
|
466
|
+
)
|
467
|
+
|
468
|
+
# Create index within existing transaction
|
469
|
+
with engine.begin() as conn:
|
470
|
+
success = HnswVectorIndex.create_index_in_transaction(
|
471
|
+
connection=conn,
|
472
|
+
table_name='my_table',
|
473
|
+
name='idx_embedding',
|
474
|
+
column='embedding',
|
475
|
+
m=16,
|
476
|
+
ef_construction=200,
|
477
|
+
ef_search=50
|
478
|
+
)
|
479
|
+
|
480
|
+
# Drop index within existing transaction
|
481
|
+
with engine.begin() as conn:
|
482
|
+
success = HnswVectorIndex.drop_index_in_transaction(
|
483
|
+
connection=conn,
|
484
|
+
table_name='my_table',
|
485
|
+
name='idx_embedding'
|
486
|
+
)
|
487
|
+
|
488
|
+
2. Instance Methods (Useful for reusable index configurations):
|
489
|
+
|
490
|
+
# Create index object
|
491
|
+
index = HnswVectorIndex('idx_embedding', 'embedding', m=16, ef_construction=200, ef_search=50)
|
492
|
+
|
493
|
+
# Create index using instance method
|
494
|
+
success = index.create(engine, 'my_table')
|
495
|
+
|
496
|
+
# Drop index using instance method
|
497
|
+
success = index.drop(engine, 'my_table')
|
498
|
+
|
499
|
+
# Create index within existing transaction
|
500
|
+
with engine.begin() as conn:
|
501
|
+
success = index.create_in_transaction(conn, 'my_table')
|
502
|
+
|
503
|
+
# Drop index within existing transaction
|
504
|
+
with engine.begin() as conn:
|
505
|
+
success = index.drop_in_transaction(conn, 'my_table')
|
506
|
+
|
507
|
+
3. SQLAlchemy ORM Integration:
|
508
|
+
|
509
|
+
# In table definition (requires BigInteger primary key for HNSW)
|
510
|
+
class Document(Base):
|
511
|
+
__tablename__ = 'documents'
|
512
|
+
id = Column(BigInteger, primary_key=True) # BigInteger required for HNSW
|
513
|
+
embedding = create_vector_column(128, "f32")
|
514
|
+
|
515
|
+
# Note: For ORM integration, create table first, then create index separately
|
516
|
+
# __table_args__ = (HnswVectorIndex('idx_embedding', 'embedding', m=16),)
|
517
|
+
|
518
|
+
# Create table first
|
519
|
+
Base.metadata.create_all(engine)
|
520
|
+
|
521
|
+
# Then create index separately
|
522
|
+
HnswVectorIndex.create_index(engine, 'documents', 'idx_embedding', 'embedding', m=16)
|
523
|
+
|
524
|
+
4. Client Chain Operations:
|
525
|
+
|
526
|
+
# Using client.vector_index.create_hnsw() method
|
527
|
+
client.vector_index.create_hnsw('my_table', 'idx_embedding', 'embedding', m=16, ef_construction=200)
|
528
|
+
|
529
|
+
# Using client.vector_index.create_hnsw_in_transaction() method
|
530
|
+
with client.transaction() as tx:
|
531
|
+
client.vector_index.create_hnsw_in_transaction(
|
532
|
+
'my_table', 'idx_embedding', 'embedding', tx.connection, m=16
|
533
|
+
)
|
534
|
+
|
535
|
+
Parameters:
|
536
|
+
name (str): Index name
|
537
|
+
column (Union[str, Column]): Vector column to index
|
538
|
+
m (int): Number of bi-directional links for HNSW (default: 16)
|
539
|
+
ef_construction (int): Size of dynamic candidate list for HNSW construction (default: 200)
|
540
|
+
ef_search (int): Size of dynamic candidate list for HNSW search (default: 50)
|
541
|
+
op_type (str): Vector operation type (default: vector_l2_ops)
|
542
|
+
**kwargs: Additional index parameters
|
543
|
+
|
544
|
+
Note:
|
545
|
+
|
546
|
+
- MatrixOne supports only ONE index per vector column
|
547
|
+
- Enable HNSW indexing before creating HNSW indexes: SET experimental_hnsw_index = 1
|
548
|
+
- HNSW indexes require BigInteger primary key in the table
|
549
|
+
- Higher M values provide better recall but slower construction
|
550
|
+
- Higher ef_construction provides better index quality but slower construction
|
551
|
+
- Higher ef_search provides better recall but slower search
|
552
|
+
"""
|
553
|
+
|
554
|
+
def __init__(
    self,
    name: str,
    column: Union[str, Column],
    m: int = 16,
    ef_construction: int = 200,
    ef_search: int = 50,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
):
    """
    Initialize HnswVectorIndex.

    Args::

        name: Index name
        column: Vector column to index
        m: Number of bi-directional links for HNSW (default: 16)
        ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
        ef_search: Size of dynamic candidate list for HNSW search (default: 50)
        op_type: Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters
    """
    self.index_type = VectorIndexType.HNSW
    self.m = m
    self.ef_construction = ef_construction
    self.ef_search = ef_search
    self.op_type = op_type

    # Store the bare column name for SQL generation. str() on a Column that is
    # already attached to a Table renders as "table.column", which would end
    # up inside the index column list, so prefer Column.name and only fall
    # back to str() when no name is available.
    if isinstance(column, str):
        self._column_name = column
    else:
        self._column_name = getattr(column, "name", None) or str(column)

    # The attributes above are set before delegating to Index.__init__
    super().__init__(name, column, **kwargs)

    # Set dialect options after initialization to bind to matrixone dialect
    self.dialect_options["matrixone"] = {"length": None, "using": None}
    # Also provide mysql fallback for compatibility
    self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}
|
593
|
+
|
594
|
+
def _create_index_sql(self, table_name: str) -> str:
|
595
|
+
"""Generate the CREATE INDEX SQL for HNSW vector index."""
|
596
|
+
column_name = self._column_name
|
597
|
+
sql_parts = [f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({column_name})"]
|
598
|
+
sql_parts.append(f"M {self.m}")
|
599
|
+
sql_parts.append(f"EF_CONSTRUCTION {self.ef_construction}")
|
600
|
+
sql_parts.append(f"EF_SEARCH {self.ef_search}")
|
601
|
+
sql_parts.append(f"op_type '{self.op_type}'")
|
602
|
+
return " ".join(sql_parts)
|
603
|
+
|
604
|
+
def create_sql(self, table_name: str) -> str:
    """Return the CREATE INDEX statement for ``table_name``.

    Public wrapper that delegates to ``_create_index_sql``.
    """
    statement = self._create_index_sql(table_name)
    return statement
|
607
|
+
|
608
|
+
def drop_sql(self, table_name: str) -> str:
    """Return the DROP INDEX statement removing this index from ``table_name``."""
    index_name = self.name
    return "DROP INDEX {} ON {}".format(index_name, table_name)
|
611
|
+
|
612
|
+
@classmethod
def create_index(
    cls,
    engine,
    table_name: str,
    name: str,
    column: Union[str, Column],
    m: int = 16,
    ef_construction: int = 200,
    ef_search: int = 50,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> bool:
    """
    Build an HNSW index definition and create it in a fresh transaction.

    The transaction is opened with ``engine.begin()``; HNSW support is switched
    on (``SET experimental_hnsw_index = 1``) before the CREATE INDEX runs.
    Any exception is caught, reported with ``print`` and turned into ``False``.

    Args::

        engine: SQLAlchemy engine
        table_name: Name of the table
        name: Name of the index
        column: Vector column to index
        m: Number of bi-directional links for HNSW (default: 16)
        ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
        ef_search: Size of dynamic candidate list for HNSW search (default: 50)
        op_type: Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        definition = cls(name, column, m, ef_construction, ef_search, op_type, **kwargs)
        statement = definition.create_sql(table_name)

        with engine.begin() as tx_conn:
            # Enable HNSW indexing
            _exec_sql_safe(tx_conn, "SET experimental_hnsw_index = 1")
            _exec_sql_safe(tx_conn, statement)
    except Exception as exc:
        print(f"Failed to create HNSW vector index: {exc}")
        return False
    return True
|
656
|
+
|
657
|
+
@classmethod
def create_index_in_transaction(
    cls,
    connection,
    table_name: str,
    name: str,
    column: Union[str, Column],
    m: int = 16,
    ef_construction: int = 200,
    ef_search: int = 50,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> bool:
    """
    Build an HNSW index definition and create it on the caller's connection.

    HNSW support is switched on (``SET experimental_hnsw_index = 1``) before
    the CREATE INDEX runs. Any exception is caught, reported with ``print``
    and turned into ``False``.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table
        name: Name of the index
        column: Vector column to index
        m: Number of bi-directional links for HNSW (default: 16)
        ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
        ef_search: Size of dynamic candidate list for HNSW search (default: 50)
        op_type: Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        definition = cls(name, column, m, ef_construction, ef_search, op_type, **kwargs)
        statements = (
            # Enable HNSW indexing
            "SET experimental_hnsw_index = 1",
            definition.create_sql(table_name),
        )
        for statement in statements:
            _exec_sql_safe(connection, statement)
    except Exception as exc:
        print(f"Failed to create HNSW vector index in transaction: {exc}")
        return False
    return True
|
700
|
+
|
701
|
+
@classmethod
def drop_index(cls, engine, table_name: str, name: str) -> bool:
    """
    Drop the named HNSW index inside a fresh ``engine.begin()`` transaction.

    Any exception is caught, reported with ``print`` and turned into ``False``.

    Args::

        engine: SQLAlchemy engine
        table_name: Name of the table
        name: Name of the index to drop

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        statement = f"DROP INDEX {name} ON {table_name}"
        with engine.begin() as tx_conn:
            _exec_sql_safe(tx_conn, statement)
    except Exception as exc:
        print(f"Failed to drop HNSW vector index: {exc}")
        return False
    return True
|
724
|
+
|
725
|
+
@classmethod
def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
    """
    Drop a named HNSW vector index on a caller-supplied connection.

    The connection is used as-is; no transaction is opened here. Failures
    are caught, printed to stdout and signalled by returning False.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table
        name: Name of the index to drop

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        _exec_sql_safe(connection, f"DROP INDEX {name} ON {table_name}")
    except Exception as exc:
        print(f"Failed to drop HNSW vector index in transaction: {exc}")
        return False
    return True
|
747
|
+
|
748
|
+
def create(self, engine, table_name: str) -> bool:
    """
    Create this HNSW vector index inside a fresh ``engine.begin()`` block.

    Failures are caught, printed to stdout and signalled by returning False.

    Args::

        engine: SQLAlchemy engine
        table_name: Name of the table

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        ddl = self.create_sql(table_name)
        with engine.begin() as txn_conn:
            # Enable HNSW indexing on this connection, then issue the DDL.
            for statement in ("SET experimental_hnsw_index = 1", ddl):
                _exec_sql_safe(txn_conn, statement)
    except Exception as exc:
        print(f"Failed to create HNSW vector index: {exc}")
        return False
    return True
|
772
|
+
|
773
|
+
def drop(self, engine, table_name: str) -> bool:
    """
    Drop this HNSW vector index inside a fresh ``engine.begin()`` block.

    Failures are caught, printed to stdout and signalled by returning False.

    Args::

        engine: SQLAlchemy engine
        table_name: Name of the table

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        drop_statement = self.drop_sql(table_name)
        with engine.begin() as txn_conn:
            _exec_sql_safe(txn_conn, drop_statement)
    except Exception as exc:
        print(f"Failed to drop HNSW vector index: {exc}")
        return False
    return True
|
794
|
+
|
795
|
+
def create_in_transaction(self, connection, table_name: str) -> bool:
    """
    Create this HNSW vector index on a caller-supplied connection.

    The connection is used as-is; no transaction is opened here. Failures
    are caught, printed to stdout and signalled by returning False.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        ddl = self.create_sql(table_name)
        # Enable HNSW indexing on this connection, then issue the DDL.
        for statement in ("SET experimental_hnsw_index = 1", ddl):
            _exec_sql_safe(connection, statement)
    except Exception as exc:
        print(f"Failed to create HNSW vector index in transaction: {exc}")
        return False
    return True
|
818
|
+
|
819
|
+
def drop_in_transaction(self, connection, table_name: str) -> bool:
    """
    Drop this HNSW vector index on a caller-supplied connection.

    The connection is used as-is; no transaction is opened here. Failures
    are caught, printed to stdout and signalled by returning False.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table

    Returns::

        bool: True if successful, False otherwise
    """
    try:
        _exec_sql_safe(connection, self.drop_sql(table_name))
    except Exception as exc:
        print(f"Failed to drop HNSW vector index in transaction: {exc}")
        return False
    return True
|
839
|
+
|
840
|
+
|
841
|
+
class VectorIndex(Index):
    """
    SQLAlchemy Index for vector columns with MatrixOne-specific syntax.

    This class provides a generic interface for creating vector indexes with various
    algorithms and operation types. It supports both IVF (Inverted File) and HNSW
    (Hierarchical Navigable Small World) indexing algorithms.

    Key Features:

    - Support for multiple vector indexing algorithms (IVF, HNSW)
    - Configurable operation types (L2 distance, cosine similarity, inner product)
    - Automatic SQL generation for index creation and management
    - Integration with MatrixOne's vector search capabilities
    - Support for both class methods and instance methods

    Supported Index Types:
    - IVF (Inverted File): Good for large datasets, requires training
    - HNSW: Good for high-dimensional vectors, no training required

    Supported Operation Types:
    - VECTOR_L2_OPS: L2 (Euclidean) distance
    - VECTOR_COSINE_OPS: Cosine similarity
    - VECTOR_IP_OPS: Inner product similarity

    Usage Examples::

        # Create IVF index
        index = VectorIndex(
            name='vec_idx_ivf',
            column='embedding',
            index_type=VectorIndexType.IVFFLAT,
            lists=100,
            op_type=VectorOpType.VECTOR_L2_OPS
        )

        # Create HNSW index
        index = VectorIndex(
            name='vec_idx_hnsw',
            column='embedding',
            index_type=VectorIndexType.HNSW,
            m=16,
            ef_construction=200,
            op_type=VectorOpType.VECTOR_COSINE_OPS
        )

    Note: This is the legacy generic class. For better type safety and specific
    algorithm features, consider using IVFVectorIndex or HnswVectorIndex instead.
    """

    def __init__(
        self,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ):
        """
        Initialize VectorIndex.

        Args::

            name: Index name
            column: Vector column to index
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters, forwarded to ``Index.__init__``
        """
        self.index_type = index_type
        self.lists = lists
        self.op_type = op_type
        # HNSW parameters
        self.m = m
        self.ef_construction = ef_construction
        self.ef_search = ef_search

        # Store the bare column name; it is interpolated into the generated DDL.
        self._column_name = self._resolve_column_name(column)

        super().__init__(name, column, **kwargs)

        # Set dialect options after initialization to bind to matrixone dialect
        self.dialect_options["matrixone"] = {"length": None, "using": None}
        # Also provide mysql fallback for compatibility
        self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}

    @staticmethod
    def _resolve_column_name(column) -> str:
        """
        Return the unqualified column name used inside ``ON table(column)``.

        Strings are returned unchanged. For column objects the ``name``
        attribute is preferred, because ``str()`` of a table-bound SQLAlchemy
        Column yields ``"table.column"``, which would produce
        ``ON t(t.column)`` in the generated statement. ``str(column)`` remains
        the fallback when no usable ``name`` is present.
        """
        if isinstance(column, str):
            return column
        bare_name = getattr(column, "name", None)
        if isinstance(bare_name, str) and bare_name:
            return str(bare_name)
        return str(column)

    def _create_index_sql(self, table_name: str) -> str:
        """Generate the CREATE INDEX SQL for vector index."""
        column_name = self._column_name

        sql_parts = [f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({column_name})"]

        # Add parameters based on index type
        if self.index_type == VectorIndexType.IVFFLAT and self.lists is not None:
            sql_parts.append(f"lists = {self.lists}")
        elif self.index_type == VectorIndexType.HNSW:
            # Each HNSW parameter is emitted only when it was explicitly set.
            if self.m is not None:
                sql_parts.append(f"M {self.m}")
            if self.ef_construction is not None:
                sql_parts.append(f"EF_CONSTRUCTION {self.ef_construction}")
            if self.ef_search is not None:
                sql_parts.append(f"EF_SEARCH {self.ef_search}")

        # Add operation type
        sql_parts.append(f"op_type '{self.op_type}'")

        return " ".join(sql_parts)

    def create_sql(self, table_name: str) -> str:
        """Generate CREATE INDEX SQL for the given table name."""
        return self._create_index_sql(table_name)

    def drop_sql(self, table_name: str) -> str:
        """Generate DROP INDEX SQL for the given table name."""
        return f"DROP INDEX {self.name} ON {table_name}"

    @classmethod
    def create_index(
        cls,
        engine,
        table_name: str,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ) -> bool:
        """
        Create a vector index using ORM-style method.

        The statements run inside a fresh ``engine.begin()`` block. Failures
        are caught, printed to stdout and signalled by returning False.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            index = cls(name, column, index_type, lists, op_type, m, ef_construction, ef_search, **kwargs)
            sql = index.create_sql(table_name)

            with engine.begin() as conn:
                # Enable appropriate indexing in the same connection
                if index_type == VectorIndexType.IVFFLAT:
                    _exec_sql_safe(conn, "SET experimental_ivf_index = 1")
                    _exec_sql_safe(conn, "SET probe_limit = 1")
                elif index_type == VectorIndexType.HNSW:
                    _exec_sql_safe(conn, "SET experimental_hnsw_index = 1")

                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create vector index: {e}")
            return False

    @classmethod
    def drop_index(cls, engine, table_name: str, name: str) -> bool:
        """
        Drop a vector index using ORM-style method.

        Failures are caught, printed to stdout and signalled by returning False.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop vector index: {e}")
            return False

    def create(self, engine, table_name: str) -> bool:
        """
        Create this vector index using ORM-style method.

        Delegates to ``create_index`` with this instance's stored settings.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.create_index(
            engine,
            table_name,
            self.name,
            self._column_name,
            self.index_type,
            self.lists,
            self.op_type,
            self.m,
            self.ef_construction,
            self.ef_search,
        )

    def drop(self, engine, table_name: str) -> bool:
        """
        Drop this vector index using ORM-style method.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.drop_index(engine, table_name, self.name)

    @classmethod
    def create_index_in_transaction(
        cls,
        connection,
        table_name: str,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ) -> bool:
        """
        Create a vector index within an existing transaction.

        No ``SET experimental_*`` statement is issued here; the relevant
        indexing feature must already be enabled on the connection.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful (this method never returns False)

        Raises::

            Exception: Any error raised while building or executing the
                statement is printed and then re-raised so that the
                surrounding transaction can roll back.
        """
        try:
            index = cls(name, column, index_type, lists, op_type, m, ef_construction, ef_search, **kwargs)
            sql = index.create_sql(table_name)

            # Note: Indexing should be enabled before calling this method
            # The SET statements are removed to avoid interfering with transaction rollback

            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to create vector index in transaction: {e}")
            # Re-raise the exception to ensure transaction rollback
            raise

    @classmethod
    def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
        """
        Drop a vector index within an existing transaction.

        Failures are caught, printed to stdout and signalled by returning
        False; they are not re-raised.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to drop vector index in transaction: {e}")
            return False

    def create_in_transaction(self, connection, table_name: str) -> bool:
        """
        Create this vector index within an existing transaction.

        Delegates to ``create_index_in_transaction``, so errors propagate to
        the caller instead of being converted to a False return.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table

        Returns::

            bool: True if successful
        """
        return self.__class__.create_index_in_transaction(
            connection,
            table_name,
            self.name,
            self._column_name,
            self.index_type,
            self.lists,
            self.op_type,
            self.m,
            self.ef_construction,
            self.ef_search,
        )

    def drop_in_transaction(self, connection, table_name: str) -> bool:
        """
        Drop this vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.drop_index_in_transaction(connection, table_name, self.name)
|
1205
|
+
|
1206
|
+
|
1207
|
+
class CreateVectorIndex(DDLElement):
    """DDL element that carries a VectorIndex to be rendered as CREATE INDEX."""

    def __init__(self, index: VectorIndex, if_not_exists: bool = False):
        """
        Store the index and the IF NOT EXISTS flag for the compiler hook.

        Args::

            index: VectorIndex whose definition should be rendered
            if_not_exists: When true, the compiled statement includes IF NOT EXISTS
        """
        self.if_not_exists = if_not_exists
        self.index = index
|
1213
|
+
|
1214
|
+
|
1215
|
+
def _render_create_vector_index(index, if_not_exists: bool) -> str:
    """
    Render the CREATE INDEX statement for a table-attached VectorIndex.

    Shared by the three compiler hooks below, which previously each carried
    an identical copy of this logic.

    Args::

        index: VectorIndex instance; ``index.table`` must be set because the
            table name is read from it
        if_not_exists: Whether to emit IF NOT EXISTS after CREATE INDEX

    Returns::

        The complete SQL string
    """
    sql_parts = ["CREATE INDEX"]

    if if_not_exists:
        sql_parts.append("IF NOT EXISTS")

    # Use the column name stored on the index at construction time.
    sql_parts.append(f"{index.name} USING {index.index_type} ON {index.table.name}({index._column_name})")

    # Add parameters based on index type
    if index.index_type == VectorIndexType.IVFFLAT and index.lists is not None:
        sql_parts.append(f"lists = {index.lists}")
    elif index.index_type == VectorIndexType.HNSW:
        # Each HNSW parameter is emitted only when it was explicitly set.
        if index.m is not None:
            sql_parts.append(f"M {index.m}")
        if index.ef_construction is not None:
            sql_parts.append(f"EF_CONSTRUCTION {index.ef_construction}")
        if index.ef_search is not None:
            sql_parts.append(f"EF_SEARCH {index.ef_search}")

    # Add operation type
    sql_parts.append(f"op_type '{index.op_type}'")

    return " ".join(sql_parts)


@compiles(CreateVectorIndex)
def compile_create_vector_index(element: CreateVectorIndex, compiler, **kw):
    """Compile CREATE VECTOR INDEX statement."""
    return _render_create_vector_index(element.index, element.if_not_exists)


@compiles(SQLAlchemyCreateIndex, "matrixone")
def compile_create_vector_index_matrixone(element: SQLAlchemyCreateIndex, compiler, **kw):
    """
    Compile CREATE INDEX for VectorIndex on MatrixOne dialect.

    Indexes that are not VectorIndex instances fall back to the compiler's
    default ``visit_create_index``.
    """
    index = element.element

    if isinstance(index, VectorIndex):
        return _render_create_vector_index(index, element.if_not_exists)

    # Fall back to default compilation
    return compiler.visit_create_index(element, **kw)


@compiles(SQLAlchemyCreateIndex, "mysql")
def compile_create_vector_index_mysql(element: SQLAlchemyCreateIndex, compiler, **kw):
    """
    Compile CREATE INDEX for VectorIndex on MySQL dialect.

    Indexes that are not VectorIndex instances fall back to the compiler's
    default ``visit_create_index``.
    """
    index = element.element

    if isinstance(index, VectorIndex):
        return _render_create_vector_index(index, element.if_not_exists)

    # Fall back to default MySQL index compilation
    return compiler.visit_create_index(element, **kw)
|
1322
|
+
|
1323
|
+
|
1324
|
+
def create_vector_index(
    name: str,
    column: Union[str, Column],
    index_type: str = VectorIndexType.IVFFLAT,
    lists: Optional[int] = None,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    # HNSW parameters
    m: Optional[int] = None,
    ef_construction: Optional[int] = None,
    ef_search: Optional[int] = None,
    **kwargs,
) -> VectorIndex:
    """
    Construct a VectorIndex from the given settings.

    This is a thin factory: every argument is handed to the VectorIndex
    constructor unchanged and nothing is executed against a database.

    Args::

        name: Index name
        column: Vector column to index
        index_type: Type of vector index (ivfflat, hnsw, etc.)
        lists: Number of lists for IVFFLAT (optional)
        op_type: Vector operation type
        m: Number of bi-directional links for HNSW (optional)
        ef_construction: Size of dynamic candidate list for HNSW construction (optional)
        ef_search: Size of dynamic candidate list for HNSW search (optional)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create IVFFLAT index with 256 lists
        idx = create_vector_index(
            "idx_vector_l2",
            "embedding",
            index_type="ivfflat",
            lists=256,
            op_type="vector_l2_ops"
        )

        # Create HNSW index with custom parameters
        idx = create_vector_index(
            "idx_vector_hnsw",
            "embedding",
            index_type="hnsw",
            m=48,
            ef_construction=64,
            ef_search=64,
            op_type="vector_l2_ops"
        )
    """
    # Positional order mirrors the VectorIndex constructor signature.
    vector_index = VectorIndex(
        name,
        column,
        index_type,
        lists,
        op_type,
        m,
        ef_construction,
        ef_search,
        **kwargs,
    )
    return vector_index
|
1387
|
+
|
1388
|
+
|
1389
|
+
def create_ivfflat_index(
    name: str,
    column: Union[str, Column],
    lists: int = 256,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> VectorIndex:
    """
    Construct a VectorIndex whose index type is fixed to IVFFLAT.

    Args::

        name: Index name
        column: Vector column to index
        lists: Number of lists (default: 256)
        op_type: Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create IVFFLAT index with 256 lists for L2 distance
        idx = create_ivfflat_index("idx_embedding_l2", "embedding", lists=256)

        # Create IVFFLAT index with 128 lists for cosine similarity
        idx = create_ivfflat_index(
            "idx_embedding_cosine",
            "embedding",
            lists=128,
            op_type="vector_cosine_ops"
        )
    """
    # Leading arguments follow create_vector_index's parameter order:
    # name, column, index_type, lists, op_type.
    return create_vector_index(name, column, VectorIndexType.IVFFLAT, lists, op_type, **kwargs)
|
1431
|
+
|
1432
|
+
|
1433
|
+
def create_hnsw_index(
    name: str,
    column: Union[str, Column],
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> VectorIndex:
    """
    Construct a VectorIndex whose index type is fixed to HNSW.

    Args::

        name: Index name
        column: Vector column to index
        m: Number of bi-directional links (default: 48)
        ef_construction: Size of dynamic candidate list for construction (default: 64)
        ef_search: Size of dynamic candidate list for search (default: 64)
        op_type: Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create HNSW index with default parameters
        idx = create_hnsw_index("idx_embedding_hnsw", "embedding")

        # Create HNSW index with custom parameters
        idx = create_hnsw_index(
            "idx_embedding_hnsw_custom",
            "embedding",
            m=32,
            ef_construction=128,
            ef_search=128,
            op_type="vector_cosine_ops"
        )
    """
    # HNSW tuning values stay keyword-based so that `lists` is left for the
    # caller to supply through **kwargs if desired.
    hnsw_settings = {
        "m": m,
        "ef_construction": ef_construction,
        "ef_search": ef_search,
    }
    return create_vector_index(
        name,
        column,
        index_type=VectorIndexType.HNSW,
        op_type=op_type,
        **hnsw_settings,
        **kwargs,
    )
|
1483
|
+
|
1484
|
+
|
1485
|
+
class VectorIndexBuilder:
|
1486
|
+
"""
|
1487
|
+
Builder class for creating vector indexes with different configurations.
|
1488
|
+
"""
|
1489
|
+
|
1490
|
+
def __init__(self, column: Union[str, Column]):
    """
    Start a builder for indexes on a single vector column.

    Args::

        column: Vector column to index
    """
    # Indexes accumulated by the chaining methods, in call order.
    self._indexes = list()
    self.column = column
|
1500
|
+
|
1501
|
+
def ivfflat(
    self, name: str, lists: int = 256, op_type: str = VectorOpType.VECTOR_L2_OPS, **kwargs
) -> "VectorIndexBuilder":
    """
    Queue an IVFFLAT index on the builder's column.

    Args::

        name: Index name
        lists: Number of lists
        op_type: Vector operation type
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    self._indexes.append(create_ivfflat_index(name, self.column, lists, op_type, **kwargs))
    return self
|
1521
|
+
|
1522
|
+
def l2_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
    """
    Queue an IVFFLAT index that uses the L2 distance operation type.

    Args::

        name: Index name
        lists: Number of lists for IVFFLAT
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_L2_OPS, **kwargs)
|
1537
|
+
|
1538
|
+
def cosine_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
    """
    Queue an IVFFLAT index that uses the cosine operation type.

    Args::

        name: Index name
        lists: Number of lists for IVFFLAT
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_COSINE_OPS, **kwargs)
|
1553
|
+
|
1554
|
+
def ip_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
    """
    Queue an IVFFLAT index that uses the inner product operation type.

    Args::

        name: Index name
        lists: Number of lists for IVFFLAT
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_IP_OPS, **kwargs)
|
1569
|
+
|
1570
|
+
def hnsw(
    self,
    name: str,
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> "VectorIndexBuilder":
    """
    Queue an HNSW index on the builder's column.

    Args::

        name: Index name
        m: Number of bi-directional links
        ef_construction: Size of dynamic candidate list for construction
        ef_search: Size of dynamic candidate list for search
        op_type: Vector operation type
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    self._indexes.append(
        create_hnsw_index(name, self.column, m, ef_construction, ef_search, op_type, **kwargs)
    )
    return self
|
1598
|
+
|
1599
|
+
def hnsw_l2_index(
    self,
    name: str,
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    **kwargs,
) -> "VectorIndexBuilder":
    """
    Queue an HNSW index that uses the L2 distance operation type.

    Args::

        name: Index name
        m: Number of bi-directional links
        ef_construction: Size of dynamic candidate list for construction
        ef_search: Size of dynamic candidate list for search
        **kwargs: Additional parameters

    Returns::

        Self for method chaining
    """
    return self.hnsw(
        name,
        m=m,
        ef_construction=ef_construction,
        ef_search=ef_search,
        op_type=VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    )
|
1623
|
+
|
1624
|
+
def hnsw_cosine_index(
    self,
    name: str,
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    **kwargs,
) -> "VectorIndexBuilder":
    """
    Queue an HNSW index that uses the cosine operator.

    Shorthand for :meth:`hnsw` with ``op_type`` fixed to
    ``VectorOpType.VECTOR_COSINE_OPS``.

    Args::

        name: Index name
        m: Number of bi-directional links (default 48)
        ef_construction: Size of dynamic candidate list for construction
            (default 64)
        ef_search: Size of dynamic candidate list for search (default 64)
        **kwargs: Extra keyword arguments passed through to :meth:`hnsw`

    Returns::

        This builder, so calls can be chained
    """
    metric = VectorOpType.VECTOR_COSINE_OPS
    return self.hnsw(name, m, ef_construction, ef_search, metric, **kwargs)
|
1648
|
+
|
1649
|
+
def hnsw_ip_index(
    self,
    name: str,
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    **kwargs,
) -> "VectorIndexBuilder":
    """
    Queue an HNSW index that uses the inner-product operator.

    Shorthand for :meth:`hnsw` with ``op_type`` fixed to
    ``VectorOpType.VECTOR_IP_OPS``.

    Args::

        name: Index name
        m: Number of bi-directional links (default 48)
        ef_construction: Size of dynamic candidate list for construction
            (default 64)
        ef_search: Size of dynamic candidate list for search (default 64)
        **kwargs: Extra keyword arguments passed through to :meth:`hnsw`

    Returns::

        This builder, so calls can be chained
    """
    metric = VectorOpType.VECTOR_IP_OPS
    return self.hnsw(name, m, ef_construction, ef_search, metric, **kwargs)
|
1673
|
+
|
1674
|
+
def build(self) -> List[VectorIndex]:
    """
    Return the vector indexes queued on this builder.

    The result comes from calling ``copy()`` on the builder's internal
    collection rather than handing out the collection itself.

    Returns::

        List of VectorIndex instances
    """
    queued = self._indexes.copy()
    return queued
|
1683
|
+
|
1684
|
+
def add_to_table(self, table) -> "VectorIndexBuilder":
    """
    Attach every queued index to ``table``.

    For each pending index, its ``table`` attribute is pointed at
    ``table`` and the index is then added to ``table.indexes``.

    Args::

        table: SQLAlchemy Table instance

    Returns::

        This builder, so calls can be chained
    """
    for pending in self._indexes:
        # Bind the index to the table first, then register it on the table.
        pending.table = table
        table.indexes.add(pending)
    return self
|
1700
|
+
|
1701
|
+
|
1702
|
+
def vector_index_builder(column: Union[str, Column]) -> VectorIndexBuilder:
    """
    Convenience factory returning a VectorIndexBuilder bound to ``column``.

    Args::

        column: Vector column to index (column name or Column object)

    Returns::

        VectorIndexBuilder instance

    Example::

        # Create multiple indexes for a vector column
        indexes = (
            vector_index_builder("embedding")
            .l2_index("idx_l2", lists=256)
            .cosine_index("idx_cosine", lists=128)
            .build()
        )
    """
    builder = VectorIndexBuilder(column)
    return builder
|