matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,1721 @@
1
+ # Copyright 2021 - 2022 Matrix Origin
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Vector index support for SQLAlchemy integration with MatrixOne.
17
+ """
18
+
19
+ from typing import List, Optional, Union
20
+
21
+ from sqlalchemy import Column, Index, text
22
+ from sqlalchemy.ext.compiler import compiles
23
+ from sqlalchemy.schema import DDLElement
24
+ from sqlalchemy.sql.ddl import CreateIndex as SQLAlchemyCreateIndex
25
+
26
+
27
+ def _exec_sql_safe(connection, sql: str):
28
+ """
29
+ Execute SQL safely, bypassing SQLAlchemy's bind parameter parsing.
30
+
31
+ This prevents JSON strings like {"a":1} from being incorrectly parsed as :1 bind params.
32
+ Uses exec_driver_sql() when available, falls back to text() for testing/compatibility.
33
+ """
34
+ if hasattr(connection, 'exec_driver_sql'):
35
+ # Escape % to %% for pymysql's format string handling
36
+ escaped_sql = sql.replace('%', '%%')
37
+ return connection.exec_driver_sql(escaped_sql)
38
+ else:
39
+ # Fallback for testing or older SQLAlchemy versions
40
+ return connection.execute(text(sql))
41
+
42
+
class VectorIndexType:
    """Namespace of string constants naming the supported vector index algorithms."""

    # IVF (inverted file) index, emitted as "USING ivfflat" in CREATE INDEX.
    IVFFLAT = "ivfflat"
    # HNSW (Hierarchical Navigable Small World) index, emitted as "USING hnsw".
    HNSW = "hnsw"
48
+
49
+
class VectorOpType:
    """Namespace of string constants naming the vector operation (distance) types."""

    # L2 distance
    VECTOR_L2_OPS = "vector_l2_ops"
    # Inner product
    VECTOR_IP_OPS = "vector_ip_ops"
    # Cosine similarity
    VECTOR_COSINE_OPS = "vector_cosine_ops"
56
+
57
+
class IVFVectorIndex(Index):
    """
    SQLAlchemy Index for IVFFLAT vector columns with MatrixOne-specific syntax.

    Specialized class for IVFFLAT vector indexes with type safety and clear API.
    Every create/drop helper returns ``True`` on success; on any exception it
    prints a failure message and returns ``False`` instead of raising.

    Usage Examples

    1. Class Methods (Recommended for one-time operations)::

        # Create index using class method
        success = IVFVectorIndex.create_index(
            engine=engine,
            table_name='my_table',
            name='idx_embedding',
            column='embedding',
            lists=100,
            op_type=VectorOpType.VECTOR_L2_OPS
        )

        # Drop index using class method
        success = IVFVectorIndex.drop_index(
            engine=engine,
            table_name='my_table',
            name='idx_embedding'
        )

        # Create index within existing transaction
        with engine.begin() as conn:
            success = IVFVectorIndex.create_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='idx_embedding',
                column='embedding',
                lists=100
            )

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = IVFVectorIndex.drop_index_in_transaction(
                connection=conn,
                table_name='my_table',
                name='idx_embedding'
            )

    2. Instance Methods (Useful for reusable index configurations)::

        # Create index object
        index = IVFVectorIndex('idx_embedding', 'embedding', lists=100)

        # Create index using instance method
        success = index.create(engine, 'my_table')

        # Drop index using instance method
        success = index.drop(engine, 'my_table')

        # Create index within existing transaction
        with engine.begin() as conn:
            success = index.create_in_transaction(conn, 'my_table')

        # Drop index within existing transaction
        with engine.begin() as conn:
            success = index.drop_in_transaction(conn, 'my_table')

    3. SQLAlchemy ORM Integration::

        # In table definition
        class Document(Base):
            __tablename__ = 'documents'
            id = Column(Integer, primary_key=True)
            embedding = create_vector_column(128, "f32")

            # Note: For ORM integration, create table first, then create index separately
            # __table_args__ = (IVFVectorIndex('idx_embedding', 'embedding', lists=100),)

        # Create table first
        Base.metadata.create_all(engine)

        # Then create index separately
        IVFVectorIndex.create_index(engine, 'documents', 'idx_embedding', 'embedding', lists=100)

    4. Client Chain Operations::

        # Using client.vector_index.create_ivf() method
        client.vector_index.create_ivf('my_table', 'idx_embedding', 'embedding', lists=100)

        # Using client.vector_index.create_ivf_in_transaction() method
        with client.transaction() as tx:
            client.vector_index.create_ivf_in_transaction(
                'my_table', 'idx_embedding', 'embedding', tx.connection, lists=100
            )

    Parameters:
        name (str): Index name
        column (Union[str, Column]): Vector column to index
        lists (int): Number of lists for IVFFLAT (default: 100)
        op_type (str): Vector operation type (default: vector_l2_ops)
        **kwargs: Additional index parameters

    Note:

        - MatrixOne supports only ONE index per vector column
        - Enable IVF indexing before creating IVFFLAT indexes: SET experimental_ivf_index = 1
        - Set probe limit for search: SET probe_limit = 1
        - Index, table and column names are interpolated into the DDL text
          verbatim, so they must come from trusted input
    """

    def __init__(
        self,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ):
        """
        Initialize IVFVectorIndex.

        Args::

            name: Index name
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters
        """
        self.index_type = VectorIndexType.IVFFLAT
        self.lists = lists
        self.op_type = op_type

        # Remember the bare column name for SQL generation. str() on a Column
        # that is already attached to a Table yields "table.column", which is
        # not valid inside "ON table(...)", so prefer the .name attribute and
        # only fall back to str() when no name is available.
        if isinstance(column, str):
            self._column_name = column
        else:
            self._column_name = getattr(column, "name", None) or str(column)

        super().__init__(name, column, **kwargs)

        # Set dialect options after initialization to bind to matrixone dialect
        self.dialect_options["matrixone"] = {"length": None, "using": None}
        # Also provide mysql fallback for compatibility
        self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}

    def _create_index_sql(self, table_name: str) -> str:
        """Generate the CREATE INDEX SQL for IVFFLAT vector index."""
        return (
            f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({self._column_name})"
            f" LISTS {self.lists}"
            f" op_type '{self.op_type}'"
        )

    def create_sql(self, table_name: str) -> str:
        """Generate CREATE INDEX SQL for the given table name."""
        return self._create_index_sql(table_name)

    def drop_sql(self, table_name: str) -> str:
        """Generate DROP INDEX SQL for the given table name."""
        return f"DROP INDEX {self.name} ON {table_name}"

    @staticmethod
    def _run_create(connection, sql: str) -> None:
        """Enable IVF indexing on ``connection``, then execute the CREATE INDEX statement."""
        _exec_sql_safe(connection, "SET experimental_ivf_index = 1")
        _exec_sql_safe(connection, "SET probe_limit = 1")
        _exec_sql_safe(connection, sql)

    @classmethod
    def create_index(
        cls,
        engine,
        table_name: str,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ) -> bool:
        """
        Create an IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            index = cls(name, column, lists, op_type, **kwargs)
            sql = index.create_sql(table_name)
            with engine.begin() as conn:
                cls._run_create(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index: {e}")
            return False

    @classmethod
    def create_index_in_transaction(
        cls,
        connection,
        table_name: str,
        name: str,
        column: Union[str, Column],
        lists: int = 100,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ) -> bool:
        """
        Create an IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            lists: Number of lists for IVFFLAT (default: 100)
            op_type: Vector operation type (default: vector_l2_ops)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            index = cls(name, column, lists, op_type, **kwargs)
            cls._run_create(connection, index.create_sql(table_name))
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index in transaction: {e}")
            return False

    @classmethod
    def drop_index(cls, engine, table_name: str, name: str) -> bool:
        """
        Drop an IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index: {e}")
            return False

    @classmethod
    def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
        """
        Drop an IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            _exec_sql_safe(connection, f"DROP INDEX {name} ON {table_name}")
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index in transaction: {e}")
            return False

    def create(self, engine, table_name: str) -> bool:
        """
        Create this IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.create_sql(table_name)
            with engine.begin() as conn:
                self._run_create(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index: {e}")
            return False

    def drop(self, engine, table_name: str) -> bool:
        """
        Drop this IVFFLAT vector index using ORM-style method.

        Opens its own transaction via ``engine.begin()``.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            sql = self.drop_sql(table_name)
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index: {e}")
            return False

    def create_in_transaction(self, connection, table_name: str) -> bool:
        """
        Create this IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            self._run_create(connection, self.create_sql(table_name))
            return True
        except Exception as e:
            print(f"Failed to create IVFFLAT vector index in transaction: {e}")
            return False

    def drop_in_transaction(self, connection, table_name: str) -> bool:
        """
        Drop this IVFFLAT vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection object
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        try:
            _exec_sql_safe(connection, self.drop_sql(table_name))
            return True
        except Exception as e:
            print(f"Failed to drop IVFFLAT vector index in transaction: {e}")
            return False
437
+
438
+
439
+ class HnswVectorIndex(Index):
440
+ """
441
+ SQLAlchemy Index for HNSW vector columns with MatrixOne-specific syntax.
442
+
443
+ Specialized class for HNSW vector indexes with type safety and clear API.
444
+
445
+ Usage Examples
446
+
447
+ 1. Class Methods (Recommended for one-time operations):
448
+
449
+ # Create index using class method
450
+ success = HnswVectorIndex.create_index(
451
+ engine=engine,
452
+ table_name='my_table',
453
+ name='idx_embedding',
454
+ column='embedding',
455
+ m=16,
456
+ ef_construction=200,
457
+ ef_search=50,
458
+ op_type=VectorOpType.VECTOR_L2_OPS
459
+ )
460
+
461
+ # Drop index using class method
462
+ success = HnswVectorIndex.drop_index(
463
+ engine=engine,
464
+ table_name='my_table',
465
+ name='idx_embedding'
466
+ )
467
+
468
+ # Create index within existing transaction
469
+ with engine.begin() as conn:
470
+ success = HnswVectorIndex.create_index_in_transaction(
471
+ connection=conn,
472
+ table_name='my_table',
473
+ name='idx_embedding',
474
+ column='embedding',
475
+ m=16,
476
+ ef_construction=200,
477
+ ef_search=50
478
+ )
479
+
480
+ # Drop index within existing transaction
481
+ with engine.begin() as conn:
482
+ success = HnswVectorIndex.drop_index_in_transaction(
483
+ connection=conn,
484
+ table_name='my_table',
485
+ name='idx_embedding'
486
+ )
487
+
488
+ 2. Instance Methods (Useful for reusable index configurations):
489
+
490
+ # Create index object
491
+ index = HnswVectorIndex('idx_embedding', 'embedding', m=16, ef_construction=200, ef_search=50)
492
+
493
+ # Create index using instance method
494
+ success = index.create(engine, 'my_table')
495
+
496
+ # Drop index using instance method
497
+ success = index.drop(engine, 'my_table')
498
+
499
+ # Create index within existing transaction
500
+ with engine.begin() as conn:
501
+ success = index.create_in_transaction(conn, 'my_table')
502
+
503
+ # Drop index within existing transaction
504
+ with engine.begin() as conn:
505
+ success = index.drop_in_transaction(conn, 'my_table')
506
+
507
+ 3. SQLAlchemy ORM Integration:
508
+
509
+ # In table definition (requires BigInteger primary key for HNSW)
510
+ class Document(Base):
511
+ __tablename__ = 'documents'
512
+ id = Column(BigInteger, primary_key=True) # BigInteger required for HNSW
513
+ embedding = create_vector_column(128, "f32")
514
+
515
+ # Note: For ORM integration, create table first, then create index separately
516
+ # __table_args__ = (HnswVectorIndex('idx_embedding', 'embedding', m=16),)
517
+
518
+ # Create table first
519
+ Base.metadata.create_all(engine)
520
+
521
+ # Then create index separately
522
+ HnswVectorIndex.create_index(engine, 'documents', 'idx_embedding', 'embedding', m=16)
523
+
524
+ 4. Client Chain Operations:
525
+
526
+ # Using client.vector_index.create_hnsw() method
527
+ client.vector_index.create_hnsw('my_table', 'idx_embedding', 'embedding', m=16, ef_construction=200)
528
+
529
+ # Using client.vector_index.create_hnsw_in_transaction() method
530
+ with client.transaction() as tx:
531
+ client.vector_index.create_hnsw_in_transaction(
532
+ 'my_table', 'idx_embedding', 'embedding', tx.connection, m=16
533
+ )
534
+
535
+ Parameters:
536
+ name (str): Index name
537
+ column (Union[str, Column]): Vector column to index
538
+ m (int): Number of bi-directional links for HNSW (default: 16)
539
+ ef_construction (int): Size of dynamic candidate list for HNSW construction (default: 200)
540
+ ef_search (int): Size of dynamic candidate list for HNSW search (default: 50)
541
+ op_type (str): Vector operation type (default: vector_l2_ops)
542
+ **kwargs: Additional index parameters
543
+
544
+ Note:
545
+
546
+ - MatrixOne supports only ONE index per vector column
547
+ - Enable HNSW indexing before creating HNSW indexes: SET experimental_hnsw_index = 1
548
+ - HNSW indexes require BigInteger primary key in the table
549
+ - Higher M values provide better recall but slower construction
550
+ - Higher ef_construction provides better index quality but slower construction
551
+ - Higher ef_search provides better recall but slower search
552
+ """
553
+
554
+ def __init__(
555
+ self,
556
+ name: str,
557
+ column: Union[str, Column],
558
+ m: int = 16,
559
+ ef_construction: int = 200,
560
+ ef_search: int = 50,
561
+ op_type: str = VectorOpType.VECTOR_L2_OPS,
562
+ **kwargs,
563
+ ):
564
+ """
565
+ Initialize HnswVectorIndex.
566
+
567
+ Args::
568
+
569
+ name: Index name
570
+ column: Vector column to index
571
+ m: Number of bi-directional links for HNSW (default: 16)
572
+ ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
573
+ ef_search: Size of dynamic candidate list for HNSW search (default: 50)
574
+ op_type: Vector operation type (default: vector_l2_ops)
575
+ **kwargs: Additional index parameters
576
+ """
577
+ self.index_type = VectorIndexType.HNSW
578
+ self.m = m
579
+ self.ef_construction = ef_construction
580
+ self.ef_search = ef_search
581
+ self.op_type = op_type
582
+
583
+ # Store column name for later use
584
+ self._column_name = str(column) if not isinstance(column, str) else column
585
+
586
+ # Call parent constructor first
587
+ super().__init__(name, column, **kwargs)
588
+
589
+ # Set dialect options after initialization to bind to matrixone dialect
590
+ self.dialect_options["matrixone"] = {"length": None, "using": None}
591
+ # Also provide mysql fallback for compatibility
592
+ self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}
593
+
594
+ def _create_index_sql(self, table_name: str) -> str:
595
+ """Generate the CREATE INDEX SQL for HNSW vector index."""
596
+ column_name = self._column_name
597
+ sql_parts = [f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({column_name})"]
598
+ sql_parts.append(f"M {self.m}")
599
+ sql_parts.append(f"EF_CONSTRUCTION {self.ef_construction}")
600
+ sql_parts.append(f"EF_SEARCH {self.ef_search}")
601
+ sql_parts.append(f"op_type '{self.op_type}'")
602
+ return " ".join(sql_parts)
603
+
604
+ def create_sql(self, table_name: str) -> str:
605
+ """Generate CREATE INDEX SQL for the given table name."""
606
+ return self._create_index_sql(table_name)
607
+
608
+ def drop_sql(self, table_name: str) -> str:
609
+ """Generate DROP INDEX SQL for the given table name."""
610
+ return f"DROP INDEX {self.name} ON {table_name}"
611
+
612
+ @classmethod
613
+ def create_index(
614
+ cls,
615
+ engine,
616
+ table_name: str,
617
+ name: str,
618
+ column: Union[str, Column],
619
+ m: int = 16,
620
+ ef_construction: int = 200,
621
+ ef_search: int = 50,
622
+ op_type: str = VectorOpType.VECTOR_L2_OPS,
623
+ **kwargs,
624
+ ) -> bool:
625
+ """
626
+ Create an HNSW vector index using ORM-style method.
627
+
628
+ Args::
629
+
630
+ engine: SQLAlchemy engine
631
+ table_name: Name of the table
632
+ name: Name of the index
633
+ column: Vector column to index
634
+ m: Number of bi-directional links for HNSW (default: 16)
635
+ ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
636
+ ef_search: Size of dynamic candidate list for HNSW search (default: 50)
637
+ op_type: Vector operation type (default: vector_l2_ops)
638
+ **kwargs: Additional index parameters
639
+
640
+ Returns::
641
+
642
+ bool: True if successful, False otherwise
643
+ """
644
+ try:
645
+ index = cls(name, column, m, ef_construction, ef_search, op_type, **kwargs)
646
+ sql = index.create_sql(table_name)
647
+
648
+ with engine.begin() as conn:
649
+ # Enable HNSW indexing
650
+ _exec_sql_safe(conn, "SET experimental_hnsw_index = 1")
651
+ _exec_sql_safe(conn, sql)
652
+ return True
653
+ except Exception as e:
654
+ print(f"Failed to create HNSW vector index: {e}")
655
+ return False
656
+
657
+ @classmethod
658
+ def create_index_in_transaction(
659
+ cls,
660
+ connection,
661
+ table_name: str,
662
+ name: str,
663
+ column: Union[str, Column],
664
+ m: int = 16,
665
+ ef_construction: int = 200,
666
+ ef_search: int = 50,
667
+ op_type: str = VectorOpType.VECTOR_L2_OPS,
668
+ **kwargs,
669
+ ) -> bool:
670
+ """
671
+ Create an HNSW vector index within an existing transaction.
672
+
673
+ Args::
674
+
675
+ connection: SQLAlchemy connection object
676
+ table_name: Name of the table
677
+ name: Name of the index
678
+ column: Vector column to index
679
+ m: Number of bi-directional links for HNSW (default: 16)
680
+ ef_construction: Size of dynamic candidate list for HNSW construction (default: 200)
681
+ ef_search: Size of dynamic candidate list for HNSW search (default: 50)
682
+ op_type: Vector operation type (default: vector_l2_ops)
683
+ **kwargs: Additional index parameters
684
+
685
+ Returns::
686
+
687
+ bool: True if successful, False otherwise
688
+ """
689
+ try:
690
+ index = cls(name, column, m, ef_construction, ef_search, op_type, **kwargs)
691
+ sql = index.create_sql(table_name)
692
+
693
+ # Enable HNSW indexing
694
+ _exec_sql_safe(connection, "SET experimental_hnsw_index = 1")
695
+ _exec_sql_safe(connection, sql)
696
+ return True
697
+ except Exception as e:
698
+ print(f"Failed to create HNSW vector index in transaction: {e}")
699
+ return False
700
+
701
+ @classmethod
702
+ def drop_index(cls, engine, table_name: str, name: str) -> bool:
703
+ """
704
+ Drop an HNSW vector index using ORM-style method.
705
+
706
+ Args::
707
+
708
+ engine: SQLAlchemy engine
709
+ table_name: Name of the table
710
+ name: Name of the index to drop
711
+
712
+ Returns::
713
+
714
+ bool: True if successful, False otherwise
715
+ """
716
+ try:
717
+ sql = f"DROP INDEX {name} ON {table_name}"
718
+ with engine.begin() as conn:
719
+ _exec_sql_safe(conn, sql)
720
+ return True
721
+ except Exception as e:
722
+ print(f"Failed to drop HNSW vector index: {e}")
723
+ return False
724
+
725
+ @classmethod
726
+ def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
727
+ """
728
+ Drop an HNSW vector index within an existing transaction.
729
+
730
+ Args::
731
+
732
+ connection: SQLAlchemy connection object
733
+ table_name: Name of the table
734
+ name: Name of the index to drop
735
+
736
+ Returns::
737
+
738
+ bool: True if successful, False otherwise
739
+ """
740
+ try:
741
+ sql = f"DROP INDEX {name} ON {table_name}"
742
+ _exec_sql_safe(connection, sql)
743
+ return True
744
+ except Exception as e:
745
+ print(f"Failed to drop HNSW vector index in transaction: {e}")
746
+ return False
747
+
748
+ def create(self, engine, table_name: str) -> bool:
749
+ """
750
+ Create this HNSW vector index using ORM-style method.
751
+
752
+ Args::
753
+
754
+ engine: SQLAlchemy engine
755
+ table_name: Name of the table
756
+
757
+ Returns::
758
+
759
+ bool: True if successful, False otherwise
760
+ """
761
+ try:
762
+ sql = self.create_sql(table_name)
763
+
764
+ with engine.begin() as conn:
765
+ # Enable HNSW indexing
766
+ _exec_sql_safe(conn, "SET experimental_hnsw_index = 1")
767
+ _exec_sql_safe(conn, sql)
768
+ return True
769
+ except Exception as e:
770
+ print(f"Failed to create HNSW vector index: {e}")
771
+ return False
772
+
773
+ def drop(self, engine, table_name: str) -> bool:
774
+ """
775
+ Drop this HNSW vector index using ORM-style method.
776
+
777
+ Args::
778
+
779
+ engine: SQLAlchemy engine
780
+ table_name: Name of the table
781
+
782
+ Returns::
783
+
784
+ bool: True if successful, False otherwise
785
+ """
786
+ try:
787
+ sql = self.drop_sql(table_name)
788
+ with engine.begin() as conn:
789
+ _exec_sql_safe(conn, sql)
790
+ return True
791
+ except Exception as e:
792
+ print(f"Failed to drop HNSW vector index: {e}")
793
+ return False
794
+
795
def create_in_transaction(self, connection, table_name: str) -> bool:
    """
    Create this HNSW vector index through a caller-supplied connection.

    Issues ``SET experimental_hnsw_index = 1`` followed by the statement
    from ``self.create_sql(table_name)`` on ``connection``. Any exception
    is printed and reported as ``False`` rather than raised.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table

    Returns::

        bool: True if both statements ran without raising, False otherwise
    """
    try:
        # Build the DDL first, then enable HNSW indexing before running it.
        statements = (
            "SET experimental_hnsw_index = 1",
            self.create_sql(table_name),
        )
        for statement in statements:
            _exec_sql_safe(connection, statement)
    except Exception as exc:
        print(f"Failed to create HNSW vector index in transaction: {exc}")
        return False
    return True
818
+
819
def drop_in_transaction(self, connection, table_name: str) -> bool:
    """
    Drop this HNSW vector index through a caller-supplied connection.

    Executes the statement from ``self.drop_sql(table_name)`` on
    ``connection``. Any exception is printed and reported as ``False``
    rather than raised.

    Args::

        connection: SQLAlchemy connection object
        table_name: Name of the table

    Returns::

        bool: True if the statement ran without raising, False otherwise
    """
    try:
        _exec_sql_safe(connection, self.drop_sql(table_name))
    except Exception as exc:
        print(f"Failed to drop HNSW vector index in transaction: {exc}")
        return False
    return True
839
+
840
+
841
class VectorIndex(Index):
    """
    SQLAlchemy Index for vector columns with MatrixOne-specific syntax.

    This class provides a generic interface for creating vector indexes with various
    algorithms and operation types. It supports both IVF (Inverted File) and HNSW
    (Hierarchical Navigable Small World) indexing algorithms.

    Key Features:

    - Support for multiple vector indexing algorithms (IVF, HNSW)
    - Configurable operation types (L2 distance, cosine similarity, inner product)
    - Automatic SQL generation for index creation and management
    - Support for both class methods and instance methods

    Supported Index Types:
    - IVF (Inverted File): emits an optional ``lists = N`` clause
    - HNSW: emits optional ``M``, ``EF_CONSTRUCTION`` and ``EF_SEARCH`` clauses

    Supported Operation Types:
    - VECTOR_L2_OPS: L2 (Euclidean) distance
    - VECTOR_COSINE_OPS: Cosine similarity
    - VECTOR_IP_OPS: Inner product similarity

    Error handling differs between methods: ``create_index``, ``drop_index`` and
    ``drop_index_in_transaction`` print the error and return ``False``, while
    ``create_index_in_transaction`` prints the error and re-raises it.

    Usage Examples::

        # Create IVF index
        index = VectorIndex(
            name='vec_idx_ivf',
            column='embedding',
            index_type=VectorIndexType.IVFFLAT,
            lists=100,
            op_type=VectorOpType.VECTOR_L2_OPS
        )

        # Create HNSW index
        index = VectorIndex(
            name='vec_idx_hnsw',
            column='embedding',
            index_type=VectorIndexType.HNSW,
            m=16,
            ef_construction=200,
            op_type=VectorOpType.VECTOR_COSINE_OPS
        )

    Note: This is the legacy generic class. For better type safety and specific
    algorithm features, consider using IVFVectorIndex or HnswVectorIndex instead.
    """

    def __init__(
        self,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ):
        """
        Initialize VectorIndex.

        Args::

            name: Index name
            column: Vector column to index (a column name or a Column object)
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters, forwarded to ``Index.__init__``
        """
        self.index_type = index_type
        self.lists = lists
        self.op_type = op_type
        # HNSW parameters
        self.m = m
        self.ef_construction = ef_construction
        self.ef_search = ef_search

        # Store the bare column name for SQL generation. ``str()`` of a Column
        # that is already attached to a table renders as "table.column", which
        # would produce ``ON tbl(tbl.col)``; use the Column's own name instead.
        if isinstance(column, str):
            self._column_name = column
        elif isinstance(column, Column) and column.name:
            self._column_name = str(column.name)
        else:
            self._column_name = str(column)

        # Attributes above are set before delegating to the parent constructor
        super().__init__(name, column, **kwargs)

        # Set dialect options after initialization to bind to matrixone dialect
        self.dialect_options["matrixone"] = {"length": None, "using": None}
        # Also provide mysql fallback for compatibility
        self.dialect_options["mysql"] = {"length": None, "using": None, "with_parser": None}

    def _create_index_sql(self, table_name: str) -> str:
        """
        Generate the CREATE INDEX SQL for this vector index.

        Args::

            table_name: Name of the table to index

        Returns::

            str: ``CREATE INDEX <name> USING <type> ON <table>(<column>)``
            followed by the algorithm clauses that are set and ``op_type '<op>'``
        """
        # The column name captured at construction time is used verbatim
        column_name = self._column_name

        sql_parts = [f"CREATE INDEX {self.name} USING {self.index_type} ON {table_name}({column_name})"]

        # Add parameters based on index type; unset (None) values are omitted
        if self.index_type == VectorIndexType.IVFFLAT and self.lists is not None:
            sql_parts.append(f"lists = {self.lists}")
        elif self.index_type == VectorIndexType.HNSW:
            if self.m is not None:
                sql_parts.append(f"M {self.m}")
            if self.ef_construction is not None:
                sql_parts.append(f"EF_CONSTRUCTION {self.ef_construction}")
            if self.ef_search is not None:
                sql_parts.append(f"EF_SEARCH {self.ef_search}")

        # The operation type is always emitted
        sql_parts.append(f"op_type '{self.op_type}'")

        return " ".join(sql_parts)

    def create_sql(self, table_name: str) -> str:
        """Generate CREATE INDEX SQL for the given table name."""
        return self._create_index_sql(table_name)

    def drop_sql(self, table_name: str) -> str:
        """Generate DROP INDEX SQL for the given table name."""
        return f"DROP INDEX {self.name} ON {table_name}"

    @classmethod
    def create_index(
        cls,
        engine,
        table_name: str,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ) -> bool:
        """
        Create a vector index using ORM-style method.

        Runs inside ``engine.begin()``. Before the CREATE INDEX statement the
        matching session settings are issued on the same connection:
        ``experimental_ivf_index`` and ``probe_limit`` for IVFFLAT,
        ``experimental_hnsw_index`` for HNSW.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters

        Returns::

            bool: True if successful, False if any exception was raised
            (the exception is printed, not propagated)
        """
        try:
            index = cls(name, column, index_type, lists, op_type, m, ef_construction, ef_search, **kwargs)
            sql = index.create_sql(table_name)

            with engine.begin() as conn:
                # Enable appropriate indexing in the same connection
                if index_type == VectorIndexType.IVFFLAT:
                    _exec_sql_safe(conn, "SET experimental_ivf_index = 1")
                    _exec_sql_safe(conn, "SET probe_limit = 1")
                elif index_type == VectorIndexType.HNSW:
                    _exec_sql_safe(conn, "SET experimental_hnsw_index = 1")

                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to create vector index: {e}")
            return False

    @classmethod
    def drop_index(cls, engine, table_name: str, name: str) -> bool:
        """
        Drop a vector index using ORM-style method.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False if any exception was raised
            (the exception is printed, not propagated)
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            with engine.begin() as conn:
                _exec_sql_safe(conn, sql)
            return True
        except Exception as e:
            print(f"Failed to drop vector index: {e}")
            return False

    def create(self, engine, table_name: str) -> bool:
        """
        Create this vector index using ORM-style method.

        Delegates to ``create_index`` with this instance's stored settings.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.create_index(
            engine,
            table_name,
            self.name,
            self._column_name,
            self.index_type,
            self.lists,
            self.op_type,
            self.m,
            self.ef_construction,
            self.ef_search,
        )

    def drop(self, engine, table_name: str) -> bool:
        """
        Drop this vector index using ORM-style method.

        Args::

            engine: SQLAlchemy engine
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.drop_index(engine, table_name, self.name)

    @classmethod
    def create_index_in_transaction(
        cls,
        connection,
        table_name: str,
        name: str,
        column: Union[str, Column],
        index_type: str = VectorIndexType.IVFFLAT,
        lists: Optional[int] = None,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        # HNSW parameters
        m: Optional[int] = None,
        ef_construction: Optional[int] = None,
        ef_search: Optional[int] = None,
        **kwargs,
    ) -> bool:
        """
        Create a vector index within an existing transaction.

        No ``SET experimental_*`` statements are issued here; the caller is
        expected to have enabled the relevant indexing beforehand.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table
            name: Name of the index
            column: Vector column to index
            index_type: Type of vector index (ivfflat, hnsw, etc.)
            lists: Number of lists for IVFFLAT (optional)
            op_type: Vector operation type
            m: Number of bi-directional links for HNSW (optional)
            ef_construction: Size of dynamic candidate list for HNSW construction (optional)
            ef_search: Size of dynamic candidate list for HNSW search (optional)
            **kwargs: Additional index parameters

        Returns::

            bool: True if the statement executed

        Raises::

            Exception: Any error raised while building or executing the
            statement is printed and then re-raised, so this method never
            returns False
        """
        try:
            index = cls(name, column, index_type, lists, op_type, m, ef_construction, ef_search, **kwargs)
            sql = index.create_sql(table_name)

            # Note: Indexing should be enabled before calling this method
            # The SET statements are removed to avoid interfering with transaction rollback

            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to create vector index in transaction: {e}")
            # Re-raise the exception to ensure transaction rollback
            raise

    @classmethod
    def drop_index_in_transaction(cls, connection, table_name: str, name: str) -> bool:
        """
        Drop a vector index within an existing transaction.

        Unlike ``create_index_in_transaction``, a failure here is not
        re-raised: it is printed and reported through the return value.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table
            name: Name of the index to drop

        Returns::

            bool: True if successful, False if any exception was raised
        """
        try:
            sql = f"DROP INDEX {name} ON {table_name}"
            _exec_sql_safe(connection, sql)
            return True
        except Exception as e:
            print(f"Failed to drop vector index in transaction: {e}")
            return False

    def create_in_transaction(self, connection, table_name: str) -> bool:
        """
        Create this vector index within an existing transaction.

        Delegates to ``create_index_in_transaction`` with this instance's
        stored settings.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table

        Returns::

            bool: True if the statement executed

        Raises::

            Exception: Errors are propagated from ``create_index_in_transaction``
        """
        return self.__class__.create_index_in_transaction(
            connection,
            table_name,
            self.name,
            self._column_name,
            self.index_type,
            self.lists,
            self.op_type,
            self.m,
            self.ef_construction,
            self.ef_search,
        )

    def drop_in_transaction(self, connection, table_name: str) -> bool:
        """
        Drop this vector index within an existing transaction.

        Args::

            connection: SQLAlchemy connection (within a transaction)
            table_name: Name of the table

        Returns::

            bool: True if successful, False otherwise
        """
        return self.__class__.drop_index_in_transaction(connection, table_name, self.name)
1205
+
1206
+
1207
class CreateVectorIndex(DDLElement):
    """
    DDL element for creating vector indexes.

    Holds the index to create and whether the compiled statement should
    carry an ``IF NOT EXISTS`` clause.
    """

    def __init__(self, index: VectorIndex, if_not_exists: bool = False):
        """
        Args::

            index: The VectorIndex to emit a CREATE INDEX statement for
            if_not_exists: When true, the compiled SQL includes IF NOT EXISTS
        """
        self.if_not_exists = if_not_exists
        self.index = index
1213
+
1214
+
1215
@compiles(CreateVectorIndex)
def compile_create_vector_index(element: CreateVectorIndex, compiler, **kw):
    """
    Compile a CreateVectorIndex element into a CREATE INDEX statement.

    The result has the shape
    ``CREATE INDEX [IF NOT EXISTS] <name> USING <type> ON <table>(<column>)``
    followed by the algorithm-specific clauses that are set and a trailing
    ``op_type '<op>'`` clause.
    """
    idx = element.index

    head = "CREATE INDEX IF NOT EXISTS" if element.if_not_exists else "CREATE INDEX"
    clauses = [
        head,
        # The column name stored on the index at construction time is used as-is
        f"{idx.name} USING {idx.index_type} ON {idx.table.name}({idx._column_name})",
    ]

    # Algorithm-specific options; values left as None are skipped
    if idx.index_type == VectorIndexType.IVFFLAT and idx.lists is not None:
        clauses.append(f"lists = {idx.lists}")
    elif idx.index_type == VectorIndexType.HNSW:
        hnsw_options = (
            ("M", idx.m),
            ("EF_CONSTRUCTION", idx.ef_construction),
            ("EF_SEARCH", idx.ef_search),
        )
        clauses.extend(f"{keyword} {value}" for keyword, value in hnsw_options if value is not None)

    # The operation type always comes last
    clauses.append(f"op_type '{idx.op_type}'")
    return " ".join(clauses)
1246
+
1247
+
1248
@compiles(SQLAlchemyCreateIndex, "matrixone")
def compile_create_vector_index_matrixone(element: SQLAlchemyCreateIndex, compiler, **kw):
    """
    Compile CREATE INDEX on the MatrixOne dialect.

    VectorIndex instances get the vector-specific statement; every other
    index is handed to ``compiler.visit_create_index``.
    """
    idx = element.element

    # Anything that is not a VectorIndex takes the default compilation path
    if not isinstance(idx, VectorIndex):
        return compiler.visit_create_index(element, **kw)

    head = "CREATE INDEX IF NOT EXISTS" if element.if_not_exists else "CREATE INDEX"
    clauses = [
        head,
        # The column name stored on the index at construction time is used as-is
        f"{idx.name} USING {idx.index_type} ON {idx.table.name}({idx._column_name})",
    ]

    # Algorithm-specific options; values left as None are skipped
    if idx.index_type == VectorIndexType.IVFFLAT and idx.lists is not None:
        clauses.append(f"lists = {idx.lists}")
    elif idx.index_type == VectorIndexType.HNSW:
        hnsw_options = (
            ("M", idx.m),
            ("EF_CONSTRUCTION", idx.ef_construction),
            ("EF_SEARCH", idx.ef_search),
        )
        clauses.extend(f"{keyword} {value}" for keyword, value in hnsw_options if value is not None)

    # The operation type always comes last
    clauses.append(f"op_type '{idx.op_type}'")
    return " ".join(clauses)
1284
+
1285
+
1286
@compiles(SQLAlchemyCreateIndex, "mysql")
def compile_create_vector_index_mysql(element: SQLAlchemyCreateIndex, compiler, **kw):
    """
    Compile CREATE INDEX on the MySQL dialect.

    VectorIndex instances get the vector-specific statement; every other
    index is handed to ``compiler.visit_create_index``.
    """
    idx = element.element

    # Anything that is not a VectorIndex takes the default MySQL compilation path
    if not isinstance(idx, VectorIndex):
        return compiler.visit_create_index(element, **kw)

    head = "CREATE INDEX IF NOT EXISTS" if element.if_not_exists else "CREATE INDEX"
    clauses = [
        head,
        # The column name stored on the index at construction time is used as-is
        f"{idx.name} USING {idx.index_type} ON {idx.table.name}({idx._column_name})",
    ]

    # Algorithm-specific options; values left as None are skipped
    if idx.index_type == VectorIndexType.IVFFLAT and idx.lists is not None:
        clauses.append(f"lists = {idx.lists}")
    elif idx.index_type == VectorIndexType.HNSW:
        hnsw_options = (
            ("M", idx.m),
            ("EF_CONSTRUCTION", idx.ef_construction),
            ("EF_SEARCH", idx.ef_search),
        )
        clauses.extend(f"{keyword} {value}" for keyword, value in hnsw_options if value is not None)

    # The operation type always comes last
    clauses.append(f"op_type '{idx.op_type}'")
    return " ".join(clauses)
1322
+
1323
+
1324
def create_vector_index(
    name: str,
    column: Union[str, Column],
    index_type: str = VectorIndexType.IVFFLAT,
    lists: Optional[int] = None,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    # HNSW parameters
    m: Optional[int] = None,
    ef_construction: Optional[int] = None,
    ef_search: Optional[int] = None,
    **kwargs,
) -> VectorIndex:
    """
    Build a VectorIndex from the given settings.

    This is a plain factory: every argument is forwarded unchanged to the
    VectorIndex constructor.

    Args::

        name: Index name
        column: Vector column to index
        index_type: Type of vector index (ivfflat, hnsw, etc.)
        lists: Number of lists for IVFFLAT (optional)
        op_type: Vector operation type
        m: Number of bi-directional links for HNSW (optional)
        ef_construction: Size of dynamic candidate list for HNSW construction (optional)
        ef_search: Size of dynamic candidate list for HNSW search (optional)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create IVFFLAT index with 256 lists
        idx = create_vector_index(
            "idx_vector_l2",
            "embedding",
            index_type="ivfflat",
            lists=256,
            op_type="vector_l2_ops"
        )

        # Create HNSW index with custom parameters
        idx = create_vector_index(
            "idx_vector_hnsw",
            "embedding",
            index_type="hnsw",
            m=48,
            ef_construction=64,
            ef_search=64,
            op_type="vector_l2_ops"
        )
    """
    settings = {
        "index_type": index_type,
        "lists": lists,
        "op_type": op_type,
        "m": m,
        "ef_construction": ef_construction,
        "ef_search": ef_search,
    }
    return VectorIndex(name=name, column=column, **settings, **kwargs)
1387
+
1388
+
1389
def create_ivfflat_index(
    name: str,
    column: Union[str, Column],
    lists: int = 256,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> VectorIndex:
    """
    Build an IVFFLAT VectorIndex.

    Shorthand for ``create_vector_index`` with ``index_type`` fixed to
    ``VectorIndexType.IVFFLAT``.

    Args::

        name: Index name
        column: Vector column to index
        lists: Number of lists (default: 256)
        op_type: Vector operation type (default: VectorOpType.VECTOR_L2_OPS)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create IVFFLAT index with 256 lists for L2 distance
        idx = create_ivfflat_index("idx_embedding_l2", "embedding", lists=256)

        # Create IVFFLAT index with 128 lists for cosine similarity
        idx = create_ivfflat_index(
            "idx_embedding_cosine",
            "embedding",
            lists=128,
            op_type="vector_cosine_ops"
        )
    """
    ivf_settings = {
        "index_type": VectorIndexType.IVFFLAT,
        "lists": lists,
        "op_type": op_type,
    }
    return create_vector_index(name=name, column=column, **ivf_settings, **kwargs)
1431
+
1432
+
1433
def create_hnsw_index(
    name: str,
    column: Union[str, Column],
    m: int = 48,
    ef_construction: int = 64,
    ef_search: int = 64,
    op_type: str = VectorOpType.VECTOR_L2_OPS,
    **kwargs,
) -> VectorIndex:
    """
    Build an HNSW VectorIndex.

    Shorthand for ``create_vector_index`` with ``index_type`` fixed to
    ``VectorIndexType.HNSW``.

    Args::

        name: Index name
        column: Vector column to index
        m: Number of bi-directional links (default: 48)
        ef_construction: Size of dynamic candidate list for construction (default: 64)
        ef_search: Size of dynamic candidate list for search (default: 64)
        op_type: Vector operation type (default: VectorOpType.VECTOR_L2_OPS)
        **kwargs: Additional index parameters

    Returns::

        VectorIndex instance

    Example::

        # Create HNSW index with default parameters
        idx = create_hnsw_index("idx_embedding_hnsw", "embedding")

        # Create HNSW index with custom parameters
        idx = create_hnsw_index(
            "idx_embedding_hnsw_custom",
            "embedding",
            m=32,
            ef_construction=128,
            ef_search=128,
            op_type="vector_cosine_ops"
        )
    """
    hnsw_settings = {
        "index_type": VectorIndexType.HNSW,
        "m": m,
        "ef_construction": ef_construction,
        "ef_search": ef_search,
        "op_type": op_type,
    }
    return create_vector_index(name=name, column=column, **hnsw_settings, **kwargs)
1483
+
1484
+
1485
class VectorIndexBuilder:
    """
    Fluent builder that collects vector indexes for a single column.

    Each ``*_index`` / ``ivfflat`` / ``hnsw`` call creates one VectorIndex for
    the builder's column, stores it, and returns the builder so calls can be
    chained. ``build()`` returns the collected indexes.
    """

    def __init__(self, column: Union[str, Column]):
        """
        Initialize VectorIndexBuilder.

        Args::

            column: Vector column to index
        """
        self.column = column
        self._indexes = []

    def _remember(self, index) -> "VectorIndexBuilder":
        """Store ``index`` in the pending list and return the builder."""
        self._indexes.append(index)
        return self

    def ivfflat(
        self, name: str, lists: int = 256, op_type: str = VectorOpType.VECTOR_L2_OPS, **kwargs
    ) -> "VectorIndexBuilder":
        """
        Add an IVFFLAT index.

        Args::

            name: Index name
            lists: Number of lists
            op_type: Vector operation type
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self._remember(
            create_ivfflat_index(name, self.column, lists, op_type, **kwargs)
        )

    def l2_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
        """
        Add an IVFFLAT index using L2 distance.

        Args::

            name: Index name
            lists: Number of lists for IVFFLAT
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_L2_OPS, **kwargs)

    def cosine_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
        """
        Add an IVFFLAT index using cosine similarity.

        Args::

            name: Index name
            lists: Number of lists for IVFFLAT
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_COSINE_OPS, **kwargs)

    def ip_index(self, name: str, lists: int = 256, **kwargs) -> "VectorIndexBuilder":
        """
        Add an IVFFLAT index using inner product.

        Args::

            name: Index name
            lists: Number of lists for IVFFLAT
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.ivfflat(name, lists=lists, op_type=VectorOpType.VECTOR_IP_OPS, **kwargs)

    def hnsw(
        self,
        name: str,
        m: int = 48,
        ef_construction: int = 64,
        ef_search: int = 64,
        op_type: str = VectorOpType.VECTOR_L2_OPS,
        **kwargs,
    ) -> "VectorIndexBuilder":
        """
        Add an HNSW index.

        Args::

            name: Index name
            m: Number of bi-directional links
            ef_construction: Size of dynamic candidate list for construction
            ef_search: Size of dynamic candidate list for search
            op_type: Vector operation type
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self._remember(
            create_hnsw_index(name, self.column, m, ef_construction, ef_search, op_type, **kwargs)
        )

    def hnsw_l2_index(
        self,
        name: str,
        m: int = 48,
        ef_construction: int = 64,
        ef_search: int = 64,
        **kwargs,
    ) -> "VectorIndexBuilder":
        """
        Add an HNSW index using L2 distance.

        Args::

            name: Index name
            m: Number of bi-directional links
            ef_construction: Size of dynamic candidate list for construction
            ef_search: Size of dynamic candidate list for search
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.hnsw(
            name,
            m=m,
            ef_construction=ef_construction,
            ef_search=ef_search,
            op_type=VectorOpType.VECTOR_L2_OPS,
            **kwargs,
        )

    def hnsw_cosine_index(
        self,
        name: str,
        m: int = 48,
        ef_construction: int = 64,
        ef_search: int = 64,
        **kwargs,
    ) -> "VectorIndexBuilder":
        """
        Add an HNSW index using cosine similarity.

        Args::

            name: Index name
            m: Number of bi-directional links
            ef_construction: Size of dynamic candidate list for construction
            ef_search: Size of dynamic candidate list for search
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.hnsw(
            name,
            m=m,
            ef_construction=ef_construction,
            ef_search=ef_search,
            op_type=VectorOpType.VECTOR_COSINE_OPS,
            **kwargs,
        )

    def hnsw_ip_index(
        self,
        name: str,
        m: int = 48,
        ef_construction: int = 64,
        ef_search: int = 64,
        **kwargs,
    ) -> "VectorIndexBuilder":
        """
        Add an HNSW index using inner product.

        Args::

            name: Index name
            m: Number of bi-directional links
            ef_construction: Size of dynamic candidate list for construction
            ef_search: Size of dynamic candidate list for search
            **kwargs: Additional parameters

        Returns::

            Self for method chaining
        """
        return self.hnsw(
            name,
            m=m,
            ef_construction=ef_construction,
            ef_search=ef_search,
            op_type=VectorOpType.VECTOR_IP_OPS,
            **kwargs,
        )

    def build(self) -> List[VectorIndex]:
        """
        Return the indexes collected so far.

        Returns::

            A new list of VectorIndex instances (a shallow copy of the
            builder's internal list)
        """
        return list(self._indexes)

    def add_to_table(self, table) -> "VectorIndexBuilder":
        """
        Attach every collected index to a table.

        For each index, its ``table`` attribute is set to ``table`` and the
        index is added to ``table.indexes``.

        Args::

            table: SQLAlchemy Table instance

        Returns::

            Self for method chaining
        """
        for pending in self._indexes:
            pending.table = table
            table.indexes.add(pending)
        return self
1700
+
1701
+
1702
def vector_index_builder(column: Union[str, Column]) -> VectorIndexBuilder:
    """
    Create a VectorIndexBuilder for a column.

    Args::

        column: Vector column to index

    Returns::

        VectorIndexBuilder instance

    Example::

        # Create multiple indexes for a vector column
        indexes = (
            vector_index_builder("embedding")
            .l2_index("idx_l2", lists=256)
            .cosine_index("idx_cosine", lists=128)
            .build()
        )
    """
    # The example above uses a parenthesised chain rather than trailing
    # backslashes: inside a non-raw docstring a backslash-newline is consumed
    # as a line continuation and would collapse the example onto one line.
    return VectorIndexBuilder(column)