matrixone-python-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. matrixone/__init__.py +155 -0
  2. matrixone/account.py +723 -0
  3. matrixone/async_client.py +3913 -0
  4. matrixone/async_metadata_manager.py +311 -0
  5. matrixone/async_orm.py +123 -0
  6. matrixone/async_vector_index_manager.py +633 -0
  7. matrixone/base_client.py +208 -0
  8. matrixone/client.py +4672 -0
  9. matrixone/config.py +452 -0
  10. matrixone/connection_hooks.py +286 -0
  11. matrixone/exceptions.py +89 -0
  12. matrixone/logger.py +782 -0
  13. matrixone/metadata.py +820 -0
  14. matrixone/moctl.py +219 -0
  15. matrixone/orm.py +2277 -0
  16. matrixone/pitr.py +646 -0
  17. matrixone/pubsub.py +771 -0
  18. matrixone/restore.py +411 -0
  19. matrixone/search_vector_index.py +1176 -0
  20. matrixone/snapshot.py +550 -0
  21. matrixone/sql_builder.py +844 -0
  22. matrixone/sqlalchemy_ext/__init__.py +161 -0
  23. matrixone/sqlalchemy_ext/adapters.py +163 -0
  24. matrixone/sqlalchemy_ext/dialect.py +534 -0
  25. matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
  26. matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
  27. matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
  28. matrixone/sqlalchemy_ext/ivf_config.py +252 -0
  29. matrixone/sqlalchemy_ext/table_builder.py +351 -0
  30. matrixone/sqlalchemy_ext/vector_index.py +1721 -0
  31. matrixone/sqlalchemy_ext/vector_type.py +948 -0
  32. matrixone/version.py +580 -0
  33. matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
  34. matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
  35. matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
  36. matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
  37. matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
  38. matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
  39. tests/__init__.py +19 -0
  40. tests/offline/__init__.py +20 -0
  41. tests/offline/conftest.py +77 -0
  42. tests/offline/test_account.py +703 -0
  43. tests/offline/test_async_client_query_comprehensive.py +1218 -0
  44. tests/offline/test_basic.py +54 -0
  45. tests/offline/test_case_sensitivity.py +227 -0
  46. tests/offline/test_connection_hooks_offline.py +287 -0
  47. tests/offline/test_dialect_schema_handling.py +609 -0
  48. tests/offline/test_explain_methods.py +346 -0
  49. tests/offline/test_filter_logical_in.py +237 -0
  50. tests/offline/test_fulltext_search_comprehensive.py +795 -0
  51. tests/offline/test_ivf_config.py +249 -0
  52. tests/offline/test_join_methods.py +281 -0
  53. tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
  54. tests/offline/test_logical_in_method.py +237 -0
  55. tests/offline/test_matrixone_version_parsing.py +264 -0
  56. tests/offline/test_metadata_offline.py +557 -0
  57. tests/offline/test_moctl.py +300 -0
  58. tests/offline/test_moctl_simple.py +251 -0
  59. tests/offline/test_model_support_offline.py +359 -0
  60. tests/offline/test_model_support_simple.py +225 -0
  61. tests/offline/test_pinecone_filter_offline.py +377 -0
  62. tests/offline/test_pitr.py +585 -0
  63. tests/offline/test_pubsub.py +712 -0
  64. tests/offline/test_query_update.py +283 -0
  65. tests/offline/test_restore.py +445 -0
  66. tests/offline/test_snapshot_comprehensive.py +384 -0
  67. tests/offline/test_sql_escaping_edge_cases.py +551 -0
  68. tests/offline/test_sqlalchemy_integration.py +382 -0
  69. tests/offline/test_sqlalchemy_vector_integration.py +434 -0
  70. tests/offline/test_table_builder.py +198 -0
  71. tests/offline/test_unified_filter.py +398 -0
  72. tests/offline/test_unified_transaction.py +495 -0
  73. tests/offline/test_vector_index.py +238 -0
  74. tests/offline/test_vector_operations.py +688 -0
  75. tests/offline/test_vector_type.py +174 -0
  76. tests/offline/test_version_core.py +328 -0
  77. tests/offline/test_version_management.py +372 -0
  78. tests/offline/test_version_standalone.py +652 -0
  79. tests/online/__init__.py +20 -0
  80. tests/online/conftest.py +216 -0
  81. tests/online/test_account_management.py +194 -0
  82. tests/online/test_advanced_features.py +344 -0
  83. tests/online/test_async_client_interfaces.py +330 -0
  84. tests/online/test_async_client_online.py +285 -0
  85. tests/online/test_async_model_insert_online.py +293 -0
  86. tests/online/test_async_orm_online.py +300 -0
  87. tests/online/test_async_simple_query_online.py +802 -0
  88. tests/online/test_async_transaction_simple_query.py +300 -0
  89. tests/online/test_basic_connection.py +130 -0
  90. tests/online/test_client_online.py +238 -0
  91. tests/online/test_config.py +90 -0
  92. tests/online/test_config_validation.py +123 -0
  93. tests/online/test_connection_hooks_new_online.py +217 -0
  94. tests/online/test_dialect_schema_handling_online.py +331 -0
  95. tests/online/test_filter_logical_in_online.py +374 -0
  96. tests/online/test_fulltext_comprehensive.py +1773 -0
  97. tests/online/test_fulltext_label_online.py +433 -0
  98. tests/online/test_fulltext_search_online.py +842 -0
  99. tests/online/test_ivf_stats_online.py +506 -0
  100. tests/online/test_logger_integration.py +311 -0
  101. tests/online/test_matrixone_query_orm.py +540 -0
  102. tests/online/test_metadata_online.py +579 -0
  103. tests/online/test_model_insert_online.py +255 -0
  104. tests/online/test_mysql_driver_validation.py +213 -0
  105. tests/online/test_orm_advanced_features.py +2022 -0
  106. tests/online/test_orm_cte_integration.py +269 -0
  107. tests/online/test_orm_online.py +270 -0
  108. tests/online/test_pinecone_filter.py +708 -0
  109. tests/online/test_pubsub_operations.py +352 -0
  110. tests/online/test_query_methods.py +225 -0
  111. tests/online/test_query_update_online.py +433 -0
  112. tests/online/test_search_vector_index.py +557 -0
  113. tests/online/test_simple_fulltext_online.py +915 -0
  114. tests/online/test_snapshot_comprehensive.py +998 -0
  115. tests/online/test_sqlalchemy_engine_integration.py +336 -0
  116. tests/online/test_sqlalchemy_integration.py +425 -0
  117. tests/online/test_transaction_contexts.py +1219 -0
  118. tests/online/test_transaction_insert_methods.py +356 -0
  119. tests/online/test_transaction_query_methods.py +288 -0
  120. tests/online/test_unified_filter_online.py +529 -0
  121. tests/online/test_vector_comprehensive.py +706 -0
  122. tests/online/test_version_management.py +291 -0
@@ -0,0 +1,948 @@
1
+ # Copyright 2021 - 2022 Matrix Origin
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Vector type for SQLAlchemy integration with MatrixOne.
17
+ """
18
+
19
+ from typing import Any, List, Optional, Union
20
+
21
+ from sqlalchemy import Column, Text, TypeDecorator, func
22
+ from sqlalchemy.dialects import mysql
23
+ from sqlalchemy.types import UserDefinedType
24
+
25
+
26
class VectorPrecision:
    """Namespace holding the precision tags accepted by the vector types.

    The values are plain strings rather than ``enum`` members, so they can be
    interpolated directly into a column type name (``vec`` + tag).
    """

    F32: str = "f32"  # 32-bit float elements -> ``vecf32``
    F64: str = "f64"  # 64-bit float elements -> ``vecf64``
31
+
32
+
33
class VectorType(UserDefinedType):
    """
    SQLAlchemy type for MatrixOne vector columns.

    The type renders as ``vecf32`` / ``vecf64`` (optionally with a dimension)
    in DDL, serializes Python sequences to the bracketed text form
    ``[v1,v2,...]`` on the way in, and parses that text back into a list of
    floats on the way out.

    Usage::

        # Define vector columns in SQLAlchemy models
        class Document(Base):
            __tablename__ = 'documents'
            id = Column(Integer, primary_key=True)
            content = Column(Text)
            embedding = Column(VectorType(384, VectorPrecision.F32))     # 384-dim f32 vector
            embedding_64 = Column(VectorType(512, VectorPrecision.F64))  # 512-dim f64 vector

        # Use in table creation
        client.create_table_orm('documents',
            Column('id', Integer, primary_key=True),
            Column('content', Text),
            Column('embedding', VectorType(384, VectorPrecision.F32))
        )

    Note: Vector dimensions and precision must match the requirements of your
    vector indexing strategy and embedding model.
    """

    __visit_name__ = "VECTOR"

    # The only state is ``dimension`` and ``precision``; both are hashable
    # constructor arguments, so statements using this type are safe to cache.
    # Without this flag SQLAlchemy warns and skips caching for such statements.
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None, precision: str = VectorPrecision.F32):
        """
        Initialize VectorType.

        Args::

            dimension: Vector dimension (optional)
            precision: Vector precision - VectorPrecision.F32 for vecf32, VectorPrecision.F64 for vecf64
        """
        self.dimension = dimension
        self.precision = precision

    def get_col_spec(self, **kw: Any) -> str:
        """Return the column specification, e.g. ``vecf32(128)`` or ``vecf64``."""
        if self.dimension is not None:
            return f"vec{self.precision}({self.dimension})"
        return f"vec{self.precision}"

    @staticmethod
    def _to_db(value):
        """Serialize a bind value to the bracketed vector text form."""
        if value is None or isinstance(value, str):
            # NULLs and pre-formatted strings are passed through untouched.
            return value
        if isinstance(value, (list, tuple)):
            # str() on a tuple would yield "(1, 2)", which is not the
            # bracketed format, so tuples are serialized like lists.
            return "[" + ",".join(map(str, value)) + "]"
        return str(value)

    @staticmethod
    def _from_db(value):
        """Parse bracketed vector text into a list of floats.

        Non-string values (including ``None``) and strings that cannot be
        parsed are returned unchanged.
        """
        if not isinstance(value, str):
            return value
        try:
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(item.strip()) for item in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""
        return self._to_db

    def process_bind_param(self, value, dialect):
        """Process the value before binding to the database."""
        return self._to_db(value)

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""
        return self._from_db

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        # Use the same ``is not None`` test as get_col_spec so both agree on
        # whether a dimension is present.
        if self.dimension is not None:
            return f"VectorType(dimension={self.dimension}, precision='{self.precision}')"
        return f"VectorType(precision='{self.precision}')"
153
+
154
+
155
class Vectorf32(TypeDecorator):
    """Convenience TypeDecorator for 32-bit float vectors.

    On a dialect named ``matrixone`` the column is backed by
    ``VectorType(precision=f32)``; on any other dialect it falls back to the
    ``Text`` implementation. Values are exchanged with the database as
    bracketed text (``[v1,v2,...]``).
    """

    impl = Text
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None):
        """
        Args::

            dimension: Vector dimension (optional)
        """
        self.dimension = dimension
        self.precision = VectorPrecision.F32
        super().__init__()

    def get_col_spec(self, **kw):
        """Return the column specification for this type."""
        if self.dimension is not None:
            return f"vecf32({self.dimension})"
        return "vecf32"

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect."""
        # For SQL generation on MatrixOne, return the custom vector type.
        if hasattr(dialect, "name") and dialect.name == "matrixone":
            return VectorType(dimension=self.dimension, precision=VectorPrecision.F32)
        return self.impl

    def process_bind_param(self, value, dialect):
        """Serialize the value to bracketed vector text before binding."""
        if value is None or isinstance(value, str):
            # NULLs and pre-formatted strings are passed through untouched.
            return value
        if isinstance(value, (list, tuple)):
            # str() on a tuple would yield "(1, 2)", which is not the
            # bracketed format, so tuples are serialized like lists.
            return "[" + ",".join(map(str, value)) + "]"
        return str(value)

    def process_result_value(self, value, dialect):
        """Parse bracketed vector text from the database into a list of floats.

        Non-string values (including ``None``) and unparsable strings are
        returned unchanged.
        """
        if not isinstance(value, str):
            return value
        try:
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(item.strip()) for item in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""

        def process(value):
            return self.process_bind_param(value, dialect)

        return process

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""

        def process(value):
            return self.process_result_value(value, dialect)

        return process

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        # Use the same ``is not None`` test as get_col_spec so both agree on
        # whether a dimension is present.
        if self.dimension is not None:
            return f"Vectorf32(dimension={self.dimension})"
        return "Vectorf32()"
231
+
232
+
233
class Vectorf64(TypeDecorator):
    """Convenience TypeDecorator for 64-bit float vectors.

    On a dialect named ``matrixone`` the column is backed by
    ``VectorType(precision=f64)``; on any other dialect it falls back to the
    ``Text`` implementation. Values are exchanged with the database as
    bracketed text (``[v1,v2,...]``).
    """

    impl = Text
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None):
        """
        Args::

            dimension: Vector dimension (optional)
        """
        self.dimension = dimension
        self.precision = VectorPrecision.F64
        super().__init__()

    def get_col_spec(self, **kw):
        """Return the column specification for this type."""
        if self.dimension is not None:
            return f"vecf64({self.dimension})"
        return "vecf64"

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect."""
        # For SQL generation on MatrixOne, return the custom vector type.
        if hasattr(dialect, "name") and dialect.name == "matrixone":
            return VectorType(dimension=self.dimension, precision=VectorPrecision.F64)
        return self.impl

    def process_bind_param(self, value, dialect):
        """Serialize the value to bracketed vector text before binding."""
        if value is None or isinstance(value, str):
            # NULLs and pre-formatted strings are passed through untouched.
            return value
        if isinstance(value, (list, tuple)):
            # str() on a tuple would yield "(1, 2)", which is not the
            # bracketed format, so tuples are serialized like lists.
            return "[" + ",".join(map(str, value)) + "]"
        return str(value)

    def process_result_value(self, value, dialect):
        """Parse bracketed vector text from the database into a list of floats.

        Non-string values (including ``None``) and unparsable strings are
        returned unchanged.
        """
        if not isinstance(value, str):
            return value
        try:
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(item.strip()) for item in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""

        def process(value):
            return self.process_bind_param(value, dialect)

        return process

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""

        def process(value):
            return self.process_result_value(value, dialect)

        return process

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        # Use the same ``is not None`` test as get_col_spec so both agree on
        # whether a dimension is present.
        if self.dimension is not None:
            return f"Vectorf64(dimension={self.dimension})"
        return "Vectorf64()"
309
+
310
+
311
class VectorTypeDecorator(TypeDecorator):
    """
    TypeDecorator that stores vectors as text.

    Python sequences are written as bracketed text (``[v1,v2,...]``) and read
    back as lists of floats. ``dimension`` and ``precision`` are kept as
    attributes; the storage type itself is always a TEXT column.
    """

    impl = Text  # Use TEXT type for large vector storage
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None, precision: str = VectorPrecision.F32, **kwargs):
        """
        Initialize VectorTypeDecorator.

        Args::

            dimension: Vector dimension
            precision: Vector precision
            **kwargs: Additional arguments passed to TypeDecorator
        """
        self.dimension = dimension
        self.precision = precision
        super().__init__(**kwargs)

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect."""
        if dialect.name == "mysql":
            # SQLAlchemy calls ``.dialect_impl(dialect)`` on the returned
            # object, so it must be a type *instance*; returning the bare
            # ``mysql.TEXT`` class fails at that call.
            return dialect.type_descriptor(mysql.TEXT())
        return self.impl

    def process_bind_param(self, value, dialect):
        """Serialize the value to bracketed vector text before binding."""
        if value is None or isinstance(value, str):
            # NULLs and pre-formatted strings are passed through untouched.
            return value
        if isinstance(value, (list, tuple)):
            # str() on a tuple would yield "(1, 2)", which is not the
            # bracketed format, so tuples are serialized like lists.
            return "[" + ",".join(map(str, value)) + "]"
        return str(value)

    def process_result_value(self, value, dialect):
        """Parse bracketed vector text from the database into a list of floats.

        Non-string values (including ``None``) and unparsable strings are
        returned unchanged.
        """
        if not isinstance(value, str):
            return value
        try:
            # Remove brackets and split by comma
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(item.strip()) for item in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def __repr__(self):
        if self.dimension:
            return f"VectorTypeDecorator(dimension={self.dimension}, precision='{self.precision}')"
        return f"VectorTypeDecorator(precision='{self.precision}')"
375
+
376
+
377
class VectorColumn(Column):
    """
    Extended Column class with vector distance functions.

    Every method builds a SQL function call (``l2_distance``,
    ``l2_distance_sq``, ``cosine_distance`` or ``inner_product``) whose first
    argument is this column and whose second argument is the target vector.
    """

    inherit_cache = True

    @staticmethod
    def _vector_operand(other):
        """Turn the ``other`` argument into something usable as a SQL operand.

        Lists and tuples are rendered as bracketed text (``[v1,v2,...]``);
        that text, or a string supplied by the caller, is wrapped in
        ``literal()`` so it is bound as-is. Anything else (e.g. a column) is
        returned unchanged.
        """
        from sqlalchemy import literal

        if isinstance(other, (list, tuple)):
            other = "[" + ",".join(map(str, other)) + "]"
        if isinstance(other, str):
            return literal(other)
        return other

    def _distance_expr(self, other, distance_type: str):
        """Dispatch ``distance_type`` to the matching distance method.

        Raises::

            ValueError: if ``distance_type`` is not "l2", "cosine" or "inner_product"
        """
        if distance_type == "l2":
            return self.l2_distance(other)
        if distance_type == "cosine":
            return self.cosine_distance(other)
        if distance_type == "inner_product":
            return self.inner_product(other)
        raise ValueError(f"Unsupported distance type: {distance_type}")

    def l2_distance(self, other: Union[List[float], str, Column]) -> func:
        """
        Calculate L2 (Euclidean) distance between vectors.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).filter(
                Document.embedding.l2_distance([1, 2, 3]) < 0.5
            )
        """
        return func.l2_distance(self, self._vector_operand(other))

    def l2_distance_sq(self, other: Union[List[float], str, Column]) -> func:
        """
        Calculate squared L2 distance between vectors.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.l2_distance_sq([1, 2, 3])
            )
        """
        return func.l2_distance_sq(self, self._vector_operand(other))

    def cosine_distance(self, other: Union[List[float], str, Column]) -> func:
        """
        Calculate cosine distance between vectors.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).filter(
                Document.embedding.cosine_distance([1, 2, 3]) < 0.1
            )
        """
        return func.cosine_distance(self, self._vector_operand(other))

    def negative_inner_product(self, other: Union[List[float], str, Column]) -> func:
        """
        Calculate negative inner product between vectors.

        Note: This is implemented as -inner_product() since MatrixOne doesn't have native support.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.negative_inner_product([1, 2, 3])
            )
        """
        # Goes through the same literal() wrapping as the other distance
        # methods so all of them treat text operands identically.
        return -func.inner_product(self, self._vector_operand(other))

    def inner_product(self, other: Union[List[float], str, Column]) -> func:
        """
        Calculate inner product (dot product) between vectors.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.inner_product([1, 2, 3]).desc()
            )
        """
        return func.inner_product(self, self._vector_operand(other))

    def similarity_search(
        self,
        other: Union[List[float], str, Column],
        distance_type: str = "l2",
        max_distance: Optional[float] = None,
    ) -> func:
        """
        Build the distance expression for a similarity search.

        Args::

            other: Target vector as list, string, or column
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
            max_distance: Accepted but NOT applied by this method; compare the
                returned expression yourself or use ``within_distance``

        Returns::

            SQLAlchemy function expression for distance calculation

        Raises::

            ValueError: if ``distance_type`` is not supported

        Example::

            # For ordering by similarity
            query = session.query(Document).order_by(
                Document.embedding.similarity_search([1, 2, 3])
            )

            # For filtering by distance
            query = session.query(Document).filter(
                Document.embedding.similarity_search([1, 2, 3]) < 1.0
            )
        """
        return self._distance_expr(other, distance_type)

    def within_distance(
        self, other: Union[List[float], str, Column], max_distance: float, distance_type: str = "l2"
    ) -> func:
        """
        Create a distance threshold filter expression.

        Args::

            other: Target vector as list, string, or column
            max_distance: Threshold to compare against
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

        Returns::

            SQLAlchemy boolean expression: ``distance < max_distance`` for
            "l2" and "cosine", ``inner_product > max_distance`` for
            "inner_product"

        Raises::

            ValueError: if ``distance_type`` is not supported

        Example::

            query = session.query(Document).filter(
                Document.embedding.within_distance([1, 2, 3], max_distance=1.0)
            )
        """
        distance_expr = self._distance_expr(other, distance_type)
        if distance_type == "inner_product":
            # For inner product, higher values are more similar
            return distance_expr > max_distance
        return distance_expr < max_distance

    def most_similar(self, other: Union[List[float], str, Column], distance_type: str = "l2", limit: int = 10) -> func:
        """
        Create an ordering expression for finding the most similar vectors.

        Args::

            other: Target vector as list, string, or column
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
            limit: Accepted but NOT applied by this method; call ``.limit()``
                on the query instead

        Returns::

            SQLAlchemy expression for ordering (descending for "inner_product")

        Raises::

            ValueError: if ``distance_type`` is not supported

        Example::

            query = session.query(Document).order_by(
                Document.embedding.most_similar([1, 2, 3])
            ).limit(10)
        """
        distance_expr = self._distance_expr(other, distance_type)
        if distance_type == "inner_product":
            # For inner product, we want descending order (higher is better)
            return distance_expr.desc()
        return distance_expr
629
+
630
+
631
+ # Independent distance functions for more flexible API
632
def l2_distance(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Calculate L2 (Euclidean) distance between vectors.

    This is an independent function that can be used with any column,
    providing more flexibility than the VectorColumn methods.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list (or tuple), string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import l2_distance

        # With list vector
        result = session.query(Document).filter(
            l2_distance(Document.embedding, [1, 2, 3]) < 0.5
        )

        # With string vector
        result = session.query(Document).filter(
            l2_distance(Document.embedding, "[1,2,3]") < 0.5
        )

        # With another column
        result = session.query(Document).filter(
            l2_distance(Document.embedding, Document.query_vector) < 0.5
        )
    """
    if isinstance(other, (list, tuple)):
        # Convert the sequence to the bracketed MatrixOne vector format.
        other = "[" + ",".join(map(str, other)) + "]"
    # Strings and column expressions are handed to SQLAlchemy unchanged.
    return func.l2_distance(column, other)
674
+
675
+
676
def l2_distance_sq(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Calculate squared L2 distance between vectors.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list (or tuple), string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import l2_distance_sq

        result = session.query(Document).order_by(
            l2_distance_sq(Document.embedding, [1, 2, 3])
        )
    """
    if isinstance(other, (list, tuple)):
        # Convert the sequence to the bracketed MatrixOne vector format.
        other = "[" + ",".join(map(str, other)) + "]"
    # Strings and column expressions are handed to SQLAlchemy unchanged.
    return func.l2_distance_sq(column, other)
703
+
704
+
705
def cosine_distance(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Calculate cosine distance between vectors.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list (or tuple), string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import cosine_distance

        result = session.query(Document).filter(
            cosine_distance(Document.embedding, [1, 2, 3]) < 0.1
        )
    """
    if isinstance(other, (list, tuple)):
        # Convert the sequence to the bracketed MatrixOne vector format.
        other = "[" + ",".join(map(str, other)) + "]"
    # Strings and column expressions are handed to SQLAlchemy unchanged.
    return func.cosine_distance(column, other)
732
+
733
+
734
def inner_product(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Calculate inner product (dot product) between vectors.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list (or tuple), string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import inner_product

        result = session.query(Document).order_by(
            inner_product(Document.embedding, [1, 2, 3]).desc()
        )
    """
    if isinstance(other, (list, tuple)):
        # Convert the sequence to the bracketed MatrixOne vector format.
        other = "[" + ",".join(map(str, other)) + "]"
    # Strings and column expressions are handed to SQLAlchemy unchanged.
    return func.inner_product(column, other)
761
+
762
+
763
def negative_inner_product(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Calculate negative inner product between vectors.

    Note: This is implemented as -inner_product() since MatrixOne doesn't have native support.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list (or tuple), string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import negative_inner_product

        result = session.query(Document).order_by(
            negative_inner_product(Document.embedding, [1, 2, 3])
        )
    """
    if isinstance(other, (list, tuple)):
        # Convert the sequence to the bracketed MatrixOne vector format.
        other = "[" + ",".join(map(str, other)) + "]"
    # Strings and column expressions are handed to SQLAlchemy unchanged;
    # the unary minus negates the resulting SQL function expression.
    return -func.inner_product(column, other)
792
+
793
+
794
+ # Convenience functions for vector operations
795
def create_vector_column(dimension: int, precision: str = VectorPrecision.F32, **kwargs) -> VectorColumn:
    """
    Build a ``VectorColumn`` typed with the decorator matching ``precision``.

    Args::

        dimension: Vector dimension
        precision: Vector precision (VectorPrecision.F32 or VectorPrecision.F64)
        **kwargs: Additional column arguments, forwarded to ``VectorColumn``

    Returns::

        VectorColumn instance

    Raises::

        ValueError: if ``precision`` is neither of the two supported values

    Example::

        class Document(Base):
            id = Column(Integer, primary_key=True)
            embedding = create_vector_column(128, precision=VectorPrecision.F32)
            description = Column(String(500))
    """
    # Checked in the same order as the precision constants: f32 first, then f64.
    precision_to_type = (
        (VectorPrecision.F32, Vectorf32),
        (VectorPrecision.F64, Vectorf64),
    )
    for tag, decorator_cls in precision_to_type:
        if precision == tag:
            return VectorColumn(decorator_cls(dimension=dimension), **kwargs)
    raise ValueError(f"Precision must be '{VectorPrecision.F32}' or '{VectorPrecision.F64}'")
821
+
822
+
823
def vector_similarity_search(
    column: Column,
    query_vector: Union[List[float], str, Column],
    distance_type: str = "l2",
    max_distance: Optional[float] = None,
) -> func:
    """
    Build the distance (or similarity) expression for a vector search.

    The function only selects and builds the expression named by
    ``distance_type``; it does NOT apply any threshold itself. The result can
    be used for ordering, or compared against a threshold by the caller.

    Args::

        column: Vector column to search in
        query_vector: Query vector as list, string, or column
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
        max_distance: Accepted for backward compatibility but currently
            ignored: the returned expression is not filtered. Compare the
            result yourself, or use ``within_distance`` to get a boolean
            filter expression.

    Returns::

        SQLAlchemy function expression for the distance calculation

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import vector_similarity_search

        # For ordering by similarity
        result = session.query(Document).order_by(
            vector_similarity_search(Document.embedding, [1, 2, 3])
        )

        # For filtering by distance (the comparison is done by the caller)
        result = session.query(Document).filter(
            vector_similarity_search(Document.embedding, [1, 2, 3]) < 1.0
        )
    """
    # NOTE: max_distance is intentionally not applied here. Existing callers
    # compare the returned expression to a threshold themselves, so returning
    # a boolean expression instead would break them.
    if distance_type == "l2":
        return l2_distance(column, query_vector)
    if distance_type == "cosine":
        return cosine_distance(column, query_vector)
    if distance_type == "inner_product":
        return inner_product(column, query_vector)
    raise ValueError(f"Unsupported distance type: {distance_type}")
868
+
869
+
870
def within_distance(
    column: Column,
    query_vector: Union[List[float], str, Column],
    max_distance: float,
    distance_type: str = "l2",
) -> func:
    """
    Build a boolean threshold filter on the distance to a query vector.

    Args::

        column: Vector column to filter
        query_vector: Query vector as list, string, or column
        max_distance: Threshold value. For "l2" and "cosine" the distance
            must be strictly below it; for "inner_product" the product must
            be strictly above it.
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

    Returns::

        SQLAlchemy boolean expression

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import within_distance

        result = session.query(Document).filter(
            within_distance(Document.embedding, [1, 2, 3], max_distance=1.0)
        )
    """
    if distance_type == "l2":
        measured = l2_distance(column, query_vector)
        return measured < max_distance
    if distance_type == "cosine":
        measured = cosine_distance(column, query_vector)
        return measured < max_distance
    if distance_type == "inner_product":
        # Inner product grows with similarity, so the comparison is inverted.
        measured = inner_product(column, query_vector)
        return measured > max_distance
    raise ValueError(f"Unsupported distance type: {distance_type}")
906
+
907
+
908
def most_similar(column: Column, query_vector: Union[List[float], str, Column], distance_type: str = "l2") -> func:
    """
    Build an ordering expression that ranks rows by similarity to a query vector.

    Args::

        column: Vector column to search in
        query_vector: Query vector as list, string, or column
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

    Returns::

        SQLAlchemy expression intended for ``order_by``. For "l2" and
        "cosine" this is the plain distance expression; for "inner_product"
        it is the product wrapped in ``.desc()``.

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import most_similar

        result = session.query(Document).order_by(
            most_similar(Document.embedding, [1, 2, 3])
        ).limit(10)
    """
    if distance_type == "l2":
        ordering = l2_distance(column, query_vector)
    elif distance_type == "cosine":
        ordering = cosine_distance(column, query_vector)
    elif distance_type == "inner_product":
        # Higher inner product means more similar, so sort descending.
        ordering = inner_product(column, query_vector).desc()
    else:
        raise ValueError(f"Unsupported distance type: {distance_type}")
    return ordering
938
+
939
+
940
def vector_distance_functions():
    """
    List the names of the available vector distance functions.

    Returns::

        A new list of function names on every call
    """
    names = ("l2_distance", "l2_distance_sq", "cosine_distance")
    # Return a fresh list so callers can mutate the result safely.
    return list(names)