matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,948 @@
|
|
1
|
+
# Copyright 2021 - 2022 Matrix Origin
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""
|
16
|
+
Vector type for SQLAlchemy integration with MatrixOne.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from typing import Any, List, Optional, Union
|
20
|
+
|
21
|
+
from sqlalchemy import Column, Text, TypeDecorator, func
|
22
|
+
from sqlalchemy.dialects import mysql
|
23
|
+
from sqlalchemy.types import UserDefinedType
|
24
|
+
|
25
|
+
|
26
|
+
class VectorPrecision:
    """Namespace of the string tags used to select a vector precision.

    ``F32`` and ``F64`` are plain strings; they are appended to ``"vec"`` when a
    column specification is rendered (giving ``vecf32`` / ``vecf64``).
    """

    F32, F64 = "f32", "f64"
|
31
|
+
|
32
|
+
|
33
|
+
class VectorType(UserDefinedType):
    """
    SQLAlchemy type for MatrixOne vector columns.

    The type renders as ``vec<precision>`` (optionally followed by
    ``(<dimension>)``) in DDL and converts between Python sequences and the
    bracketed, comma-separated text form ``[v1,v2,...]`` when values are bound
    to statements or read back from result rows.

    Usage::

        # Define vector columns in SQLAlchemy models
        class Document(Base):
            __tablename__ = 'documents'
            id = Column(Integer, primary_key=True)
            content = Column(Text)
            embedding = Column(VectorType(384, VectorPrecision.F32))     # 384-dim f32 vector
            embedding_64 = Column(VectorType(512, VectorPrecision.F64))  # 512-dim f64 vector

        # Use in table creation
        client.create_table_orm('documents',
            Column('id', Integer, primary_key=True),
            Column('content', Text),
            Column('embedding', VectorType(384, VectorPrecision.F32))
        )

    Note: Vector dimensions and precision must match the requirements of your
    vector indexing strategy and embedding model.
    """

    __visit_name__ = "VECTOR"

    # The only state is (dimension, precision); both are hashable constructor
    # arguments stored under the same names, so the type can safely take part in
    # SQLAlchemy's statement cache. Without this flag SQLAlchemy 1.4+ emits a
    # warning and skips caching for statements that use this type.
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None, precision: str = VectorPrecision.F32):
        """
        Initialize VectorType.

        Args::

            dimension: Vector dimension (optional)
            precision: Vector precision - VectorPrecision.F32 for vecf32, VectorPrecision.F64 for vecf64
        """
        self.dimension = dimension
        self.precision = precision

    @staticmethod
    def _to_vector_text(value):
        """Serialize a bind value to the ``[v1,v2,...]`` text form.

        ``None`` and strings pass through unchanged, lists and tuples are joined
        with commas inside square brackets, anything else is passed to ``str``.
        """
        if value is None or isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            # Tuples used to fall through to str(), producing "(1, 2, 3)".
            return "[" + ",".join(map(str, value)) + "]"
        return str(value)

    @staticmethod
    def _from_vector_text(value):
        """Parse ``[v1,v2,...]`` text into a list of floats.

        Non-string values (including ``None``) are returned unchanged, as is a
        string whose components cannot be converted with ``float``.
        """
        if not isinstance(value, str):
            return value
        inner = value.strip("[]")
        if not inner:
            return []
        try:
            return [float(component.strip()) for component in inner.split(",")]
        except ValueError:
            # Best effort: hand back the raw text rather than failing the fetch.
            return value

    def get_col_spec(self, **kw: Any) -> str:
        """Return the column specification for this type."""
        if self.dimension is not None:
            return f"vec{self.precision}({self.dimension})"
        return f"vec{self.precision}"

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""
        return self._to_vector_text

    def process_bind_param(self, value, dialect):
        """Process the value before binding to the database."""
        return self._to_vector_text(value)

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""
        return self._from_vector_text

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        # Same "is not None" test as get_col_spec so both agree on whether a
        # dimension was supplied.
        if self.dimension is not None:
            return f"VectorType(dimension={self.dimension}, precision='{self.precision}')"
        return f"VectorType(precision='{self.precision}')"
|
153
|
+
|
154
|
+
|
155
|
+
class Vectorf32(TypeDecorator):
    """TypeDecorator shortcut for 32-bit float vector columns.

    Backed by ``Text``; on a dialect named ``"matrixone"`` it delegates to
    ``VectorType`` with ``VectorPrecision.F32``.
    """

    impl = Text
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None):
        self.dimension = dimension
        self.precision = VectorPrecision.F32
        super().__init__()

    def get_col_spec(self, **kw):
        """Return the column specification for this type."""
        return "vecf32" if self.dimension is None else f"vecf32({self.dimension})"

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect."""
        if getattr(dialect, "name", None) != "matrixone":
            return self.impl
        # For SQL generation on MatrixOne, hand back the custom vector type.
        return VectorType(dimension=self.dimension, precision=VectorPrecision.F32)

    def process_bind_param(self, value, dialect):
        """Process the value before binding to the database."""
        if value is None or isinstance(value, str):
            return value
        if isinstance(value, list):
            # MatrixOne vector text form: bracketed, comma-separated components.
            return "[%s]" % ",".join(str(component) for component in value)
        return str(value)

    def process_result_value(self, value, dialect):
        """Process the value after retrieving from the database."""
        if not isinstance(value, str):
            # Covers None and any already-decoded value.
            return value
        try:
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(component.strip()) for component in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""
        return lambda value: self.process_bind_param(value, dialect)

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""
        return lambda value: self.process_result_value(value, dialect)

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        return f"Vectorf32(dimension={self.dimension})" if self.dimension else "Vectorf32()"
|
231
|
+
|
232
|
+
|
233
|
+
class Vectorf64(TypeDecorator):
    """TypeDecorator shortcut for 64-bit float vector columns.

    Backed by ``Text``; on a dialect named ``"matrixone"`` it delegates to
    ``VectorType`` with ``VectorPrecision.F64``.
    """

    impl = Text
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None):
        self.dimension = dimension
        self.precision = VectorPrecision.F64
        super().__init__()

    def get_col_spec(self, **kw):
        """Return the column specification for this type."""
        return "vecf64" if self.dimension is None else f"vecf64({self.dimension})"

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect."""
        if getattr(dialect, "name", None) != "matrixone":
            return self.impl
        # For SQL generation on MatrixOne, hand back the custom vector type.
        return VectorType(dimension=self.dimension, precision=VectorPrecision.F64)

    def process_bind_param(self, value, dialect):
        """Process the value before binding to the database."""
        if value is None or isinstance(value, str):
            return value
        if isinstance(value, list):
            # MatrixOne vector text form: bracketed, comma-separated components.
            return "[%s]" % ",".join(str(component) for component in value)
        return str(value)

    def process_result_value(self, value, dialect):
        """Process the value after retrieving from the database."""
        if not isinstance(value, str):
            # Covers None and any already-decoded value.
            return value
        try:
            inner = value.strip("[]")
            if not inner:
                return []
            return [float(component.strip()) for component in inner.split(",")]
        except (ValueError, AttributeError):
            return value

    def bind_processor(self, dialect):
        """Return a conversion function for processing bind values."""
        return lambda value: self.process_bind_param(value, dialect)

    def result_processor(self, dialect, coltype):
        """Return a conversion function for processing result values."""
        return lambda value: self.process_result_value(value, dialect)

    def __str__(self):
        """Return the column specification for this type."""
        return self.get_col_spec()

    def __repr__(self):
        return f"Vectorf64(dimension={self.dimension})" if self.dimension else "Vectorf64()"
|
309
|
+
|
310
|
+
|
311
|
+
class VectorTypeDecorator(TypeDecorator):
    """
    TEXT-backed TypeDecorator that stores vectors as ``[v1,v2,...]`` strings.

    Lists are serialized on the way in and parsed back to lists of floats on
    the way out. ``dimension`` and ``precision`` are recorded on the instance
    and shown in ``repr``; this class does not use them when choosing the
    underlying column type.
    """

    impl = Text  # Use TEXT type for large vector storage
    cache_ok = True

    def __init__(self, dimension: Optional[int] = None, precision: str = VectorPrecision.F32, **kwargs):
        """
        Initialize VectorTypeDecorator.

        Args::

            dimension: Vector dimension
            precision: Vector precision
            **kwargs: Additional arguments passed to TypeDecorator
        """
        self.dimension = dimension
        self.precision = precision
        super().__init__(**kwargs)

    def load_dialect_impl(self, dialect):
        """Return the appropriate type for the given dialect.

        SQLAlchemy expects a TypeEngine *instance* here; returning the bare
        ``mysql.TEXT`` class makes the subsequent ``.dialect_impl(dialect)``
        call fail, so the type is instantiated and adapted through the dialect.
        """
        if dialect.name == "mysql":
            return dialect.type_descriptor(mysql.TEXT())  # TEXT for large vector data
        return self.impl

    def process_bind_param(self, value, dialect):
        """Process the value before binding to the database."""
        if value is None:
            return None
        if isinstance(value, list):
            # Convert list to MatrixOne vector format
            return "[" + ",".join(map(str, value)) + "]"
        if isinstance(value, str):
            return value
        return str(value)

    def process_result_value(self, value, dialect):
        """Process the value after retrieving from the database."""
        if value is None:
            return None
        if isinstance(value, str):
            # Parse MatrixOne vector format back to list
            try:
                clean_value = value.strip("[]")
                if clean_value:
                    return [float(x.strip()) for x in clean_value.split(",")]
                return []
            except (ValueError, AttributeError):
                # Not a parseable vector string; return the raw text unchanged.
                return value
        return value

    def __repr__(self):
        if self.dimension:
            return f"VectorTypeDecorator(dimension={self.dimension}, precision='{self.precision}')"
        return f"VectorTypeDecorator(precision='{self.precision}')"
|
375
|
+
|
376
|
+
|
377
|
+
class VectorColumn(Column):
    """
    Column subclass that adds vector distance helpers.

    Each helper builds a ``func.<name>(...)`` SQL function expression with this
    column as the first argument. List operands are first rendered as
    ``[v1,v2,...]`` text.
    """

    inherit_cache = True

    def l2_distance(self, other: Union[List[float], str, Column]) -> func:
        """
        Build an ``l2_distance`` (Euclidean) expression against ``other``.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).filter(
                Document.embedding.l2_distance([1, 2, 3]) < 0.5
            )
        """
        from sqlalchemy import literal

        target = other
        if isinstance(target, list):
            # Render the list in MatrixOne vector text form.
            target = "[" + ",".join(str(component) for component in target) + "]"
        if isinstance(target, str):
            # Strings are wrapped in literal() before being handed to func.
            target = literal(target)
        return func.l2_distance(self, target)

    def l2_distance_sq(self, other: Union[List[float], str, Column]) -> func:
        """
        Build an ``l2_distance_sq`` (squared L2) expression against ``other``.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.l2_distance_sq([1, 2, 3])
            )
        """
        from sqlalchemy import literal

        target = other
        if isinstance(target, list):
            target = "[" + ",".join(str(component) for component in target) + "]"
        if isinstance(target, str):
            target = literal(target)
        return func.l2_distance_sq(self, target)

    def cosine_distance(self, other: Union[List[float], str, Column]) -> func:
        """
        Build a ``cosine_distance`` expression against ``other``.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).filter(
                Document.embedding.cosine_distance([1, 2, 3]) < 0.1
            )
        """
        from sqlalchemy import literal

        target = other
        if isinstance(target, list):
            target = "[" + ",".join(str(component) for component in target) + "]"
        if isinstance(target, str):
            target = literal(target)
        return func.cosine_distance(self, target)

    def negative_inner_product(self, other: Union[List[float], str, Column]) -> func:
        """
        Build the negated ``inner_product`` expression against ``other``.

        Note: This is implemented as -inner_product() since MatrixOne doesn't have native support.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.negative_inner_product([1, 2, 3])
            )
        """
        # NOTE(review): unlike the sibling methods, the operand is passed to
        # func without a literal() wrapper - confirm whether that is intended.
        target = other
        if isinstance(target, list):
            target = "[" + ",".join(str(component) for component in target) + "]"
        return -func.inner_product(self, target)

    def inner_product(self, other: Union[List[float], str, Column]) -> func:
        """
        Build an ``inner_product`` (dot product) expression against ``other``.

        Args::

            other: Target vector as list, string, or column

        Returns::

            SQLAlchemy function expression

        Example::

            query = session.query(Document).order_by(
                Document.embedding.inner_product([1, 2, 3]).desc()
            )
        """
        from sqlalchemy import literal

        target = other
        if isinstance(target, list):
            target = "[" + ",".join(str(component) for component in target) + "]"
        if isinstance(target, str):
            target = literal(target)
        return func.inner_product(self, target)

    def similarity_search(
        self,
        other: Union[List[float], str, Column],
        distance_type: str = "l2",
        max_distance: Optional[float] = None,
    ) -> func:
        """
        Return the distance expression selected by ``distance_type``.

        Args::

            other: Target vector as list, string, or column
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
            max_distance: Accepted for API compatibility; not used by this method,
                so callers must apply any threshold themselves

        Returns::

            SQLAlchemy function expression for distance calculation

        Raises::

            ValueError: If ``distance_type`` is not one of the supported names

        Example::

            # For ordering by similarity
            query = session.query(Document).order_by(
                Document.embedding.similarity_search([1, 2, 3])
            )

            # For filtering by distance
            query = session.query(Document).filter(
                Document.embedding.similarity_search([1, 2, 3], max_distance=1.0) < 1.0
            )
        """
        if distance_type == "l2":
            return self.l2_distance(other)
        if distance_type == "cosine":
            return self.cosine_distance(other)
        if distance_type == "inner_product":
            return self.inner_product(other)
        raise ValueError(f"Unsupported distance type: {distance_type}")

    def within_distance(
        self, other: Union[List[float], str, Column], max_distance: float, distance_type: str = "l2"
    ) -> func:
        """
        Build a boolean threshold comparison on the selected distance.

        Args::

            other: Target vector as list, string, or column
            max_distance: Threshold; "l2" and "cosine" compare with ``<``,
                "inner_product" compares with ``>``
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

        Returns::

            SQLAlchemy boolean expression

        Raises::

            ValueError: If ``distance_type`` is not one of the supported names

        Example::

            query = session.query(Document).filter(
                Document.embedding.within_distance([1, 2, 3], max_distance=1.0)
            )
        """
        if distance_type == "l2":
            measure = self.l2_distance(other)
        elif distance_type == "cosine":
            measure = self.cosine_distance(other)
        elif distance_type == "inner_product":
            # For inner product, higher values are more similar
            return self.inner_product(other) > max_distance
        else:
            raise ValueError(f"Unsupported distance type: {distance_type}")
        return measure < max_distance

    def most_similar(self, other: Union[List[float], str, Column], distance_type: str = "l2", limit: int = 10) -> func:
        """
        Return an expression suitable for ``order_by`` to rank by similarity.

        Args::

            other: Target vector as list, string, or column
            distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
            limit: Accepted for API compatibility; not used by this method,
                so apply ``.limit()`` on the query

        Returns::

            SQLAlchemy function expression for ordering (for "inner_product"
            the expression already has ``.desc()`` applied)

        Raises::

            ValueError: If ``distance_type`` is not one of the supported names

        Example::

            query = session.query(Document).order_by(
                Document.embedding.most_similar([1, 2, 3])
            ).limit(10)
        """
        if distance_type == "l2":
            ordering = self.l2_distance(other)
        elif distance_type == "cosine":
            ordering = self.cosine_distance(other)
        elif distance_type == "inner_product":
            # For inner product, we want descending order (higher is better)
            ordering = self.inner_product(other).desc()
        else:
            raise ValueError(f"Unsupported distance type: {distance_type}")
        return ordering
|
629
|
+
|
630
|
+
|
631
|
+
# Independent distance functions for more flexible API
|
632
|
+
def l2_distance(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Build an ``l2_distance`` (Euclidean) expression between ``column`` and ``other``.

    Standalone counterpart of the VectorColumn method; works with any column.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list, string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import l2_distance

        # With list vector
        result = session.query(Document).filter(
            l2_distance(Document.embedding, [1, 2, 3]) < 0.5
        )

        # With string vector
        result = session.query(Document).filter(
            l2_distance(Document.embedding, "[1,2,3]") < 0.5
        )

        # With another column
        result = session.query(Document).filter(
            l2_distance(Document.embedding, Document.query_vector) < 0.5
        )
    """
    target = other
    if isinstance(other, list):
        # Lists are rendered in MatrixOne vector text form; strings and
        # columns are forwarded untouched.
        target = "[" + ",".join(str(component) for component in other) + "]"
    return func.l2_distance(column, target)
|
674
|
+
|
675
|
+
|
676
|
+
def l2_distance_sq(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Build an ``l2_distance_sq`` (squared L2) expression between ``column`` and ``other``.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list, string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import l2_distance_sq

        result = session.query(Document).order_by(
            l2_distance_sq(Document.embedding, [1, 2, 3])
        )
    """
    target = other
    if isinstance(other, list):
        # Lists are rendered in MatrixOne vector text form; strings and
        # columns are forwarded untouched.
        target = "[" + ",".join(str(component) for component in other) + "]"
    return func.l2_distance_sq(column, target)
|
703
|
+
|
704
|
+
|
705
|
+
def cosine_distance(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Build a ``cosine_distance`` expression between ``column`` and ``other``.

    Args::

        column: Vector column to calculate distance from
        other: Target vector as list, string, or column

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import cosine_distance

        result = session.query(Document).filter(
            cosine_distance(Document.embedding, [1, 2, 3]) < 0.1
        )
    """
    target = other
    if isinstance(other, list):
        # Lists are rendered in MatrixOne vector text form; strings and
        # columns are forwarded untouched.
        target = "[" + ",".join(str(component) for component in other) + "]"
    return func.cosine_distance(column, target)
|
732
|
+
|
733
|
+
|
734
|
+
def inner_product(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Build a SQL ``inner_product`` (dot product) expression.

    Args::

        column: Vector column passed as the first function argument
        other: Target vector. A Python list is rendered as a
            ``"[v1,v2,...]"`` string; a string or a column is passed to
            the SQL function unchanged

    Returns::

        SQLAlchemy function expression

    Example::

        from matrixone.sqlalchemy_ext import inner_product

        result = session.query(Document).order_by(
            inner_product(Document.embedding, [1, 2, 3]).desc()
        )
    """
    # Only lists need serialising; every other input goes through as-is.
    target = other
    if isinstance(other, list):
        target = f"[{','.join(str(item) for item in other)}]"
    return func.inner_product(column, target)
|
761
|
+
|
762
|
+
|
763
|
+
def negative_inner_product(column: Column, other: Union[List[float], str, Column]) -> func:
    """
    Build the negated SQL ``inner_product`` expression.

    Note: This is implemented as -inner_product() since MatrixOne doesn't
    have native support.

    Args::

        column: Vector column passed as the first function argument
        other: Target vector. A Python list is rendered as a
            ``"[v1,v2,...]"`` string; a string or a column is passed to
            the SQL function unchanged

    Returns::

        SQLAlchemy expression: unary minus applied to ``inner_product(...)``

    Example::

        from matrixone.sqlalchemy_ext import negative_inner_product

        result = session.query(Document).order_by(
            negative_inner_product(Document.embedding, [1, 2, 3])
        )
    """
    # Only lists need serialising; every other input goes through as-is.
    target = other
    if isinstance(other, list):
        target = f"[{','.join(str(item) for item in other)}]"
    product = func.inner_product(column, target)
    return -product
|
792
|
+
|
793
|
+
|
794
|
+
# Convenience functions for vector operations
|
795
|
+
def create_vector_column(dimension: int, precision: str = VectorPrecision.F32, **kwargs) -> VectorColumn:
    """
    Construct a ``VectorColumn`` of the requested dimension and precision.

    Args::

        dimension: Vector dimension
        precision: Vector precision (VectorPrecision.F32 or VectorPrecision.F64)
        **kwargs: Additional arguments forwarded to ``VectorColumn``

    Returns::

        VectorColumn instance wrapping ``Vectorf32`` or ``Vectorf64``

    Raises::

        ValueError: If ``precision`` matches neither supported value

    Example::

        class Document(Base):
            id = Column(Integer, primary_key=True)
            embedding = create_vector_column(128, precision=VectorPrecision.F32)
            description = Column(String(500))
    """
    # Pick the type class first so the column is constructed in one place.
    if precision == VectorPrecision.F32:
        vector_cls = Vectorf32
    elif precision == VectorPrecision.F64:
        vector_cls = Vectorf64
    else:
        raise ValueError(f"Precision must be '{VectorPrecision.F32}' or '{VectorPrecision.F64}'")
    return VectorColumn(vector_cls(dimension=dimension), **kwargs)
|
821
|
+
|
822
|
+
|
823
|
+
def vector_similarity_search(
    column: Column,
    query_vector: Union[List[float], str, Column],
    distance_type: str = "l2",
    max_distance: Optional[float] = None,
) -> func:
    """
    Build the distance expression for a similarity search.

    The metric named by ``distance_type`` is delegated to ``l2_distance``,
    ``cosine_distance`` or ``inner_product``.

    Args::

        column: Vector column to search in
        query_vector: Query vector as list, string, or column
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")
        max_distance: Accepted for API compatibility; this function does
            not read it, so any threshold must be applied by the caller
            (see the filtering example below)

    Returns::

        SQLAlchemy function expression for distance calculation

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import vector_similarity_search

        # For ordering by similarity
        result = session.query(Document).order_by(
            vector_similarity_search(Document.embedding, [1, 2, 3])
        )

        # For filtering by distance
        result = session.query(Document).filter(
            vector_similarity_search(Document.embedding, [1, 2, 3], max_distance=1.0) < 1.0
        )
    """
    # Ordered (name, builder) pairs, compared with == in the listed order.
    builders = (
        ("l2", l2_distance),
        ("cosine", cosine_distance),
        ("inner_product", inner_product),
    )
    for name, build in builders:
        if distance_type == name:
            return build(column, query_vector)
    raise ValueError(f"Unsupported distance type: {distance_type}")
|
868
|
+
|
869
|
+
|
870
|
+
def within_distance(
    column: Column,
    query_vector: Union[List[float], str, Column],
    max_distance: float,
    distance_type: str = "l2",
) -> func:
    """
    Build a boolean threshold filter on a vector distance.

    Args::

        column: Vector column to filter
        query_vector: Query vector as list, string, or column
        max_distance: Threshold value. For "l2" and "cosine" the
            expression is ``distance < max_distance``; for
            "inner_product" it is ``inner_product > max_distance``
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

    Returns::

        SQLAlchemy boolean expression

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import within_distance

        result = session.query(Document).filter(
            within_distance(Document.embedding, [1, 2, 3], max_distance=1.0)
        )
    """
    if distance_type == "l2":
        l2_expr = l2_distance(column, query_vector)
        return l2_expr < max_distance

    if distance_type == "cosine":
        cosine_expr = cosine_distance(column, query_vector)
        return cosine_expr < max_distance

    if distance_type == "inner_product":
        # Higher inner product means more similar, so the comparison flips.
        product_expr = inner_product(column, query_vector)
        return product_expr > max_distance

    raise ValueError(f"Unsupported distance type: {distance_type}")
|
906
|
+
|
907
|
+
|
908
|
+
def most_similar(column: Column, query_vector: Union[List[float], str, Column], distance_type: str = "l2") -> func:
    """
    Build an ordering expression that ranks rows by similarity.

    Args::

        column: Vector column to search in
        query_vector: Query vector as list, string, or column
        distance_type: Type of distance calculation ("l2", "cosine", "inner_product")

    Returns::

        SQLAlchemy expression for ordering. For "l2" and "cosine" this is
        the plain distance expression; for "inner_product" it is the
        inner product with ``.desc()`` applied

    Raises::

        ValueError: If ``distance_type`` is not one of the supported names

    Example::

        from matrixone.sqlalchemy_ext import most_similar

        result = session.query(Document).order_by(
            most_similar(Document.embedding, [1, 2, 3])
        ).limit(10)
    """
    if distance_type == "l2":
        return l2_distance(column, query_vector)

    if distance_type == "cosine":
        return cosine_distance(column, query_vector)

    if distance_type == "inner_product":
        # Higher inner product is better, hence descending order.
        product_expr = inner_product(column, query_vector)
        return product_expr.desc()

    raise ValueError(f"Unsupported distance type: {distance_type}")
|
938
|
+
|
939
|
+
|
940
|
+
def vector_distance_functions():
    """
    List the names of the available vector distance functions.

    Returns::

        List of function names (a fresh list on every call)
    """
    names = [
        "l2_distance",
        "l2_distance_sq",
        "cosine_distance",
    ]
    return names
|