matrixone-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/__init__.py +155 -0
- matrixone/account.py +723 -0
- matrixone/async_client.py +3913 -0
- matrixone/async_metadata_manager.py +311 -0
- matrixone/async_orm.py +123 -0
- matrixone/async_vector_index_manager.py +633 -0
- matrixone/base_client.py +208 -0
- matrixone/client.py +4672 -0
- matrixone/config.py +452 -0
- matrixone/connection_hooks.py +286 -0
- matrixone/exceptions.py +89 -0
- matrixone/logger.py +782 -0
- matrixone/metadata.py +820 -0
- matrixone/moctl.py +219 -0
- matrixone/orm.py +2277 -0
- matrixone/pitr.py +646 -0
- matrixone/pubsub.py +771 -0
- matrixone/restore.py +411 -0
- matrixone/search_vector_index.py +1176 -0
- matrixone/snapshot.py +550 -0
- matrixone/sql_builder.py +844 -0
- matrixone/sqlalchemy_ext/__init__.py +161 -0
- matrixone/sqlalchemy_ext/adapters.py +163 -0
- matrixone/sqlalchemy_ext/dialect.py +534 -0
- matrixone/sqlalchemy_ext/fulltext_index.py +895 -0
- matrixone/sqlalchemy_ext/fulltext_search.py +1686 -0
- matrixone/sqlalchemy_ext/hnsw_config.py +194 -0
- matrixone/sqlalchemy_ext/ivf_config.py +252 -0
- matrixone/sqlalchemy_ext/table_builder.py +351 -0
- matrixone/sqlalchemy_ext/vector_index.py +1721 -0
- matrixone/sqlalchemy_ext/vector_type.py +948 -0
- matrixone/version.py +580 -0
- matrixone_python_sdk-0.1.0.dist-info/METADATA +706 -0
- matrixone_python_sdk-0.1.0.dist-info/RECORD +122 -0
- matrixone_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- matrixone_python_sdk-0.1.0.dist-info/entry_points.txt +5 -0
- matrixone_python_sdk-0.1.0.dist-info/licenses/LICENSE +200 -0
- matrixone_python_sdk-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +19 -0
- tests/offline/__init__.py +20 -0
- tests/offline/conftest.py +77 -0
- tests/offline/test_account.py +703 -0
- tests/offline/test_async_client_query_comprehensive.py +1218 -0
- tests/offline/test_basic.py +54 -0
- tests/offline/test_case_sensitivity.py +227 -0
- tests/offline/test_connection_hooks_offline.py +287 -0
- tests/offline/test_dialect_schema_handling.py +609 -0
- tests/offline/test_explain_methods.py +346 -0
- tests/offline/test_filter_logical_in.py +237 -0
- tests/offline/test_fulltext_search_comprehensive.py +795 -0
- tests/offline/test_ivf_config.py +249 -0
- tests/offline/test_join_methods.py +281 -0
- tests/offline/test_join_sqlalchemy_compatibility.py +276 -0
- tests/offline/test_logical_in_method.py +237 -0
- tests/offline/test_matrixone_version_parsing.py +264 -0
- tests/offline/test_metadata_offline.py +557 -0
- tests/offline/test_moctl.py +300 -0
- tests/offline/test_moctl_simple.py +251 -0
- tests/offline/test_model_support_offline.py +359 -0
- tests/offline/test_model_support_simple.py +225 -0
- tests/offline/test_pinecone_filter_offline.py +377 -0
- tests/offline/test_pitr.py +585 -0
- tests/offline/test_pubsub.py +712 -0
- tests/offline/test_query_update.py +283 -0
- tests/offline/test_restore.py +445 -0
- tests/offline/test_snapshot_comprehensive.py +384 -0
- tests/offline/test_sql_escaping_edge_cases.py +551 -0
- tests/offline/test_sqlalchemy_integration.py +382 -0
- tests/offline/test_sqlalchemy_vector_integration.py +434 -0
- tests/offline/test_table_builder.py +198 -0
- tests/offline/test_unified_filter.py +398 -0
- tests/offline/test_unified_transaction.py +495 -0
- tests/offline/test_vector_index.py +238 -0
- tests/offline/test_vector_operations.py +688 -0
- tests/offline/test_vector_type.py +174 -0
- tests/offline/test_version_core.py +328 -0
- tests/offline/test_version_management.py +372 -0
- tests/offline/test_version_standalone.py +652 -0
- tests/online/__init__.py +20 -0
- tests/online/conftest.py +216 -0
- tests/online/test_account_management.py +194 -0
- tests/online/test_advanced_features.py +344 -0
- tests/online/test_async_client_interfaces.py +330 -0
- tests/online/test_async_client_online.py +285 -0
- tests/online/test_async_model_insert_online.py +293 -0
- tests/online/test_async_orm_online.py +300 -0
- tests/online/test_async_simple_query_online.py +802 -0
- tests/online/test_async_transaction_simple_query.py +300 -0
- tests/online/test_basic_connection.py +130 -0
- tests/online/test_client_online.py +238 -0
- tests/online/test_config.py +90 -0
- tests/online/test_config_validation.py +123 -0
- tests/online/test_connection_hooks_new_online.py +217 -0
- tests/online/test_dialect_schema_handling_online.py +331 -0
- tests/online/test_filter_logical_in_online.py +374 -0
- tests/online/test_fulltext_comprehensive.py +1773 -0
- tests/online/test_fulltext_label_online.py +433 -0
- tests/online/test_fulltext_search_online.py +842 -0
- tests/online/test_ivf_stats_online.py +506 -0
- tests/online/test_logger_integration.py +311 -0
- tests/online/test_matrixone_query_orm.py +540 -0
- tests/online/test_metadata_online.py +579 -0
- tests/online/test_model_insert_online.py +255 -0
- tests/online/test_mysql_driver_validation.py +213 -0
- tests/online/test_orm_advanced_features.py +2022 -0
- tests/online/test_orm_cte_integration.py +269 -0
- tests/online/test_orm_online.py +270 -0
- tests/online/test_pinecone_filter.py +708 -0
- tests/online/test_pubsub_operations.py +352 -0
- tests/online/test_query_methods.py +225 -0
- tests/online/test_query_update_online.py +433 -0
- tests/online/test_search_vector_index.py +557 -0
- tests/online/test_simple_fulltext_online.py +915 -0
- tests/online/test_snapshot_comprehensive.py +998 -0
- tests/online/test_sqlalchemy_engine_integration.py +336 -0
- tests/online/test_sqlalchemy_integration.py +425 -0
- tests/online/test_transaction_contexts.py +1219 -0
- tests/online/test_transaction_insert_methods.py +356 -0
- tests/online/test_transaction_query_methods.py +288 -0
- tests/online/test_unified_filter_online.py +529 -0
- tests/online/test_vector_comprehensive.py +706 -0
- tests/online/test_version_management.py +291 -0
@@ -0,0 +1,706 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: matrixone-python-sdk
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: A comprehensive Python SDK for MatrixOne database operations with vector search, fulltext search, and advanced features
|
5
|
+
Home-page: https://github.com/matrixorigin/matrixone
|
6
|
+
Author: MatrixOne Team
|
7
|
+
Author-email: MatrixOne Team <dev@matrixone.io>
|
8
|
+
Maintainer-email: MatrixOne Team <dev@matrixone.io>
|
9
|
+
License: Apache-2.0
|
10
|
+
Project-URL: Homepage, https://github.com/matrixorigin/matrixone
|
11
|
+
Project-URL: Documentation, https://matrixone.readthedocs.io/
|
12
|
+
Project-URL: Repository, https://github.com/matrixorigin/matrixone
|
13
|
+
Project-URL: Issues, https://github.com/matrixorigin/matrixone/issues
|
14
|
+
Project-URL: Changelog, https://github.com/matrixorigin/matrixone/blob/main/clients/python/CHANGELOG.md
|
15
|
+
Keywords: matrixone,database,vector,search,sqlalchemy,python
|
16
|
+
Classifier: Development Status :: 4 - Beta
|
17
|
+
Classifier: Intended Audience :: Developers
|
18
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
19
|
+
Classifier: Operating System :: OS Independent
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
21
|
+
Classifier: Programming Language :: Python :: 3.8
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
26
|
+
Classifier: Topic :: Database
|
27
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28
|
+
Requires-Python: >=3.8
|
29
|
+
Description-Content-Type: text/markdown
|
30
|
+
License-File: LICENSE
|
31
|
+
Requires-Dist: PyMySQL>=1.0.0
|
32
|
+
Requires-Dist: aiomysql>=0.2.0
|
33
|
+
Requires-Dist: SQLAlchemy<3.0.0,>=1.4.0
|
34
|
+
Requires-Dist: typing-extensions>=4.0.0
|
35
|
+
Requires-Dist: python-dateutil>=2.8.0
|
36
|
+
Requires-Dist: numpy>=1.20.0
|
37
|
+
Provides-Extra: dev
|
38
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
39
|
+
Requires-Dist: pytest-asyncio>=0.18.0; extra == "dev"
|
40
|
+
Requires-Dist: pytest-cov>=3.0; extra == "dev"
|
41
|
+
Requires-Dist: Faker>=10.0.0; extra == "dev"
|
42
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
43
|
+
Requires-Dist: flake8>=4.0; extra == "dev"
|
44
|
+
Requires-Dist: mypy>=0.950; extra == "dev"
|
45
|
+
Requires-Dist: isort>=5.0; extra == "dev"
|
46
|
+
Requires-Dist: sphinx>=4.0; extra == "dev"
|
47
|
+
Requires-Dist: sphinx-rtd-theme>=1.0; extra == "dev"
|
48
|
+
Requires-Dist: sphinx-autobuild>=2021.3.14; extra == "dev"
|
49
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
50
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
51
|
+
Requires-Dist: tox>=3.24.0; extra == "dev"
|
52
|
+
Provides-Extra: docs
|
53
|
+
Requires-Dist: sphinx>=4.0; extra == "docs"
|
54
|
+
Requires-Dist: sphinx-rtd-theme>=1.0; extra == "docs"
|
55
|
+
Requires-Dist: sphinx-autobuild>=2021.3.14; extra == "docs"
|
56
|
+
Provides-Extra: test
|
57
|
+
Requires-Dist: pytest>=6.0; extra == "test"
|
58
|
+
Requires-Dist: pytest-asyncio>=0.18.0; extra == "test"
|
59
|
+
Requires-Dist: pytest-cov>=3.0; extra == "test"
|
60
|
+
Requires-Dist: Faker>=10.0.0; extra == "test"
|
61
|
+
Dynamic: author
|
62
|
+
Dynamic: home-page
|
63
|
+
Dynamic: license-file
|
64
|
+
Dynamic: requires-python
|
65
|
+
|
66
|
+
# MatrixOne Python SDK
|
67
|
+
|
68
|
+
[PyPI version](https://badge.fury.io/py/matrixone-python-sdk)
|
69
|
+
[Supported Python versions](https://pypi.org/project/matrixone-python-sdk/)
|
70
|
+
[Documentation Status](https://matrixone.readthedocs.io/en/latest/?badge=latest)
|
71
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
72
|
+
|
73
|
+
A comprehensive, high-level Python SDK for MatrixOne that provides a SQLAlchemy-like interface for database operations, vector similarity search, fulltext search, snapshot management, PITR, restore operations, table cloning, and more.
|
74
|
+
|
75
|
+
---
|
76
|
+
|
77
|
+
## 📚 Documentation
|
78
|
+
|
79
|
+
**[📖 Complete Documentation on ReadTheDocs](https://matrixone.readthedocs.io/)** ⭐
|
80
|
+
|
81
|
+
**Quick Links:**
|
82
|
+
- 🚀 [Quick Start Guide](https://matrixone.readthedocs.io/en/latest/quickstart.html)
|
83
|
+
- 🧠 [Vector Search & IVF Index Monitoring](https://matrixone.readthedocs.io/en/latest/vector_guide.html)
|
84
|
+
- 📋 [Best Practices](https://matrixone.readthedocs.io/en/latest/best_practices.html)
|
85
|
+
- 📖 [API Reference](https://matrixone.readthedocs.io/en/latest/api/index.html)
|
86
|
+
|
87
|
+
---
|
88
|
+
|
89
|
+
## ✨ Features
|
90
|
+
|
91
|
+
- 🚀 **High Performance**: Optimized for MatrixOne database operations with connection pooling
|
92
|
+
- 🔄 **Async Support**: Full async/await support with AsyncClient for non-blocking operations
|
93
|
+
- 🧠 **Vector Search**: Advanced vector similarity search with HNSW and IVF indexing
|
94
|
+
- Support for f32 and f64 precision vectors
|
95
|
+
- Multiple distance metrics (L2, Cosine, Inner Product)
|
96
|
+
- ⭐ **IVF Index Health Monitoring** with `get_ivf_stats()` - Critical for production!
|
97
|
+
- High-performance indexing for AI/ML applications
|
98
|
+
- 🔍 **Fulltext Search**: Powerful fulltext indexing and search with BM25 and TF-IDF
|
99
|
+
- Natural language and boolean search modes
|
100
|
+
- Multi-column indexes with relevance scoring
|
101
|
+
- 📊 **Metadata Analysis**: Table and column metadata analysis with statistics
|
102
|
+
- 📸 **Snapshot Management**: Create and manage database snapshots at multiple levels
|
103
|
+
- ⏰ **Point-in-Time Recovery**: PITR functionality for precise data recovery
|
104
|
+
- 🔄 **Table Cloning**: Clone databases and tables efficiently
|
105
|
+
- 👥 **Account Management**: Comprehensive user and role management
|
106
|
+
- 🔔 **Pub/Sub**: Real-time publication and subscription support
|
107
|
+
- 🔧 **Version Management**: Automatic backend version detection and compatibility
|
108
|
+
- 🛡️ **Type Safety**: Full type hints support with comprehensive documentation
|
109
|
+
- 🔗 **SQLAlchemy Integration**: Seamless SQLAlchemy ORM integration with enhanced features
|
110
|
+
|
111
|
+
## 🚀 Installation
|
112
|
+
|
113
|
+
### Using pip (Recommended)
|
114
|
+
|
115
|
+
```bash
|
116
|
+
pip install matrixone-python-sdk
|
117
|
+
```
|
118
|
+
|
119
|
+
### Install from test.pypi (Latest Pre-release)
|
120
|
+
|
121
|
+
```bash
|
122
|
+
pip install \
|
123
|
+
--index-url https://test.pypi.org/simple/ \
|
124
|
+
--extra-index-url https://pypi.org/simple/ \
|
125
|
+
matrixone-python-sdk
|
126
|
+
```
|
127
|
+
|
128
|
+
**Note**: The `--extra-index-url` is required to install dependencies from the official PyPI.
|
129
|
+
|
130
|
+
### Using Virtual Environment (Best Practice)
|
131
|
+
|
132
|
+
```bash
|
133
|
+
# Create virtual environment
|
134
|
+
python -m venv venv
|
135
|
+
|
136
|
+
# Activate virtual environment
|
137
|
+
# On macOS/Linux:
|
138
|
+
source venv/bin/activate
|
139
|
+
# On Windows:
|
140
|
+
# venv\Scripts\activate
|
141
|
+
|
142
|
+
# Install MatrixOne SDK
|
143
|
+
pip install matrixone-python-sdk
|
144
|
+
|
145
|
+
# Verify installation
|
146
|
+
python -c "import matrixone; print('MatrixOne SDK installed successfully')"
|
147
|
+
```
|
148
|
+
|
149
|
+
### Using Conda
|
150
|
+
|
151
|
+
```bash
|
152
|
+
# Create conda environment
|
153
|
+
conda create -n matrixone python=3.10
|
154
|
+
conda activate matrixone
|
155
|
+
|
156
|
+
# Install MatrixOne SDK
|
157
|
+
pip install matrixone-python-sdk
|
158
|
+
```
|
159
|
+
|
160
|
+
## Quick Start
|
161
|
+
|
162
|
+
### Basic Usage
|
163
|
+
|
164
|
+
```python
|
165
|
+
from matrixone import Client
|
166
|
+
|
167
|
+
# Create and connect to MatrixOne
|
168
|
+
client = Client()
|
169
|
+
client.connect(
|
170
|
+
host='localhost',
|
171
|
+
port=6001,
|
172
|
+
user='root',
|
173
|
+
password='111',
|
174
|
+
database='test'
|
175
|
+
)
|
176
|
+
|
177
|
+
# Execute queries
|
178
|
+
result = client.execute("SELECT 1 as test")
|
179
|
+
print(result.fetchall())
|
180
|
+
|
181
|
+
# Get backend version (auto-detected)
|
182
|
+
version = client.get_backend_version()
|
183
|
+
print(f"MatrixOne version: {version}")
|
184
|
+
|
185
|
+
client.disconnect()
|
186
|
+
```
|
187
|
+
|
188
|
+
### Async Usage
|
189
|
+
|
190
|
+
```python
|
191
|
+
import asyncio
|
192
|
+
from matrixone import AsyncClient
|
193
|
+
|
194
|
+
async def main():
|
195
|
+
client = AsyncClient()
|
196
|
+
await client.connect(
|
197
|
+
host='localhost',
|
198
|
+
port=6001,
|
199
|
+
user='root',
|
200
|
+
password='111',
|
201
|
+
database='test'
|
202
|
+
)
|
203
|
+
|
204
|
+
result = await client.execute("SELECT 1 as test")
|
205
|
+
print(result.fetchall())
|
206
|
+
|
207
|
+
await client.disconnect()
|
208
|
+
|
209
|
+
asyncio.run(main())
|
210
|
+
```
|
211
|
+
|
212
|
+
### Snapshot Management
|
213
|
+
|
214
|
+
```python
|
215
|
+
# Create a snapshot
|
216
|
+
snapshot = client.snapshots.create(
|
217
|
+
'my_snapshot',
|
218
|
+
'cluster',
|
219
|
+
description='Backup before migration'
|
220
|
+
)
|
221
|
+
|
222
|
+
# List snapshots
|
223
|
+
snapshots = client.snapshots.list()
|
224
|
+
for snap in snapshots:
|
225
|
+
print(f"Snapshot: {snap.name}, Created: {snap.created_at}")
|
226
|
+
|
227
|
+
# Clone database from snapshot
|
228
|
+
client.clone.clone_database(
|
229
|
+
'new_database',
|
230
|
+
'old_database',
|
231
|
+
snapshot_name='my_snapshot'
|
232
|
+
)
|
233
|
+
```
|
234
|
+
|
235
|
+
### Version Management
|
236
|
+
|
237
|
+
```python
|
238
|
+
# Check if feature is available
|
239
|
+
if client.is_feature_available('snapshot_creation'):
|
240
|
+
snapshot = client.snapshots.create('my_snapshot', 'cluster')
|
241
|
+
else:
|
242
|
+
hint = client.get_version_hint('snapshot_creation')
|
243
|
+
print(f"Feature not available: {hint}")
|
244
|
+
|
245
|
+
# Check version compatibility
|
246
|
+
if client.check_version_compatibility('3.0.0', '>='):
|
247
|
+
print("Backend supports 3.0.0+ features")
|
248
|
+
```
|
249
|
+
|
250
|
+
## MatrixOne Version Support
|
251
|
+
|
252
|
+
The SDK automatically detects MatrixOne backend versions and handles compatibility:
|
253
|
+
|
254
|
+
- **Development Version**: `8.0.30-MatrixOne-v` → `999.0.0` (highest priority)
|
255
|
+
- **Release Version**: `8.0.30-MatrixOne-v3.0.0` → `3.0.0`
|
256
|
+
- **Legacy Format**: `MatrixOne 3.0.1` → `3.0.1`
|
257
|
+
|
258
|
+
```python
|
259
|
+
# Check if running development version
|
260
|
+
if client.is_development_version():
|
261
|
+
print("Running development version - all features available")
|
262
|
+
else:
|
263
|
+
print(f"Running release version: {client.get_backend_version()}")
|
264
|
+
```
|
265
|
+
|
266
|
+
## Advanced Features
|
267
|
+
|
268
|
+
### PITR (Point-in-Time Recovery)
|
269
|
+
|
270
|
+
```python
|
271
|
+
# Create PITR for cluster
|
272
|
+
pitr = client.pitr.create_cluster_pitr(
|
273
|
+
'cluster_pitr',
|
274
|
+
range_value=7,
|
275
|
+
range_unit='d'
|
276
|
+
)
|
277
|
+
|
278
|
+
# Restore cluster from snapshot
|
279
|
+
client.restore.restore_cluster('my_snapshot')
|
280
|
+
```
|
281
|
+
|
282
|
+
### Account Management
|
283
|
+
|
284
|
+
```python
|
285
|
+
from matrixone.account import AccountManager
|
286
|
+
|
287
|
+
# Initialize account manager
|
288
|
+
account_manager = AccountManager(client)
|
289
|
+
|
290
|
+
# Create user
|
291
|
+
user = account_manager.create_user('newuser', 'password123')
|
292
|
+
print(f"Created user: {user.name}")
|
293
|
+
|
294
|
+
# Create role
|
295
|
+
role = account_manager.create_role('analyst')
|
296
|
+
print(f"Created role: {role.name}")
|
297
|
+
|
298
|
+
# Grant privileges on specific table (optional)
|
299
|
+
# Note: table must exist first
|
300
|
+
account_manager.grant_privilege(
|
301
|
+
'SELECT', # privilege
|
302
|
+
'TABLE', # object_type
|
303
|
+
'users', # object_name (database.table format)
|
304
|
+
to_role='analyst'
|
305
|
+
)
|
306
|
+
|
307
|
+
# Grant role to user
|
308
|
+
account_manager.grant_role('analyst', 'newuser')
|
309
|
+
print(f"Granted role to user")
|
310
|
+
|
311
|
+
# List users
|
312
|
+
users = account_manager.list_users()
|
313
|
+
for user in users:
|
314
|
+
print(f"User: {user.name}")
|
315
|
+
```
|
316
|
+
|
317
|
+
### Vector Search Operations
|
318
|
+
|
319
|
+
```python
|
320
|
+
from matrixone import Client
|
321
|
+
from matrixone.sqlalchemy_ext import create_vector_column
|
322
|
+
from matrixone.orm import declarative_base
|
323
|
+
from sqlalchemy import Column, BigInteger, String, Text
|
324
|
+
import numpy as np
|
325
|
+
|
326
|
+
# Create client and connect
|
327
|
+
client = Client()
|
328
|
+
client.connect(
|
329
|
+
host='localhost',
|
330
|
+
port=6001,
|
331
|
+
user='root',
|
332
|
+
password='111',
|
333
|
+
database='test'
|
334
|
+
)
|
335
|
+
|
336
|
+
# Define vector table using MatrixOne ORM
|
337
|
+
Base = declarative_base()
|
338
|
+
|
339
|
+
class Document(Base):
|
340
|
+
__tablename__ = 'documents'
|
341
|
+
# IMPORTANT: HNSW index requires BigInteger (BIGINT) primary key
|
342
|
+
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
343
|
+
title = Column(String(200))
|
344
|
+
content = Column(Text)
|
345
|
+
embedding = create_vector_column(384, precision='f32')
|
346
|
+
|
347
|
+
# Create table using client API (not Base.metadata.create_all)
|
348
|
+
client.create_table(Document)
|
349
|
+
|
350
|
+
# Create HNSW index using SDK (not SQL)
|
351
|
+
client.vector_ops.enable_hnsw()
|
352
|
+
client.vector_ops.create_hnsw(
|
353
|
+
'documents', # table name or model - positional argument
|
354
|
+
name='idx_embedding',
|
355
|
+
column='embedding',
|
356
|
+
m=16,
|
357
|
+
ef_construction=200
|
358
|
+
)
|
359
|
+
|
360
|
+
# Insert vector data using client API
|
361
|
+
client.insert(Document, {
|
362
|
+
'title': 'Machine Learning Guide',
|
363
|
+
'content': 'Comprehensive ML tutorial...',
|
364
|
+
'embedding': np.random.rand(384).tolist()
|
365
|
+
})
|
366
|
+
|
367
|
+
# Search similar documents using SDK
|
368
|
+
query_vector = np.random.rand(384).tolist()
|
369
|
+
results = client.vector_ops.similarity_search(
|
370
|
+
'documents', # table name or model - positional argument
|
371
|
+
vector_column='embedding',
|
372
|
+
query_vector=query_vector,
|
373
|
+
limit=5,
|
374
|
+
distance_type='cosine'
|
375
|
+
)
|
376
|
+
|
377
|
+
for row in results:
|
378
|
+
print(f"Document: {row[1]}, Similarity: {row[-1]}")
|
379
|
+
|
380
|
+
# Cleanup
|
381
|
+
client.drop_table(Document) # Use client API
|
382
|
+
client.disconnect()
|
383
|
+
```
|
384
|
+
|
385
|
+
### ⭐ IVF Index Health Monitoring (Production Critical)
|
386
|
+
|
387
|
+
**Monitor your IVF indexes to ensure optimal performance!**
|
388
|
+
|
389
|
+
```python
|
390
|
+
from matrixone import Client
|
391
|
+
import numpy as np
|
392
|
+
|
393
|
+
client = Client()
|
394
|
+
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
395
|
+
|
396
|
+
# After creating IVF index and inserting data...
|
397
|
+
|
398
|
+
# Get IVF index statistics
|
399
|
+
stats = client.vector_ops.get_ivf_stats("documents", "embedding")
|
400
|
+
|
401
|
+
# Analyze index balance
|
402
|
+
counts = stats['distribution']['centroid_count']
|
403
|
+
total_centroids = len(counts)
|
404
|
+
total_vectors = sum(counts)
|
405
|
+
min_count = min(counts) if counts else 0
|
406
|
+
max_count = max(counts) if counts else 0
|
407
|
+
balance_ratio = max_count / min_count if min_count > 0 else float('inf')
|
408
|
+
|
409
|
+
print(f"📊 IVF Index Health Report:")
|
410
|
+
print(f" - Total centroids: {total_centroids}")
|
411
|
+
print(f" - Total vectors: {total_vectors}")
|
412
|
+
print(f" - Balance ratio: {balance_ratio:.2f}")
|
413
|
+
print(f" - Min vectors in centroid: {min_count}")
|
414
|
+
print(f" - Max vectors in centroid: {max_count}")
|
415
|
+
|
416
|
+
# Check if index needs rebuilding
|
417
|
+
if balance_ratio > 2.5:
|
418
|
+
print("⚠️ WARNING: Index is imbalanced and needs rebuilding!")
|
419
|
+
print(" Rebuild the index for optimal performance:")
|
420
|
+
|
421
|
+
# Rebuild process
|
422
|
+
client.vector_ops.drop("documents", "idx_embedding")
|
423
|
+
client.vector_ops.create_ivf(
|
424
|
+
"documents",
|
425
|
+
name="idx_embedding",
|
426
|
+
column="embedding",
|
427
|
+
lists=100
|
428
|
+
)
|
429
|
+
print("✅ Index rebuilt successfully")
|
430
|
+
else:
|
431
|
+
print("✅ Index is healthy and well-balanced")
|
432
|
+
|
433
|
+
client.disconnect()
|
434
|
+
```
|
435
|
+
|
436
|
+
**Why IVF Stats Matter:**
|
437
|
+
- 🎯 **Performance**: Unbalanced indexes lead to slow searches
|
438
|
+
- 📊 **Load Distribution**: Identify hot spots and imbalances
|
439
|
+
- 🔄 **Rebuild Timing**: Know when to rebuild for optimal performance
|
440
|
+
- 📈 **Capacity Planning**: Understand data distribution patterns
|
441
|
+
|
442
|
+
**When to Rebuild:**
|
443
|
+
- Balance ratio > 2.5 (moderate imbalance)
|
444
|
+
- Balance ratio > 3.0 (severe imbalance - rebuild immediately)
|
445
|
+
- After bulk inserts (>20% of data)
|
446
|
+
- Performance degradation in searches
|
447
|
+
|
448
|
+
### Fulltext Search Operations
|
449
|
+
|
450
|
+
```python
|
451
|
+
from matrixone import Client
|
452
|
+
from matrixone.sqlalchemy_ext.fulltext_search import boolean_match
|
453
|
+
from matrixone.orm import declarative_base
|
454
|
+
from sqlalchemy import Column, Integer, String, Text
|
455
|
+
|
456
|
+
# Create client and connect
|
457
|
+
client = Client()
|
458
|
+
client.connect(
|
459
|
+
host='localhost',
|
460
|
+
port=6001,
|
461
|
+
user='root',
|
462
|
+
password='111',
|
463
|
+
database='test'
|
464
|
+
)
|
465
|
+
|
466
|
+
# Define model using MatrixOne ORM
|
467
|
+
Base = declarative_base()
|
468
|
+
|
469
|
+
class Article(Base):
|
470
|
+
__tablename__ = 'articles'
|
471
|
+
id = Column(Integer, primary_key=True, autoincrement=True)
|
472
|
+
title = Column(String(200), nullable=False)
|
473
|
+
content = Column(Text, nullable=False)
|
474
|
+
category = Column(String(100))
|
475
|
+
|
476
|
+
# Create table using client API (not Base.metadata.create_all)
|
477
|
+
client.create_table(Article)
|
478
|
+
|
479
|
+
# Insert some data using client API
|
480
|
+
articles = [
|
481
|
+
{'title': 'Machine Learning Guide',
|
482
|
+
'content': 'Comprehensive machine learning tutorial...',
|
483
|
+
'category': 'AI'},
|
484
|
+
{'title': 'Python Programming',
|
485
|
+
'content': 'Learn Python programming basics',
|
486
|
+
'category': 'Programming'},
|
487
|
+
]
|
488
|
+
client.batch_insert(Article, articles)
|
489
|
+
|
490
|
+
# Create fulltext index using SDK (not SQL)
|
491
|
+
client.fulltext_index.create(
|
492
|
+
'articles', # table name - positional argument
|
493
|
+
name='ftidx_content',
|
494
|
+
columns=['title', 'content']
|
495
|
+
)
|
496
|
+
|
497
|
+
# Boolean search with encourage (like natural language)
|
498
|
+
results = client.query(
|
499
|
+
Article.title,
|
500
|
+
Article.content,
|
501
|
+
boolean_match('title', 'content').encourage('machine learning tutorial')
|
502
|
+
).execute()
|
503
|
+
|
504
|
+
# Boolean search with must/must_not operators
|
505
|
+
results = client.query(
|
506
|
+
Article.title,
|
507
|
+
Article.content,
|
508
|
+
boolean_match('title', 'content')
|
509
|
+
.must('machine')
|
510
|
+
.must('learning')
|
511
|
+
.must_not('basics')
|
512
|
+
).execute()
|
513
|
+
|
514
|
+
# Results is a ResultSet object
|
515
|
+
for row in results.rows:
|
516
|
+
print(f"Title: {row[0]}, Content: {row[1][:50]}...")
|
517
|
+
|
518
|
+
# Cleanup
|
519
|
+
client.drop_table(Article) # Use client API
|
520
|
+
client.disconnect()
|
521
|
+
```
|
522
|
+
|
523
|
+
### Metadata Analysis
|
524
|
+
|
525
|
+
```python
|
526
|
+
from matrixone import Client
|
527
|
+
|
528
|
+
# Create client and connect
|
529
|
+
client = Client()
|
530
|
+
client.connect(
|
531
|
+
host='localhost',
|
532
|
+
port=6001,
|
533
|
+
user='root',
|
534
|
+
password='111',
|
535
|
+
database='test'
|
536
|
+
)
|
537
|
+
|
538
|
+
# Analyze table metadata - returns structured MetadataRow objects
|
539
|
+
metadata_rows = client.metadata.scan(
|
540
|
+
dbname='test',
|
541
|
+
tablename='documents',
|
542
|
+
columns='*' # Get all columns
|
543
|
+
)
|
544
|
+
|
545
|
+
for row in metadata_rows:
|
546
|
+
print(f"Column: {row.col_name}")
|
547
|
+
print(f" Rows count: {row.rows_cnt}")
|
548
|
+
print(f" Null count: {row.null_cnt}")
|
549
|
+
print(f" Size: {row.origin_size}")
|
550
|
+
|
551
|
+
# Get table brief statistics
|
552
|
+
brief_stats = client.metadata.get_table_brief_stats(
|
553
|
+
dbname='test',
|
554
|
+
tablename='documents'
|
555
|
+
)
|
556
|
+
|
557
|
+
table_stats = brief_stats['documents']
|
558
|
+
print(f"Total rows: {table_stats['row_cnt']}")
|
559
|
+
print(f"Total nulls: {table_stats['null_cnt']}")
|
560
|
+
print(f"Original size: {table_stats['original_size']}")
|
561
|
+
print(f"Compressed size: {table_stats['compress_size']}")
|
562
|
+
|
563
|
+
client.disconnect()
|
564
|
+
```
|
565
|
+
|
566
|
+
### Pub/Sub Operations
|
567
|
+
|
568
|
+
```python
|
569
|
+
# List publications
|
570
|
+
publications = client.pubsub.list_publications()
|
571
|
+
for pub in publications:
|
572
|
+
print(f"Publication: {pub}")
|
573
|
+
|
574
|
+
# List subscriptions
|
575
|
+
subscriptions = client.pubsub.list_subscriptions()
|
576
|
+
for sub in subscriptions:
|
577
|
+
print(f"Subscription: {sub}")
|
578
|
+
|
579
|
+
# Drop publication/subscription when needed
|
580
|
+
try:
|
581
|
+
client.pubsub.drop_publication("test_publication")
|
582
|
+
client.pubsub.drop_subscription("test_subscription")
|
583
|
+
except Exception as e:
|
584
|
+
print(f"Cleanup: {e}")
|
585
|
+
```
|
586
|
+
|
587
|
+
## Configuration
|
588
|
+
|
589
|
+
### Connection Parameters
|
590
|
+
|
591
|
+
```python
|
592
|
+
client = Client(
|
593
|
+
connection_timeout=30,
|
594
|
+
query_timeout=300,
|
595
|
+
auto_commit=True,
|
596
|
+
charset='utf8mb4',
|
597
|
+
sql_log_mode='auto', # 'off', 'simple', 'auto', 'full'
|
598
|
+
slow_query_threshold=1.0
|
599
|
+
)
|
600
|
+
```
|
601
|
+
|
602
|
+
### Logging Configuration
|
603
|
+
|
604
|
+
```python
|
605
|
+
from matrixone import Client
|
606
|
+
from matrixone.logger import create_default_logger
|
607
|
+
import logging
|
608
|
+
|
609
|
+
# Create custom logger
|
610
|
+
logger = create_default_logger(
|
611
|
+
level=logging.INFO,
|
612
|
+
sql_log_mode='auto', # 'off', 'simple', 'auto', 'full'
|
613
|
+
slow_query_threshold=1.0,
|
614
|
+
max_sql_display_length=500
|
615
|
+
)
|
616
|
+
|
617
|
+
# Use custom logger with client
|
618
|
+
client = Client(logger=logger)
|
619
|
+
```
|
620
|
+
|
621
|
+
## Error Handling
|
622
|
+
|
623
|
+
The SDK provides comprehensive error handling with helpful messages:
|
624
|
+
|
625
|
+
```python
|
626
|
+
from matrixone.exceptions import (
|
627
|
+
ConnectionError,
|
628
|
+
QueryError,
|
629
|
+
VersionError,
|
630
|
+
SnapshotError
|
631
|
+
)
|
632
|
+
|
633
|
+
try:
|
634
|
+
snapshot = client.snapshots.create('test', 'cluster')
|
635
|
+
except VersionError as e:
|
636
|
+
print(f"Version compatibility error: {e}")
|
637
|
+
except SnapshotError as e:
|
638
|
+
print(f"Snapshot operation failed: {e}")
|
639
|
+
```
|
640
|
+
|
641
|
+
## 🔗 Links
|
642
|
+
|
643
|
+
- **📚 Full Documentation**: https://matrixone.readthedocs.io/
|
644
|
+
- **📦 PyPI Package**: https://pypi.org/project/matrixone-python-sdk/
|
645
|
+
- **💻 GitHub Repository**: https://github.com/matrixorigin/matrixone/tree/main/clients/python
|
646
|
+
- **🌐 MatrixOne Docs**: https://docs.matrixorigin.cn/
|
647
|
+
|
648
|
+
### Online Examples
|
649
|
+
|
650
|
+
The SDK includes 25+ comprehensive examples covering all features:
|
651
|
+
|
652
|
+
**Getting Started:**
|
653
|
+
- Basic connection and database operations
|
654
|
+
- Async/await operations
|
655
|
+
- Transaction management
|
656
|
+
- SQLAlchemy ORM integration
|
657
|
+
|
658
|
+
**Vector Search:**
|
659
|
+
- Vector data types and distance functions
|
660
|
+
- IVF and HNSW index creation and tuning
|
661
|
+
- ⭐ **IVF Index Health Monitoring** - Essential for production systems
|
662
|
+
- Similarity search operations
|
663
|
+
- Advanced vector optimizations and index rebuilding
|
664
|
+
|
665
|
+
**Advanced Features:**
|
666
|
+
- Fulltext search with BM25/TF-IDF
|
667
|
+
- Table metadata analysis
|
668
|
+
- Snapshot and restore operations
|
669
|
+
- Account and permission management
|
670
|
+
- Pub/Sub operations
|
671
|
+
- Connection hooks and logging
|
672
|
+
|
673
|
+
### Quick Examples
|
674
|
+
|
675
|
+
Clone the repository to access all examples:
|
676
|
+
```bash
|
677
|
+
git clone https://github.com/matrixorigin/matrixone.git
|
678
|
+
cd matrixone/clients/python/examples
|
679
|
+
|
680
|
+
# Run basic example
|
681
|
+
python example_01_basic_connection.py
|
682
|
+
|
683
|
+
# Run vector search example
|
684
|
+
python example_12_vector_basics.py
|
685
|
+
|
686
|
+
# Run metadata analysis example
|
687
|
+
python example_25_metadata_operations.py
|
688
|
+
```
|
689
|
+
|
690
|
+
|
691
|
+
## Support
|
692
|
+
|
693
|
+
- 📧 Email: contact@matrixorigin.cn
|
694
|
+
- 🐛 Issues: [GitHub Issues](https://github.com/matrixorigin/matrixone/issues)
|
695
|
+
- 💬 Discussions: [GitHub Discussions](https://github.com/matrixorigin/matrixone/discussions)
|
696
|
+
- 📖 Documentation:
|
697
|
+
- [MatrixOne Docs (English)](https://docs.matrixorigin.cn/en)
|
698
|
+
- [MatrixOne Docs (中文)](https://docs.matrixorigin.cn/)
|
699
|
+
|
700
|
+
## License
|
701
|
+
|
702
|
+
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
703
|
+
|
704
|
+
---
|
705
|
+
|
706
|
+
**MatrixOne Python SDK** - Making MatrixOne database operations simple and powerful in Python.
|