matrixone-python-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/async_client.py +27 -5
- matrixone/cli_tools.py +1895 -0
- matrixone/client.py +126 -6
- matrixone/connection_hooks.py +61 -12
- matrixone/index_utils.py +42 -18
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/METADATA +347 -6
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/RECORD +13 -11
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/entry_points.txt +1 -1
- tests/offline/test_connection_hooks_offline.py +8 -8
- tests/online/test_cli_tools_online.py +482 -0
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/WHEEL +0 -0
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {matrixone_python_sdk-0.1.3.dist-info → matrixone_python_sdk-0.1.5.dist-info}/top_level.txt +0 -0
matrixone/client.py
CHANGED
@@ -2613,37 +2613,50 @@ class Client(BaseMatrixOneClient):
|
|
2613
2613
|
|
2614
2614
|
return self
|
2615
2615
|
|
2616
|
-
def get_secondary_index_tables(self, table_name: str) -> List[str]:
|
2616
|
+
def get_secondary_index_tables(self, table_name: str, database_name: str = None) -> List[str]:
|
2617
2617
|
"""
|
2618
2618
|
Get all secondary index table names for a given table.
|
2619
2619
|
|
2620
|
+
This includes both regular secondary indexes (MULTIPLE type) and UNIQUE indexes.
|
2621
|
+
|
2620
2622
|
Args:
|
2621
2623
|
table_name: Name of the table to get secondary indexes for
|
2624
|
+
database_name: Name of the database (optional). If None, uses the current database.
|
2622
2625
|
|
2623
2626
|
Returns:
|
2624
|
-
List of secondary index table names
|
2627
|
+
List of secondary index table names (includes both __mo_index_secondary_... and __mo_index_unique_... tables)
|
2625
2628
|
|
2626
2629
|
Examples::
|
2627
2630
|
|
2628
2631
|
>>> client = Client()
|
2629
2632
|
>>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
2633
|
+
>>> # Use current database
|
2630
2634
|
>>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info')
|
2635
|
+
>>> # Or specify database explicitly
|
2636
|
+
>>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info', 'test')
|
2631
2637
|
>>> print(index_tables)
|
2632
|
-
['__mo_index_secondary_..._cms_id', '
|
2638
|
+
['__mo_index_secondary_..._cms_id', '__mo_index_unique_..._email']
|
2633
2639
|
"""
|
2634
2640
|
from .index_utils import build_get_index_tables_sql
|
2635
2641
|
|
2636
|
-
|
2642
|
+
# Use provided database_name or get current database from connection params
|
2643
|
+
if database_name is None:
|
2644
|
+
database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
|
2645
|
+
|
2646
|
+
sql, params = build_get_index_tables_sql(table_name, database_name)
|
2637
2647
|
result = self.execute(sql, params)
|
2638
2648
|
return [row[0] for row in result.fetchall()]
|
2639
2649
|
|
2640
|
-
def get_secondary_index_table_by_name(
|
2650
|
+
def get_secondary_index_table_by_name(
|
2651
|
+
self, table_name: str, index_name: str, database_name: str = None
|
2652
|
+
) -> Optional[str]:
|
2641
2653
|
"""
|
2642
2654
|
Get the physical table name of a secondary index by its index name.
|
2643
2655
|
|
2644
2656
|
Args:
|
2645
2657
|
table_name: Name of the table
|
2646
2658
|
index_name: Name of the secondary index
|
2659
|
+
database_name: Name of the database (optional). If None, uses the current database.
|
2647
2660
|
|
2648
2661
|
Returns:
|
2649
2662
|
Physical table name of the secondary index, or None if not found
|
@@ -2652,17 +2665,124 @@ class Client(BaseMatrixOneClient):
|
|
2652
2665
|
|
2653
2666
|
>>> client = Client()
|
2654
2667
|
>>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
2668
|
+
>>> # Use current database
|
2655
2669
|
>>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id')
|
2670
|
+
>>> # Or specify database explicitly
|
2671
|
+
>>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id', 'test')
|
2656
2672
|
>>> print(index_table)
|
2657
2673
|
'__mo_index_secondary_018cfbda-bde1-7c3e-805c-3f8e71769f75_cms_id'
|
2658
2674
|
"""
|
2659
2675
|
from .index_utils import build_get_index_table_by_name_sql
|
2660
2676
|
|
2661
|
-
|
2677
|
+
# Use provided database_name or get current database from connection params
|
2678
|
+
if database_name is None:
|
2679
|
+
database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
|
2680
|
+
|
2681
|
+
sql, params = build_get_index_table_by_name_sql(table_name, index_name, database_name)
|
2662
2682
|
result = self.execute(sql, params)
|
2663
2683
|
row = result.fetchone()
|
2664
2684
|
return row[0] if row else None
|
2665
2685
|
|
2686
|
+
def get_table_indexes_detail(self, table_name: str, database_name: str = None) -> List[dict]:
|
2687
|
+
"""
|
2688
|
+
Get detailed information about all indexes for a table, including IVF, HNSW, Fulltext, and regular indexes.
|
2689
|
+
|
2690
|
+
This method returns comprehensive information about each index physical table, including:
|
2691
|
+
- Index name
|
2692
|
+
- Index type (MULTIPLE, PRIMARY, UNIQUE, etc.)
|
2693
|
+
- Algorithm type (ivfflat, hnsw, fulltext, etc.)
|
2694
|
+
- Algorithm table type (metadata, centroids, entries, etc.)
|
2695
|
+
- Physical table name
|
2696
|
+
- Column names
|
2697
|
+
- Algorithm parameters
|
2698
|
+
|
2699
|
+
Args:
|
2700
|
+
table_name: Name of the table to get indexes for
|
2701
|
+
database_name: Name of the database (optional). If None, uses the current database.
|
2702
|
+
|
2703
|
+
Returns:
|
2704
|
+
List of dictionaries, each containing:
|
2705
|
+
- index_name: Name of the index
|
2706
|
+
- index_type: Type of index (MULTIPLE, PRIMARY, UNIQUE, etc.)
|
2707
|
+
- algo: Algorithm type (ivfflat, hnsw, fulltext, or None for regular indexes)
|
2708
|
+
- algo_table_type: Algorithm table type (metadata, centroids, entries, etc., or None)
|
2709
|
+
- physical_table_name: Physical table name
|
2710
|
+
- columns: List of column names
|
2711
|
+
- algo_params: Algorithm parameters (or None)
|
2712
|
+
|
2713
|
+
Examples::
|
2714
|
+
|
2715
|
+
>>> client = Client()
|
2716
|
+
>>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
2717
|
+
>>> # Get all index details for a table
|
2718
|
+
>>> indexes = client.get_table_indexes_detail('ivf_health_demo_docs')
|
2719
|
+
>>> for idx in indexes:
|
2720
|
+
... print(f"{idx['index_name']} ({idx['algo']}) - {idx['algo_table_type']}: {idx['physical_table_name']}")
|
2721
|
+
idx_embedding_ivf (ivfflat) - metadata: __mo_index_secondary_...
|
2722
|
+
idx_embedding_ivf (ivfflat) - centroids: __mo_index_secondary_...
|
2723
|
+
idx_embedding_ivf (ivfflat) - entries: __mo_index_secondary_...
|
2724
|
+
"""
|
2725
|
+
# Use provided database_name or get current database from connection params
|
2726
|
+
if database_name is None:
|
2727
|
+
database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
|
2728
|
+
|
2729
|
+
if not database_name:
|
2730
|
+
raise ValueError("Database name must be provided or set in connection parameters")
|
2731
|
+
|
2732
|
+
# Query to get all index information
|
2733
|
+
sql = """
|
2734
|
+
SELECT
|
2735
|
+
mo_indexes.name AS index_name,
|
2736
|
+
mo_indexes.type AS index_type,
|
2737
|
+
mo_indexes.algo AS algo,
|
2738
|
+
mo_indexes.algo_table_type AS algo_table_type,
|
2739
|
+
mo_indexes.index_table_name AS physical_table_name,
|
2740
|
+
GROUP_CONCAT(mo_indexes.column_name ORDER BY mo_indexes.ordinal_position SEPARATOR ', ') AS columns,
|
2741
|
+
mo_indexes.algo_params AS algo_params,
|
2742
|
+
CASE mo_indexes.algo_table_type
|
2743
|
+
WHEN 'metadata' THEN 1
|
2744
|
+
WHEN 'centroids' THEN 2
|
2745
|
+
WHEN 'entries' THEN 3
|
2746
|
+
ELSE 4
|
2747
|
+
END AS sort_order
|
2748
|
+
FROM mo_catalog.mo_indexes
|
2749
|
+
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
2750
|
+
WHERE mo_tables.relname = ?
|
2751
|
+
AND mo_tables.reldatabase = ?
|
2752
|
+
AND mo_indexes.type != 'PRIMARY'
|
2753
|
+
AND mo_indexes.index_table_name IS NOT NULL
|
2754
|
+
GROUP BY
|
2755
|
+
mo_indexes.name,
|
2756
|
+
mo_indexes.type,
|
2757
|
+
mo_indexes.algo,
|
2758
|
+
mo_indexes.algo_table_type,
|
2759
|
+
mo_indexes.index_table_name,
|
2760
|
+
mo_indexes.algo_params
|
2761
|
+
ORDER BY
|
2762
|
+
mo_indexes.name,
|
2763
|
+
sort_order
|
2764
|
+
"""
|
2765
|
+
|
2766
|
+
result = self.execute(sql, (table_name, database_name))
|
2767
|
+
rows = result.fetchall()
|
2768
|
+
|
2769
|
+
# Convert to list of dictionaries
|
2770
|
+
indexes = []
|
2771
|
+
for row in rows:
|
2772
|
+
indexes.append(
|
2773
|
+
{
|
2774
|
+
'index_name': row[0],
|
2775
|
+
'index_type': row[1],
|
2776
|
+
'algo': row[2] if row[2] else None,
|
2777
|
+
'algo_table_type': row[3] if row[3] else None,
|
2778
|
+
'physical_table_name': row[4],
|
2779
|
+
'columns': row[5].split(', ') if row[5] else [],
|
2780
|
+
'algo_params': row[6] if row[6] else None,
|
2781
|
+
}
|
2782
|
+
)
|
2783
|
+
|
2784
|
+
return indexes
|
2785
|
+
|
2666
2786
|
def verify_table_index_counts(self, table_name: str) -> int:
|
2667
2787
|
"""
|
2668
2788
|
Verify that the main table and all its secondary index tables have the same row count.
|
matrixone/connection_hooks.py
CHANGED
@@ -19,7 +19,7 @@ Connection hooks for MatrixOne clients
|
|
19
19
|
from enum import Enum
|
20
20
|
from typing import Callable, List, Optional, Union
|
21
21
|
|
22
|
-
from sqlalchemy import event
|
22
|
+
from sqlalchemy import event, text
|
23
23
|
from sqlalchemy.engine import Engine
|
24
24
|
from sqlalchemy.ext.asyncio import AsyncEngine
|
25
25
|
|
@@ -69,13 +69,13 @@ class ConnectionHook:
|
|
69
69
|
event.listen(engine.sync_engine, "connect", self._on_connect_sync)
|
70
70
|
event.listen(engine.sync_engine, "before_cursor_execute", self._on_before_cursor_execute)
|
71
71
|
if hasattr(self._client_ref, 'logger'):
|
72
|
-
self._client_ref.logger.
|
72
|
+
self._client_ref.logger.debug("Attached connection hook to async engine")
|
73
73
|
else:
|
74
74
|
# For sync engines, listen to both connect and before_cursor_execute events
|
75
75
|
event.listen(engine, "connect", self._on_connect_sync)
|
76
76
|
event.listen(engine, "before_cursor_execute", self._on_before_cursor_execute)
|
77
77
|
if hasattr(self._client_ref, 'logger'):
|
78
|
-
self._client_ref.logger.
|
78
|
+
self._client_ref.logger.debug("Attached connection hook to sync engine")
|
79
79
|
|
80
80
|
def _on_connect_sync(self, dbapi_connection, connection_record):
|
81
81
|
"""SQLAlchemy event handler for new connections (sync)"""
|
@@ -86,7 +86,7 @@ class ConnectionHook:
|
|
86
86
|
try:
|
87
87
|
# Log that the hook is being executed
|
88
88
|
if hasattr(self._client_ref, 'logger'):
|
89
|
-
self._client_ref.logger.
|
89
|
+
self._client_ref.logger.debug(f"Executing connection hook on new connection {conn_id}")
|
90
90
|
# Pass the connection to avoid creating new connections
|
91
91
|
self.execute_sync_with_connection(self._client_ref, dbapi_connection)
|
92
92
|
self._executed_connections.add(conn_id)
|
@@ -104,7 +104,7 @@ class ConnectionHook:
|
|
104
104
|
try:
|
105
105
|
# Log that the hook is being executed
|
106
106
|
if hasattr(self._client_ref, 'logger'):
|
107
|
-
self._client_ref.logger.
|
107
|
+
self._client_ref.logger.debug(f"Executing connection hook on connection {conn_id}")
|
108
108
|
# Use the connection to avoid creating new connections
|
109
109
|
self.execute_sync_with_connection(self._client_ref, conn.connection)
|
110
110
|
self._executed_connections.add(conn_id)
|
@@ -119,16 +119,65 @@ class ConnectionHook:
|
|
119
119
|
# For immediate execution, we need to get a connection from the client
|
120
120
|
# This is a fallback for when we don't have a specific connection
|
121
121
|
if hasattr(client, '_engine') and client._engine:
|
122
|
-
async with client._engine.
|
123
|
-
#
|
124
|
-
|
125
|
-
await self.execute_async_with_connection(client, raw_conn)
|
122
|
+
async with client._engine.connect() as conn:
|
123
|
+
# Execute actions directly on async connection
|
124
|
+
await self.execute_async_on_connection(client, conn)
|
126
125
|
else:
|
127
126
|
client.logger.warning("No engine available for connection hook execution")
|
128
127
|
|
129
128
|
except Exception as e:
|
130
129
|
client.logger.warning(f"Connection hook execution failed: {e}")
|
131
130
|
|
131
|
+
async def execute_async_on_connection(self, client, async_connection) -> None:
|
132
|
+
"""Execute hook actions on an AsyncConnection (SQLAlchemy async connection)"""
|
133
|
+
try:
|
134
|
+
# Execute predefined actions
|
135
|
+
for action in self.actions:
|
136
|
+
if isinstance(action, str):
|
137
|
+
action = ConnectionAction(action)
|
138
|
+
|
139
|
+
# Execute SQL directly on async connection
|
140
|
+
if action == ConnectionAction.ENABLE_IVF:
|
141
|
+
await async_connection.execute(text("SET experimental_ivf_index = 1"))
|
142
|
+
client.logger.debug("✓ Enabled IVF vector operations")
|
143
|
+
elif action == ConnectionAction.ENABLE_HNSW:
|
144
|
+
await async_connection.execute(text("SET experimental_hnsw_index = 1"))
|
145
|
+
client.logger.debug("✓ Enabled HNSW vector operations")
|
146
|
+
elif action == ConnectionAction.ENABLE_FULLTEXT:
|
147
|
+
await async_connection.execute(text("SET experimental_fulltext_index = 1"))
|
148
|
+
client.logger.debug("✓ Enabled fulltext search operations")
|
149
|
+
elif action == ConnectionAction.ENABLE_VECTOR:
|
150
|
+
await async_connection.execute(text("SET experimental_ivf_index = 1"))
|
151
|
+
await async_connection.execute(text("SET experimental_hnsw_index = 1"))
|
152
|
+
client.logger.debug("✓ Enabled vector operations")
|
153
|
+
elif action == ConnectionAction.ENABLE_ALL:
|
154
|
+
await async_connection.execute(text("SET experimental_ivf_index = 1"))
|
155
|
+
await async_connection.execute(text("SET experimental_hnsw_index = 1"))
|
156
|
+
await async_connection.execute(text("SET experimental_fulltext_index = 1"))
|
157
|
+
client.logger.debug("✓ Enabled all operations")
|
158
|
+
else:
|
159
|
+
client.logger.warning(f"Unknown connection action: {action}")
|
160
|
+
|
161
|
+
# Execute custom hook if provided
|
162
|
+
if self.custom_hook:
|
163
|
+
if hasattr(self.custom_hook, '__call__'):
|
164
|
+
# Check if it's an async function
|
165
|
+
if hasattr(self.custom_hook, '__code__') and self.custom_hook.__code__.co_flags & 0x80:
|
166
|
+
await self.custom_hook(client)
|
167
|
+
else:
|
168
|
+
# Try to call it as sync
|
169
|
+
try:
|
170
|
+
result = self.custom_hook(client)
|
171
|
+
# If it returns a coroutine, await it
|
172
|
+
if hasattr(result, '__await__'):
|
173
|
+
await result
|
174
|
+
except TypeError as e:
|
175
|
+
if "object NoneType can't be used in 'await' expression" in str(e):
|
176
|
+
client.logger.warning("Custom hook appears to be async but was called synchronously")
|
177
|
+
|
178
|
+
except Exception as e:
|
179
|
+
client.logger.warning(f"Connection hook execution failed: {e}")
|
180
|
+
|
132
181
|
async def execute_async_with_connection(self, client, dbapi_connection) -> None:
|
133
182
|
"""Execute hook actions asynchronously using the provided connection"""
|
134
183
|
try:
|
@@ -212,7 +261,7 @@ class ConnectionHook:
|
|
212
261
|
cursor = dbapi_connection.cursor()
|
213
262
|
cursor.execute("SET experimental_ivf_index = 1")
|
214
263
|
cursor.close()
|
215
|
-
client.logger.
|
264
|
+
client.logger.debug("✓ Enabled IVF vector operations")
|
216
265
|
except Exception as e:
|
217
266
|
client.logger.warning(f"Failed to enable IVF: {e}")
|
218
267
|
|
@@ -223,7 +272,7 @@ class ConnectionHook:
|
|
223
272
|
cursor = dbapi_connection.cursor()
|
224
273
|
cursor.execute("SET experimental_hnsw_index = 1")
|
225
274
|
cursor.close()
|
226
|
-
client.logger.
|
275
|
+
client.logger.debug("✓ Enabled HNSW vector operations")
|
227
276
|
except Exception as e:
|
228
277
|
client.logger.warning(f"Failed to enable HNSW: {e}")
|
229
278
|
|
@@ -234,7 +283,7 @@ class ConnectionHook:
|
|
234
283
|
cursor = dbapi_connection.cursor()
|
235
284
|
cursor.execute("SET experimental_fulltext_index = 1")
|
236
285
|
cursor.close()
|
237
|
-
client.logger.
|
286
|
+
client.logger.debug("✓ Enabled fulltext search operations")
|
238
287
|
except Exception as e:
|
239
288
|
client.logger.warning(f"Failed to enable fulltext: {e}")
|
240
289
|
|
matrixone/index_utils.py
CHANGED
@@ -19,43 +19,67 @@ Index utilities - Shared logic for secondary index operations
|
|
19
19
|
from typing import List, Tuple
|
20
20
|
|
21
21
|
|
22
|
-
def build_get_index_tables_sql(table_name: str) -> Tuple[str, Tuple]:
|
22
|
+
def build_get_index_tables_sql(table_name: str, database: str = None) -> Tuple[str, Tuple]:
|
23
23
|
"""
|
24
24
|
Build SQL to get all secondary index table names for a given table.
|
25
25
|
|
26
|
+
This includes both MULTIPLE (regular secondary indexes) and UNIQUE indexes.
|
27
|
+
|
26
28
|
Args:
|
27
29
|
table_name: Name of the table
|
30
|
+
database: Name of the database (optional, but recommended to avoid cross-database conflicts)
|
28
31
|
|
29
32
|
Returns:
|
30
33
|
Tuple of (sql, params)
|
31
34
|
"""
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
if database:
|
36
|
+
sql = """
|
37
|
+
SELECT DISTINCT index_table_name
|
38
|
+
FROM mo_catalog.mo_indexes
|
39
|
+
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
40
|
+
WHERE relname = ? AND reldatabase = ? AND type IN ('MULTIPLE', 'UNIQUE')
|
41
|
+
"""
|
42
|
+
return sql, (table_name, database)
|
43
|
+
else:
|
44
|
+
# Fallback to old behavior if database is not provided
|
45
|
+
sql = """
|
46
|
+
SELECT DISTINCT index_table_name
|
47
|
+
FROM mo_catalog.mo_indexes
|
48
|
+
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
49
|
+
WHERE relname = ? AND type IN ('MULTIPLE', 'UNIQUE')
|
50
|
+
"""
|
51
|
+
return sql, (table_name,)
|
52
|
+
|
53
|
+
|
54
|
+
def build_get_index_table_by_name_sql(table_name: str, index_name: str, database: str = None) -> Tuple[str, Tuple]:
|
42
55
|
"""
|
43
56
|
Build SQL to get the physical table name of a secondary index by its index name.
|
44
57
|
|
45
58
|
Args:
|
46
59
|
table_name: Name of the table
|
47
60
|
index_name: Name of the secondary index
|
61
|
+
database: Name of the database (optional, but recommended to avoid cross-database conflicts)
|
48
62
|
|
49
63
|
Returns:
|
50
64
|
Tuple of (sql, params)
|
51
65
|
"""
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
66
|
+
if database:
|
67
|
+
sql = """
|
68
|
+
SELECT DISTINCT index_table_name
|
69
|
+
FROM mo_catalog.mo_indexes
|
70
|
+
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
71
|
+
WHERE relname = ? AND name = ? AND reldatabase = ?
|
72
|
+
"""
|
73
|
+
return sql, (table_name, index_name, database)
|
74
|
+
else:
|
75
|
+
# Fallback to old behavior if database is not provided
|
76
|
+
sql = """
|
77
|
+
SELECT DISTINCT index_table_name
|
78
|
+
FROM mo_catalog.mo_indexes
|
79
|
+
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
80
|
+
WHERE relname = ? AND name = ?
|
81
|
+
"""
|
82
|
+
return sql, (table_name, index_name)
|
59
83
|
|
60
84
|
|
61
85
|
def build_verify_counts_sql(table_name: str, index_tables: List[str]) -> str:
|