matrixone-python-sdk 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matrixone/async_client.py +27 -11
- matrixone/cli_tools.py +1895 -0
- matrixone/client.py +126 -12
- matrixone/connection_hooks.py +54 -5
- matrixone/index_utils.py +4 -2
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/METADATA +347 -6
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/RECORD +12 -10
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/entry_points.txt +1 -1
- tests/online/test_cli_tools_online.py +482 -0
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/WHEEL +0 -0
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {matrixone_python_sdk-0.1.4.dist-info → matrixone_python_sdk-0.1.5.dist-info}/top_level.txt +0 -0
matrixone/client.py
CHANGED
@@ -2613,40 +2613,50 @@ class Client(BaseMatrixOneClient):
|
|
2613
2613
|
|
2614
2614
|
return self
|
2615
2615
|
|
2616
|
-
def get_secondary_index_tables(self, table_name: str) -> List[str]:
|
2616
|
+
def get_secondary_index_tables(self, table_name: str, database_name: str = None) -> List[str]:
|
2617
2617
|
"""
|
2618
|
-
Get all secondary index table names for a given table
|
2618
|
+
Get all secondary index table names for a given table.
|
2619
|
+
|
2620
|
+
This includes both regular secondary indexes (MULTIPLE type) and UNIQUE indexes.
|
2619
2621
|
|
2620
2622
|
Args:
|
2621
2623
|
table_name: Name of the table to get secondary indexes for
|
2624
|
+
database_name: Name of the database (optional). If None, uses the current database.
|
2622
2625
|
|
2623
2626
|
Returns:
|
2624
|
-
List of secondary index table names
|
2627
|
+
List of secondary index table names (includes both __mo_index_secondary_... and __mo_index_unique_... tables)
|
2625
2628
|
|
2626
2629
|
Examples::
|
2627
2630
|
|
2628
2631
|
>>> client = Client()
|
2629
2632
|
>>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
2633
|
+
>>> # Use current database
|
2630
2634
|
>>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info')
|
2635
|
+
>>> # Or specify database explicitly
|
2636
|
+
>>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info', 'test')
|
2631
2637
|
>>> print(index_tables)
|
2632
|
-
['__mo_index_secondary_..._cms_id', '
|
2638
|
+
['__mo_index_secondary_..._cms_id', '__mo_index_unique_..._email']
|
2633
2639
|
"""
|
2634
2640
|
from .index_utils import build_get_index_tables_sql
|
2635
2641
|
|
2636
|
-
#
|
2637
|
-
|
2642
|
+
# Use provided database_name or get current database from connection params
|
2643
|
+
if database_name is None:
|
2644
|
+
database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
|
2638
2645
|
|
2639
|
-
sql, params = build_get_index_tables_sql(table_name,
|
2646
|
+
sql, params = build_get_index_tables_sql(table_name, database_name)
|
2640
2647
|
result = self.execute(sql, params)
|
2641
2648
|
return [row[0] for row in result.fetchall()]
|
2642
2649
|
|
2643
|
-
def get_secondary_index_table_by_name(
|
2650
|
+
def get_secondary_index_table_by_name(
|
2651
|
+
self, table_name: str, index_name: str, database_name: str = None
|
2652
|
+
) -> Optional[str]:
|
2644
2653
|
"""
|
2645
|
-
Get the physical table name of a secondary index by its index name
|
2654
|
+
Get the physical table name of a secondary index by its index name.
|
2646
2655
|
|
2647
2656
|
Args:
|
2648
2657
|
table_name: Name of the table
|
2649
2658
|
index_name: Name of the secondary index
|
2659
|
+
database_name: Name of the database (optional). If None, uses the current database.
|
2650
2660
|
|
2651
2661
|
Returns:
|
2652
2662
|
Physical table name of the secondary index, or None if not found
|
@@ -2655,20 +2665,124 @@ class Client(BaseMatrixOneClient):
|
|
2655
2665
|
|
2656
2666
|
>>> client = Client()
|
2657
2667
|
>>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
|
2668
|
+
>>> # Use current database
|
2658
2669
|
>>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id')
|
2670
|
+
>>> # Or specify database explicitly
|
2671
|
+
>>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id', 'test')
|
2659
2672
|
>>> print(index_table)
|
2660
2673
|
'__mo_index_secondary_018cfbda-bde1-7c3e-805c-3f8e71769f75_cms_id'
|
2661
2674
|
"""
|
2662
2675
|
from .index_utils import build_get_index_table_by_name_sql
|
2663
2676
|
|
2664
|
-
#
|
2665
|
-
|
2677
|
+
# Use provided database_name or get current database from connection params
|
2678
|
+
if database_name is None:
|
2679
|
+
database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
|
2666
2680
|
|
2667
|
-
sql, params = build_get_index_table_by_name_sql(table_name, index_name,
|
2681
|
+
sql, params = build_get_index_table_by_name_sql(table_name, index_name, database_name)
|
2668
2682
|
result = self.execute(sql, params)
|
2669
2683
|
row = result.fetchone()
|
2670
2684
|
return row[0] if row else None
|
2671
2685
|
|
2686
|
+
def get_table_indexes_detail(self, table_name: str, database_name: Optional[str] = None) -> List[dict]:
    """
    Get detailed information about every non-primary index of a table.

    One entry is returned per index physical table, so an index that is backed
    by several physical tables (for example the metadata / centroids / entries
    tables shown in the example below) produces several entries sharing the
    same ``index_name``.

    Rows are read from ``mo_catalog.mo_indexes`` joined with
    ``mo_catalog.mo_tables``. Indexes of type ``PRIMARY`` and rows without an
    index table name are excluded by the query.

    Args:
        table_name: Name of the table to get indexes for
        database_name: Name of the database (optional). If None, the
            ``database`` entry of the client's connection parameters is used.

    Returns:
        List of dictionaries ordered by index name and then by algorithm table
        type (metadata, centroids, entries, anything else). Each contains:
            - index_name: Name of the index
            - index_type: Type of index as stored in the catalog (never PRIMARY)
            - algo: Algorithm type, or None when the catalog value is empty
            - algo_table_type: Algorithm table type, or None when empty
            - physical_table_name: Physical table name
            - columns: List of column names ([] when none were reported)
            - algo_params: Algorithm parameters, or None when empty

    Raises:
        ValueError: If no database name was given and none can be read from
            the connection parameters.

    Examples::

        >>> client = Client()
        >>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
        >>> # Get all index details for a table
        >>> indexes = client.get_table_indexes_detail('ivf_health_demo_docs')
        >>> for idx in indexes:
        ...     print(f"{idx['index_name']} ({idx['algo']}) - {idx['algo_table_type']}: {idx['physical_table_name']}")
        idx_embedding_ivf (ivfflat) - metadata: __mo_index_secondary_...
        idx_embedding_ivf (ivfflat) - centroids: __mo_index_secondary_...
        idx_embedding_ivf (ivfflat) - entries: __mo_index_secondary_...
    """
    # Fall back to the database stored in the connection parameters. The
    # attribute may be missing or None, so normalise it to a dict first to
    # guarantee the ValueError below instead of an AttributeError.
    if database_name is None:
        connection_params = getattr(self, '_connection_params', None) or {}
        database_name = connection_params.get('database')

    if not database_name:
        raise ValueError("Database name must be provided or set in connection parameters")

    # sort_order is selected only so that ORDER BY can rank the algorithm
    # table types; it is not copied into the returned dictionaries.
    sql = """
        SELECT
            mo_indexes.name AS index_name,
            mo_indexes.type AS index_type,
            mo_indexes.algo AS algo,
            mo_indexes.algo_table_type AS algo_table_type,
            mo_indexes.index_table_name AS physical_table_name,
            GROUP_CONCAT(mo_indexes.column_name ORDER BY mo_indexes.ordinal_position SEPARATOR ', ') AS columns,
            mo_indexes.algo_params AS algo_params,
            CASE mo_indexes.algo_table_type
                WHEN 'metadata' THEN 1
                WHEN 'centroids' THEN 2
                WHEN 'entries' THEN 3
                ELSE 4
            END AS sort_order
        FROM mo_catalog.mo_indexes
        JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
        WHERE mo_tables.relname = ?
            AND mo_tables.reldatabase = ?
            AND mo_indexes.type != 'PRIMARY'
            AND mo_indexes.index_table_name IS NOT NULL
        GROUP BY
            mo_indexes.name,
            mo_indexes.type,
            mo_indexes.algo,
            mo_indexes.algo_table_type,
            mo_indexes.index_table_name,
            mo_indexes.algo_params
        ORDER BY
            mo_indexes.name,
            sort_order
    """

    result = self.execute(sql, (table_name, database_name))

    # Empty catalog values (NULL or '') are reported as None. The column list
    # is rebuilt by splitting on the same ', ' separator GROUP_CONCAT used, so
    # a column name that itself contains ', ' would be split as well.
    return [
        {
            'index_name': row[0],
            'index_type': row[1],
            'algo': row[2] or None,
            'algo_table_type': row[3] or None,
            'physical_table_name': row[4],
            'columns': row[5].split(', ') if row[5] else [],
            'algo_params': row[6] or None,
        }
        for row in result.fetchall()
    ]
|
2785
|
+
|
2672
2786
|
def verify_table_index_counts(self, table_name: str) -> int:
|
2673
2787
|
"""
|
2674
2788
|
Verify that the main table and all its secondary index tables have the same row count.
|
matrixone/connection_hooks.py
CHANGED
@@ -19,7 +19,7 @@ Connection hooks for MatrixOne clients
|
|
19
19
|
from enum import Enum
|
20
20
|
from typing import Callable, List, Optional, Union
|
21
21
|
|
22
|
-
from sqlalchemy import event
|
22
|
+
from sqlalchemy import event, text
|
23
23
|
from sqlalchemy.engine import Engine
|
24
24
|
from sqlalchemy.ext.asyncio import AsyncEngine
|
25
25
|
|
@@ -119,16 +119,65 @@ class ConnectionHook:
|
|
119
119
|
# For immediate execution, we need to get a connection from the client
|
120
120
|
# This is a fallback for when we don't have a specific connection
|
121
121
|
if hasattr(client, '_engine') and client._engine:
|
122
|
-
async with client._engine.
|
123
|
-
#
|
124
|
-
|
125
|
-
await self.execute_async_with_connection(client, raw_conn)
|
122
|
+
async with client._engine.connect() as conn:
|
123
|
+
# Execute actions directly on async connection
|
124
|
+
await self.execute_async_on_connection(client, conn)
|
126
125
|
else:
|
127
126
|
client.logger.warning("No engine available for connection hook execution")
|
128
127
|
|
129
128
|
except Exception as e:
|
130
129
|
client.logger.warning(f"Connection hook execution failed: {e}")
|
131
130
|
|
131
|
+
async def execute_async_on_connection(self, client, async_connection) -> None:
    """Execute hook actions on an AsyncConnection (SQLAlchemy async connection).

    Each entry of ``self.actions`` is translated into one or more ``SET``
    statements that are awaited on ``async_connection``; afterwards the
    optional ``self.custom_hook`` is invoked with ``client``.

    Args:
        client: Object passed to the custom hook; its ``logger`` receives the
            debug and warning messages emitted here.
        async_connection: Connection whose awaitable ``execute`` runs the
            ``SET`` statements.

    Any exception raised while running the actions or the custom hook is
    caught and logged as a warning, so hook failures never propagate to the
    caller. The first failure stops the remaining actions and the hook.
    """
    import inspect  # local import: the awaitable check is only needed here

    enable_ivf = "SET experimental_ivf_index = 1"
    enable_hnsw = "SET experimental_hnsw_index = 1"
    enable_fulltext = "SET experimental_fulltext_index = 1"

    try:
        # Execute predefined actions
        for action in self.actions:
            if isinstance(action, str):
                # NOTE(review): presumably raises for an unknown string, which
                # the outer handler then logs — confirm against ConnectionAction.
                action = ConnectionAction(action)

            # Pick the statements and log message first so the execution
            # itself is written only once below.
            if action == ConnectionAction.ENABLE_IVF:
                statements = (enable_ivf,)
                message = "✓ Enabled IVF vector operations"
            elif action == ConnectionAction.ENABLE_HNSW:
                statements = (enable_hnsw,)
                message = "✓ Enabled HNSW vector operations"
            elif action == ConnectionAction.ENABLE_FULLTEXT:
                statements = (enable_fulltext,)
                message = "✓ Enabled fulltext search operations"
            elif action == ConnectionAction.ENABLE_VECTOR:
                statements = (enable_ivf, enable_hnsw)
                message = "✓ Enabled vector operations"
            elif action == ConnectionAction.ENABLE_ALL:
                statements = (enable_ivf, enable_hnsw, enable_fulltext)
                message = "✓ Enabled all operations"
            else:
                client.logger.warning(f"Unknown connection action: {action}")
                continue

            for statement in statements:
                await async_connection.execute(text(statement))
            client.logger.debug(message)

        # Execute custom hook if provided. Calling it and inspecting the
        # result handles plain functions, ``async def`` functions and callable
        # objects alike, without peeking at code-object flags.
        if self.custom_hook and callable(self.custom_hook):
            result = self.custom_hook(client)
            if inspect.isawaitable(result):
                await result

    except Exception as e:
        # Best effort: a failing hook is reported (including TypeErrors raised
        # by the custom hook) but must not break the caller.
        client.logger.warning(f"Connection hook execution failed: {e}")
|
180
|
+
|
132
181
|
async def execute_async_with_connection(self, client, dbapi_connection) -> None:
|
133
182
|
"""Execute hook actions asynchronously using the provided connection"""
|
134
183
|
try:
|
matrixone/index_utils.py
CHANGED
@@ -23,6 +23,8 @@ def build_get_index_tables_sql(table_name: str, database: str = None) -> Tuple[s
|
|
23
23
|
"""
|
24
24
|
Build SQL to get all secondary index table names for a given table.
|
25
25
|
|
26
|
+
This includes both MULTIPLE (regular secondary indexes) and UNIQUE indexes.
|
27
|
+
|
26
28
|
Args:
|
27
29
|
table_name: Name of the table
|
28
30
|
database: Name of the database (optional, but recommended to avoid cross-database conflicts)
|
@@ -35,7 +37,7 @@ def build_get_index_tables_sql(table_name: str, database: str = None) -> Tuple[s
|
|
35
37
|
SELECT DISTINCT index_table_name
|
36
38
|
FROM mo_catalog.mo_indexes
|
37
39
|
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
38
|
-
WHERE relname = ? AND reldatabase = ? AND type
|
40
|
+
WHERE relname = ? AND reldatabase = ? AND type IN ('MULTIPLE', 'UNIQUE')
|
39
41
|
"""
|
40
42
|
return sql, (table_name, database)
|
41
43
|
else:
|
@@ -44,7 +46,7 @@ def build_get_index_tables_sql(table_name: str, database: str = None) -> Tuple[s
|
|
44
46
|
SELECT DISTINCT index_table_name
|
45
47
|
FROM mo_catalog.mo_indexes
|
46
48
|
JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
|
47
|
-
WHERE relname = ? AND type
|
49
|
+
WHERE relname = ? AND type IN ('MULTIPLE', 'UNIQUE')
|
48
50
|
"""
|
49
51
|
return sql, (table_name,)
|
50
52
|
|