matrixone-python-sdk 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
matrixone/client.py CHANGED
@@ -2613,37 +2613,50 @@ class Client(BaseMatrixOneClient):
2613
2613
 
2614
2614
  return self
2615
2615
 
2616
def get_secondary_index_tables(self, table_name: str, database_name: Optional[str] = None) -> List[str]:
    """
    Get all secondary index table names for a given table.

    This includes both regular secondary indexes (MULTIPLE type) and UNIQUE indexes.

    Args:
        table_name: Name of the table to get secondary indexes for
        database_name: Name of the database (optional). If None, the ``database``
            entry of the client's connection parameters is used; if that is also
            unavailable the lookup is not restricted to a database.

    Returns:
        List of secondary index table names (includes both __mo_index_secondary_...
        and __mo_index_unique_... tables)

    Examples::

        >>> client = Client()
        >>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
        >>> # Use current database
        >>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info')
        >>> # Or specify database explicitly
        >>> index_tables = client.get_secondary_index_tables('cms_all_content_chunk_info', 'test')
        >>> print(index_tables)
        ['__mo_index_secondary_..._cms_id', '__mo_index_unique_..._email']
    """
    from .index_utils import build_get_index_tables_sql

    if database_name is None:
        # Tolerate a missing *or* None ``_connection_params`` attribute instead of
        # failing with AttributeError on ``None.get``.
        connection_params = getattr(self, '_connection_params', None) or {}
        database_name = connection_params.get('database')

    sql, params = build_get_index_tables_sql(table_name, database_name)
    result = self.execute(sql, params)
    return [row[0] for row in result.fetchall()]
2639
2649
 
2640
- def get_secondary_index_table_by_name(self, table_name: str, index_name: str) -> Optional[str]:
2650
+ def get_secondary_index_table_by_name(
2651
+ self, table_name: str, index_name: str, database_name: str = None
2652
+ ) -> Optional[str]:
2641
2653
  """
2642
2654
  Get the physical table name of a secondary index by its index name.
2643
2655
 
2644
2656
  Args:
2645
2657
  table_name: Name of the table
2646
2658
  index_name: Name of the secondary index
2659
+ database_name: Name of the database (optional). If None, uses the current database.
2647
2660
 
2648
2661
  Returns:
2649
2662
  Physical table name of the secondary index, or None if not found
@@ -2652,17 +2665,124 @@ class Client(BaseMatrixOneClient):
2652
2665
 
2653
2666
  >>> client = Client()
2654
2667
  >>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
2668
+ >>> # Use current database
2655
2669
  >>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id')
2670
+ >>> # Or specify database explicitly
2671
+ >>> index_table = client.get_secondary_index_table_by_name('cms_all_content_chunk_info', 'cms_id', 'test')
2656
2672
  >>> print(index_table)
2657
2673
  '__mo_index_secondary_018cfbda-bde1-7c3e-805c-3f8e71769f75_cms_id'
2658
2674
  """
2659
2675
  from .index_utils import build_get_index_table_by_name_sql
2660
2676
 
2661
- sql, params = build_get_index_table_by_name_sql(table_name, index_name)
2677
+ # Use provided database_name or get current database from connection params
2678
+ if database_name is None:
2679
+ database_name = self._connection_params.get('database') if hasattr(self, '_connection_params') else None
2680
+
2681
+ sql, params = build_get_index_table_by_name_sql(table_name, index_name, database_name)
2662
2682
  result = self.execute(sql, params)
2663
2683
  row = result.fetchone()
2664
2684
  return row[0] if row else None
2665
2685
 
2686
def get_table_indexes_detail(self, table_name: str, database_name: Optional[str] = None) -> List[dict]:
    """
    Get detailed information about every index physical table of a table.

    One entry is returned per index physical table, so an index that is backed by
    several physical tables (for example the metadata / centroids / entries tables
    shown in the example below) yields several entries. PRIMARY indexes and catalog
    rows without a physical index table are excluded by the query.

    Args:
        table_name: Name of the table to get indexes for
        database_name: Name of the database (optional). If None, the ``database``
            entry of the client's connection parameters is used.

    Returns:
        List of dictionaries ordered by index name, then by algorithm table type
        (metadata, centroids, entries, anything else), each containing:
            - index_name: Name of the index
            - index_type: Type of index (e.g. MULTIPLE, UNIQUE)
            - algo: Algorithm type (e.g. ivfflat, hnsw, fulltext), or None when empty
            - algo_table_type: Algorithm table type (e.g. metadata, centroids,
              entries), or None when empty
            - physical_table_name: Physical table name
            - columns: List of column names in ordinal position order
            - algo_params: Algorithm parameters, or None when empty

    Raises:
        ValueError: If no database name is given and none can be taken from the
            connection parameters.

    Examples::

        >>> client = Client()
        >>> client.connect(host='localhost', port=6001, user='root', password='111', database='test')
        >>> # Get all index details for a table
        >>> indexes = client.get_table_indexes_detail('ivf_health_demo_docs')
        >>> for idx in indexes:
        ...     print(f"{idx['index_name']} ({idx['algo']}) - {idx['algo_table_type']}: {idx['physical_table_name']}")
        idx_embedding_ivf (ivfflat) - metadata: __mo_index_secondary_...
        idx_embedding_ivf (ivfflat) - centroids: __mo_index_secondary_...
        idx_embedding_ivf (ivfflat) - entries: __mo_index_secondary_...
    """
    if database_name is None:
        # Tolerate a missing *or* None ``_connection_params`` attribute so the
        # explicit ValueError below is raised instead of an AttributeError.
        connection_params = getattr(self, '_connection_params', None) or {}
        database_name = connection_params.get('database')

    if not database_name:
        raise ValueError("Database name must be provided or set in connection parameters")

    # The trailing sort_order column only drives ORDER BY; it is not copied into
    # the returned dictionaries.
    sql = """
        SELECT
            mo_indexes.name AS index_name,
            mo_indexes.type AS index_type,
            mo_indexes.algo AS algo,
            mo_indexes.algo_table_type AS algo_table_type,
            mo_indexes.index_table_name AS physical_table_name,
            GROUP_CONCAT(mo_indexes.column_name ORDER BY mo_indexes.ordinal_position SEPARATOR ', ') AS columns,
            mo_indexes.algo_params AS algo_params,
            CASE mo_indexes.algo_table_type
                WHEN 'metadata' THEN 1
                WHEN 'centroids' THEN 2
                WHEN 'entries' THEN 3
                ELSE 4
            END AS sort_order
        FROM mo_catalog.mo_indexes
        JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
        WHERE mo_tables.relname = ?
            AND mo_tables.reldatabase = ?
            AND mo_indexes.type != 'PRIMARY'
            AND mo_indexes.index_table_name IS NOT NULL
        GROUP BY
            mo_indexes.name,
            mo_indexes.type,
            mo_indexes.algo,
            mo_indexes.algo_table_type,
            mo_indexes.index_table_name,
            mo_indexes.algo_params
        ORDER BY
            mo_indexes.name,
            sort_order
    """

    result = self.execute(sql, (table_name, database_name))

    return [
        {
            'index_name': row[0],
            'index_type': row[1],
            # Empty strings and NULLs are both reported as None.
            'algo': row[2] or None,
            'algo_table_type': row[3] or None,
            'physical_table_name': row[4],
            # Must match the ', ' SEPARATOR used by GROUP_CONCAT above.
            'columns': row[5].split(', ') if row[5] else [],
            'algo_params': row[6] or None,
        }
        for row in result.fetchall()
    ]
2785
+
2666
2786
  def verify_table_index_counts(self, table_name: str) -> int:
2667
2787
  """
2668
2788
  Verify that the main table and all its secondary index tables have the same row count.
@@ -19,7 +19,7 @@ Connection hooks for MatrixOne clients
19
19
  from enum import Enum
20
20
  from typing import Callable, List, Optional, Union
21
21
 
22
- from sqlalchemy import event
22
+ from sqlalchemy import event, text
23
23
  from sqlalchemy.engine import Engine
24
24
  from sqlalchemy.ext.asyncio import AsyncEngine
25
25
 
@@ -69,13 +69,13 @@ class ConnectionHook:
69
69
  event.listen(engine.sync_engine, "connect", self._on_connect_sync)
70
70
  event.listen(engine.sync_engine, "before_cursor_execute", self._on_before_cursor_execute)
71
71
  if hasattr(self._client_ref, 'logger'):
72
- self._client_ref.logger.info("Attached connection hook to async engine")
72
+ self._client_ref.logger.debug("Attached connection hook to async engine")
73
73
  else:
74
74
  # For sync engines, listen to both connect and before_cursor_execute events
75
75
  event.listen(engine, "connect", self._on_connect_sync)
76
76
  event.listen(engine, "before_cursor_execute", self._on_before_cursor_execute)
77
77
  if hasattr(self._client_ref, 'logger'):
78
- self._client_ref.logger.info("Attached connection hook to sync engine")
78
+ self._client_ref.logger.debug("Attached connection hook to sync engine")
79
79
 
80
80
  def _on_connect_sync(self, dbapi_connection, connection_record):
81
81
  """SQLAlchemy event handler for new connections (sync)"""
@@ -86,7 +86,7 @@ class ConnectionHook:
86
86
  try:
87
87
  # Log that the hook is being executed
88
88
  if hasattr(self._client_ref, 'logger'):
89
- self._client_ref.logger.info(f"Executing connection hook on new connection {conn_id}")
89
+ self._client_ref.logger.debug(f"Executing connection hook on new connection {conn_id}")
90
90
  # Pass the connection to avoid creating new connections
91
91
  self.execute_sync_with_connection(self._client_ref, dbapi_connection)
92
92
  self._executed_connections.add(conn_id)
@@ -104,7 +104,7 @@ class ConnectionHook:
104
104
  try:
105
105
  # Log that the hook is being executed
106
106
  if hasattr(self._client_ref, 'logger'):
107
- self._client_ref.logger.info(f"Executing connection hook on connection {conn_id}")
107
+ self._client_ref.logger.debug(f"Executing connection hook on connection {conn_id}")
108
108
  # Use the connection to avoid creating new connections
109
109
  self.execute_sync_with_connection(self._client_ref, conn.connection)
110
110
  self._executed_connections.add(conn_id)
@@ -119,16 +119,65 @@ class ConnectionHook:
119
119
  # For immediate execution, we need to get a connection from the client
120
120
  # This is a fallback for when we don't have a specific connection
121
121
  if hasattr(client, '_engine') and client._engine:
122
- async with client._engine.begin() as conn:
123
- # For async connections, use get_raw_connection() method
124
- raw_conn = await conn.get_raw_connection()
125
- await self.execute_async_with_connection(client, raw_conn)
122
+ async with client._engine.connect() as conn:
123
+ # Execute actions directly on async connection
124
+ await self.execute_async_on_connection(client, conn)
126
125
  else:
127
126
  client.logger.warning("No engine available for connection hook execution")
128
127
 
129
128
  except Exception as e:
130
129
  client.logger.warning(f"Connection hook execution failed: {e}")
131
130
 
131
async def execute_async_on_connection(self, client, async_connection) -> None:
    """
    Execute hook actions on an AsyncConnection (SQLAlchemy async connection).

    Each configured action is translated into one or more ``SET experimental_*``
    statements that are executed on ``async_connection``; afterwards the optional
    custom hook is invoked with ``client``. Any exception raised along the way is
    logged as a warning on ``client.logger`` and not propagated.

    Args:
        client: Client object; its ``logger`` is used for reporting and it is
            passed to the custom hook.
        async_connection: Connection on which the SET statements are awaited.
    """
    import inspect

    try:
        # Execute predefined actions
        for action in self.actions:
            if isinstance(action, str):
                action = ConnectionAction(action)

            if action == ConnectionAction.ENABLE_IVF:
                statements = ("SET experimental_ivf_index = 1",)
                message = "✓ Enabled IVF vector operations"
            elif action == ConnectionAction.ENABLE_HNSW:
                statements = ("SET experimental_hnsw_index = 1",)
                message = "✓ Enabled HNSW vector operations"
            elif action == ConnectionAction.ENABLE_FULLTEXT:
                statements = ("SET experimental_fulltext_index = 1",)
                message = "✓ Enabled fulltext search operations"
            elif action == ConnectionAction.ENABLE_VECTOR:
                statements = (
                    "SET experimental_ivf_index = 1",
                    "SET experimental_hnsw_index = 1",
                )
                message = "✓ Enabled vector operations"
            elif action == ConnectionAction.ENABLE_ALL:
                statements = (
                    "SET experimental_ivf_index = 1",
                    "SET experimental_hnsw_index = 1",
                    "SET experimental_fulltext_index = 1",
                )
                message = "✓ Enabled all operations"
            else:
                client.logger.warning(f"Unknown connection action: {action}")
                continue

            for statement in statements:
                await async_connection.execute(text(statement))
            client.logger.debug(message)

        # Execute custom hook if provided. Calling it and then checking whether
        # the result is awaitable covers plain functions, coroutine functions and
        # callable objects alike, without inspecting code-object flags.
        hook = self.custom_hook
        if hook and callable(hook):
            outcome = hook(client)
            if inspect.isawaitable(outcome):
                await outcome

    except Exception as e:
        # Best-effort: a failing action or hook (including a TypeError raised
        # inside the hook) is reported here rather than silently dropped.
        client.logger.warning(f"Connection hook execution failed: {e}")
180
+
132
181
  async def execute_async_with_connection(self, client, dbapi_connection) -> None:
133
182
  """Execute hook actions asynchronously using the provided connection"""
134
183
  try:
@@ -212,7 +261,7 @@ class ConnectionHook:
212
261
  cursor = dbapi_connection.cursor()
213
262
  cursor.execute("SET experimental_ivf_index = 1")
214
263
  cursor.close()
215
- client.logger.info("✓ Enabled IVF vector operations")
264
+ client.logger.debug("✓ Enabled IVF vector operations")
216
265
  except Exception as e:
217
266
  client.logger.warning(f"Failed to enable IVF: {e}")
218
267
 
@@ -223,7 +272,7 @@ class ConnectionHook:
223
272
  cursor = dbapi_connection.cursor()
224
273
  cursor.execute("SET experimental_hnsw_index = 1")
225
274
  cursor.close()
226
- client.logger.info("✓ Enabled HNSW vector operations")
275
+ client.logger.debug("✓ Enabled HNSW vector operations")
227
276
  except Exception as e:
228
277
  client.logger.warning(f"Failed to enable HNSW: {e}")
229
278
 
@@ -234,7 +283,7 @@ class ConnectionHook:
234
283
  cursor = dbapi_connection.cursor()
235
284
  cursor.execute("SET experimental_fulltext_index = 1")
236
285
  cursor.close()
237
- client.logger.info("✓ Enabled fulltext search operations")
286
+ client.logger.debug("✓ Enabled fulltext search operations")
238
287
  except Exception as e:
239
288
  client.logger.warning(f"Failed to enable fulltext: {e}")
240
289
 
matrixone/index_utils.py CHANGED
@@ -19,43 +19,67 @@ Index utilities - Shared logic for secondary index operations
19
19
from typing import List, Optional, Tuple
20
20
 
21
21
 
22
def build_get_index_tables_sql(table_name: str, database: Optional[str] = None) -> Tuple[str, Tuple]:
    """
    Build SQL to get all secondary index table names for a given table.

    This includes both MULTIPLE (regular secondary indexes) and UNIQUE indexes.

    Args:
        table_name: Name of the table
        database: Name of the database (optional, but recommended to avoid
            cross-database conflicts). When falsy, the query matches on the
            table name only.

    Returns:
        Tuple of (sql, params), where params line up positionally with the
        ``?`` placeholders in sql.
    """
    conditions = ["mo_tables.relname = ?"]
    params = [table_name]
    if database:
        conditions.append("mo_tables.reldatabase = ?")
        params.append(database)
    conditions.append("mo_indexes.type IN ('MULTIPLE', 'UNIQUE')")

    # Only the fixed predicate fragments above are interpolated; every
    # caller-supplied value is passed through a ``?`` placeholder.
    where_clause = " AND ".join(conditions)
    sql = f"""
    SELECT DISTINCT mo_indexes.index_table_name
    FROM mo_catalog.mo_indexes
    JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
    WHERE {where_clause}
    """
    return sql, tuple(params)
52
+
53
+
54
def build_get_index_table_by_name_sql(
    table_name: str, index_name: str, database: Optional[str] = None
) -> Tuple[str, Tuple]:
    """
    Build SQL to get the physical table name of a secondary index by its index name.

    Args:
        table_name: Name of the table
        index_name: Name of the secondary index
        database: Name of the database (optional, but recommended to avoid
            cross-database conflicts). When falsy, the query matches on the
            table and index names only.

    Returns:
        Tuple of (sql, params), where params line up positionally with the
        ``?`` placeholders in sql.
    """
    conditions = ["mo_tables.relname = ?", "mo_indexes.name = ?"]
    params = [table_name, index_name]
    if database:
        conditions.append("mo_tables.reldatabase = ?")
        params.append(database)

    # Only the fixed predicate fragments above are interpolated; every
    # caller-supplied value is passed through a ``?`` placeholder.
    where_clause = " AND ".join(conditions)
    sql = f"""
    SELECT DISTINCT mo_indexes.index_table_name
    FROM mo_catalog.mo_indexes
    JOIN mo_catalog.mo_tables ON mo_indexes.table_id = mo_tables.rel_id
    WHERE {where_clause}
    """
    return sql, tuple(params)
59
83
 
60
84
 
61
85
  def build_verify_counts_sql(table_name: str, index_tables: List[str]) -> str: