MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py

@@ -6,22 +6,24 @@ from snowflake.sqlalchemy import snowdialect
 from snowflake import connector
 from snowflake.connector.errors import NotSupportedError
 from snowflake.connector.cursor import SnowflakeCursor, ResultMetadata
+from typing import Optional, List

 from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb_sql_parser.ast import Select, Identifier

 from mindsdb.utilities import log
-from mindsdb.integrations.libs.base import DatabaseHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
     HandlerResponse as Response,
-    RESPONSE_TYPE
+    RESPONSE_TYPE,
 )
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE

 try:
     import pyarrow as pa
+
     memory_pool = pa.default_memory_pool()
 except Exception:
     memory_pool = None
@@ -31,7 +33,7 @@ logger = log.getLogger(__name__)


 def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE:
-    """
+    """Map Snowflake types to MySQL types.

     Args:
         internal_type_name (str): The name of the Snowflake type to map.
@@ -41,22 +43,22 @@ def _map_type(internal_type_name: str) -> MYSQL_DATA_TYPE:
     """
     internal_type_name = internal_type_name.upper()
     types_map = {
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
-        (
+        ("NUMBER", "DECIMAL", "DEC", "NUMERIC"): MYSQL_DATA_TYPE.DECIMAL,
+        ("INT , INTEGER , BIGINT , SMALLINT , TINYINT , BYTEINT"): MYSQL_DATA_TYPE.INT,
+        ("FLOAT", "FLOAT4", "FLOAT8"): MYSQL_DATA_TYPE.FLOAT,
+        ("DOUBLE", "DOUBLE PRECISION", "REAL"): MYSQL_DATA_TYPE.DOUBLE,
+        ("VARCHAR"): MYSQL_DATA_TYPE.VARCHAR,
+        ("CHAR", "CHARACTER", "NCHAR"): MYSQL_DATA_TYPE.CHAR,
+        ("STRING", "TEXT", "NVARCHAR"): MYSQL_DATA_TYPE.TEXT,
+        ("NVARCHAR2", "CHAR VARYING", "NCHAR VARYING"): MYSQL_DATA_TYPE.VARCHAR,
+        ("BINARY", "VARBINARY"): MYSQL_DATA_TYPE.BINARY,
+        ("BOOLEAN",): MYSQL_DATA_TYPE.BOOL,
+        ("TIMESTAMP_NTZ", "DATETIME"): MYSQL_DATA_TYPE.DATETIME,
+        ("DATE",): MYSQL_DATA_TYPE.DATE,
+        ("TIME",): MYSQL_DATA_TYPE.TIME,
+        ("TIMESTAMP_LTZ"): MYSQL_DATA_TYPE.DATETIME,
+        ("TIMESTAMP_TZ"): MYSQL_DATA_TYPE.DATETIME,
+        ("VARIANT", "OBJECT", "ARRAY", "MAP", "GEOGRAPHY", "GEOMETRY", "VECTOR"): MYSQL_DATA_TYPE.VARCHAR,
     }

     for db_types_list, mysql_data_type in types_map.items():
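A note on the new types_map above: most keys are tuples of Snowflake type names, but a few (for example ("VARCHAR"), ("TIMESTAMP_LTZ"), and the comma-joined INT entry) are plain strings, because a single parenthesized value is not a tuple in Python. The loop in the context line presumably checks membership with `in`, which resolves both key shapes: tuple membership for tuples and substring search for strings. A minimal, self-contained sketch of that lookup pattern follows; the enum stand-in, the helper name, and the fallback value are illustrative and not taken from the diff.

from enum import Enum


class MySQLType(Enum):  # stand-in for MYSQL_DATA_TYPE; illustrative only
    INT = "int"
    TEXT = "text"
    VARCHAR = "varchar"


types_map = {
    ("STRING", "TEXT", "NVARCHAR"): MySQLType.TEXT,         # tuple key
    ("VARCHAR"): MySQLType.VARCHAR,                          # parentheses only: this key is a plain string
    ("INT , INTEGER , BIGINT , SMALLINT"): MySQLType.INT,    # also a single string, matched by substring
}


def map_type_sketch(name: str) -> MySQLType:
    name = name.upper()
    for db_types, mysql_type in types_map.items():
        # `in` is tuple membership for tuple keys and substring search for string keys.
        if name in db_types:
            return mysql_type
    return MySQLType.VARCHAR  # illustrative default


print(map_type_sketch("text"))     # MySQLType.TEXT
print(map_type_sketch("integer"))  # MySQLType.INT (substring match on the string key)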
@@ -84,26 +86,32 @@ def _make_table_response(result: DataFrame, cursor: SnowflakeCursor) -> Response
     for column in description:
         column_dtype = result[column.name].dtype
         description_column_type = connector.constants.FIELD_ID_TO_NAME.get(column.type_code)
+        if description_column_type in ("OBJECT", "ARRAY"):
+            mysql_types.append(MYSQL_DATA_TYPE.JSON)
+            continue
+        if description_column_type == "VECTOR":
+            mysql_types.append(MYSQL_DATA_TYPE.VECTOR)
+            continue
         if pd_types.is_integer_dtype(column_dtype):
             column_dtype_name = column_dtype.name
-            if column_dtype_name in (
+            if column_dtype_name in ("int8", "Int8"):
                 mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
-            elif column_dtype in (
+            elif column_dtype in ("int16", "Int16"):
                 mysql_types.append(MYSQL_DATA_TYPE.SMALLINT)
-            elif column_dtype in (
+            elif column_dtype in ("int32", "Int32"):
                 mysql_types.append(MYSQL_DATA_TYPE.MEDIUMINT)
-            elif column_dtype in (
+            elif column_dtype in ("int64", "Int64"):
                 mysql_types.append(MYSQL_DATA_TYPE.BIGINT)
             else:
                 mysql_types.append(MYSQL_DATA_TYPE.INT)
             continue
         if pd_types.is_float_dtype(column_dtype):
             column_dtype_name = column_dtype.name
-            if column_dtype_name in (
+            if column_dtype_name in ("float16", "Float16"):  # Float16 does not exists so far
                 mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
-            elif column_dtype_name in (
+            elif column_dtype_name in ("float32", "Float32"):
                 mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
-            elif column_dtype_name in (
+            elif column_dtype_name in ("float64", "Float64"):
                 mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
             else:
                 mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
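The integer and float branches above check both lower-case and capitalized dtype names because pandas reports NumPy-backed columns as "int64"/"float64" but nullable extension columns (which can hold NULLs coming back from Snowflake) as "Int64"/"Float64". A standalone illustration, separate from the handler code:

import numpy as np
import pandas as pd

numpy_series = pd.Series([1, 2, 3], dtype=np.int64)        # NumPy-backed column
nullable_series = pd.Series([1, None, 3], dtype="Int64")   # nullable extension array

print(numpy_series.dtype.name)     # "int64"
print(nullable_series.dtype.name)  # "Int64"

# Both forms pass the same dtype check used in _make_table_response.
print(pd.api.types.is_integer_dtype(numpy_series.dtype))     # True
print(pd.api.types.is_integer_dtype(nullable_series.dtype))  # True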
@@ -115,35 +123,35 @@ def _make_table_response(result: DataFrame, cursor: SnowflakeCursor) -> Response
             mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
             series = result[column.name]
             # snowflake use pytz.timezone
-            if series.dt.tz is not None and getattr(series.dt.tz,
-                series = series.dt.tz_convert(
+            if series.dt.tz is not None and getattr(series.dt.tz, "zone", "UTC") != "UTC":
+                series = series.dt.tz_convert("UTC")
             result[column.name] = series.dt.tz_localize(None)
             continue

         if pd_types.is_object_dtype(column_dtype):
-            if description_column_type ==
+            if description_column_type == "TEXT":
                 # we can also check column.internal_size, if == 16777216 then it is TEXT, else VARCHAR(internal_size)
                 mysql_types.append(MYSQL_DATA_TYPE.TEXT)
                 continue
-            elif description_column_type ==
+            elif description_column_type == "BINARY":
                 # if column.internal_size == 8388608 then BINARY, else VARBINARY(internal_size)
                 mysql_types.append(MYSQL_DATA_TYPE.BINARY)
                 continue
-            elif description_column_type ==
+            elif description_column_type == "DATE":
                 mysql_types.append(MYSQL_DATA_TYPE.DATE)
                 continue
-            elif description_column_type ==
+            elif description_column_type == "TIME":
                 mysql_types.append(MYSQL_DATA_TYPE.TIME)
                 continue

-        if description_column_type ==
+        if description_column_type == "FIXED":
             if column.scale == 0:
                 mysql_types.append(MYSQL_DATA_TYPE.INT)
             else:
                 # It is NUMBER, DECIMAL or NUMERIC with scale > 0
                 mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
             continue
-        elif description_column_type ==
+        elif description_column_type == "REAL":
             mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
             continue

@@ -154,24 +162,19 @@ def _make_table_response(result: DataFrame, cursor: SnowflakeCursor) -> Response
         columns=[column.name for column in description],
     )

-    return Response(
-        RESPONSE_TYPE.TABLE,
-        data_frame=df,
-        affected_rows=None,
-        mysql_types=mysql_types
-    )
+    return Response(RESPONSE_TYPE.TABLE, data_frame=df, affected_rows=None, mysql_types=mysql_types)


-class SnowflakeHandler(DatabaseHandler):
+class SnowflakeHandler(MetaDatabaseHandler):
     """
     This handler handles connection and execution of the Snowflake statements.
     """

-    name =
+    name = "snowflake"

     def __init__(self, name, **kwargs):
         super().__init__(name)
-        self.connection_data = kwargs.get(
+        self.connection_data = kwargs.get("connection_data")
         self.renderer = SqlalchemyRender(snowdialect.dialect)

         self.is_connected = False
@@ -193,18 +196,18 @@ class SnowflakeHandler(DatabaseHandler):
             return self.connection

         # Mandatory connection parameters
-        if not all(key in self.connection_data for key in [
-            raise ValueError(
+        if not all(key in self.connection_data for key in ["account", "user", "password", "database"]):
+            raise ValueError("Required parameters (account, user, password, database) must be provided.")

         config = {
-
-
-
-
+            "account": self.connection_data.get("account"),
+            "user": self.connection_data.get("user"),
+            "password": self.connection_data.get("password"),
+            "database": self.connection_data.get("database"),
         }

         # Optional connection parameters
-        optional_params = [
+        optional_params = ["schema", "warehouse", "role"]
         for param in optional_params:
             if param in self.connection_data:
                 config[param] = self.connection_data[param]
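For context, the check above requires account, user, password, and database in connection_data and treats schema, warehouse, and role as optional. A hypothetical connection_data payload that would pass validation; every value is a placeholder, not taken from the diff:

connection_data = {
    "account": "abc12345.us-east-1",  # mandatory
    "user": "mindsdb_user",           # mandatory
    "password": "********",           # mandatory
    "database": "ANALYTICS",          # mandatory
    "schema": "PUBLIC",               # optional
    "warehouse": "COMPUTE_WH",        # optional
    "role": "SYSADMIN",               # optional
}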
@@ -215,7 +218,7 @@ class SnowflakeHandler(DatabaseHandler):
             self.is_connected = True
             return self.connection
         except connector.errors.Error as e:
-            logger.error(f
+            logger.error(f"Error connecting to Snowflake, {e}!")
             raise

     def disconnect(self):
@@ -244,10 +247,10 @@ class SnowflakeHandler(DatabaseHandler):

             # Execute a simple query to test the connection
             with connection.cursor() as cur:
-                cur.execute(
+                cur.execute("select 1;")
             response.success = True
         except (connector.errors.Error, ValueError) as e:
-            logger.error(f
+            logger.error(f"Error connecting to Snowflake, {e}!")
             response.error_message = str(e)

         if response.success and need_to_close:
@@ -276,7 +279,6 @@ class SnowflakeHandler(DatabaseHandler):
        try:
            cur.execute(query)
            try:
-
                try:
                    batches_iter = cur.fetch_pandas_batches()
                except ValueError:
@@ -297,64 +299,52 @@ class SnowflakeHandler(DatabaseHandler):
                    if memory_estimation_check_done is False and batches_rowcount > 1000:
                        memory_estimation_check_done = True
                        available_memory_kb = psutil.virtual_memory().available >> 10
-                        batches_size_kb = sum(
+                        batches_size_kb = sum(
+                            [(x.memory_usage(index=True, deep=True).sum() >> 10) for x in batches]
+                        )
                        total_rowcount = cur.rowcount
                        rest_rowcount = total_rowcount - batches_rowcount
                        rest_estimated_size_kb = int((rest_rowcount / batches_rowcount) * batches_size_kb)
                        if (available_memory_kb * 0.9) < rest_estimated_size_kb:
                            logger.error(
-
-                                f
-                                f
-                                f
+                                "Attempt to get too large dataset:\n"
+                                f"batches_rowcount={batches_rowcount}, size_kb={batches_size_kb}\n"
+                                f"total_rowcount={total_rowcount}, estimated_size_kb={rest_estimated_size_kb}\n"
+                                f"available_memory_kb={available_memory_kb}"
                            )
-                            raise MemoryError(
+                            raise MemoryError("Not enought memory")
                # endregion
                if len(batches) > 0:
                    response = _make_table_response(result=pandas.concat(batches, ignore_index=True), cursor=cur)
                else:
-                    response = Response(
-                        RESPONSE_TYPE.TABLE,
-                        DataFrame(
-                            [],
-                            columns=[x[0] for x in cur.description]
-                        )
-                    )
+                    response = Response(RESPONSE_TYPE.TABLE, DataFrame([], columns=[x[0] for x in cur.description]))
            except NotSupportedError:
                # Fallback for CREATE/DELETE/UPDATE. These commands returns table with single column,
                # but it cannot be retrieved as pandas DataFrame.
                result = cur.fetchall()
                match result:
                    case (
-                        [{
-                        | [{
-                        | [{
+                        [{"number of rows inserted": affected_rows}]
+                        | [{"number of rows deleted": affected_rows}]
+                        | [{"number of rows updated": affected_rows, "number of multi-joined rows updated": _}]
                    ):
                        response = Response(RESPONSE_TYPE.OK, affected_rows=affected_rows)
                    case list():
                        response = Response(
-                            RESPONSE_TYPE.TABLE,
-                            DataFrame(
-                                result,
-                                columns=[x[0] for x in cur.description]
-                            )
+                            RESPONSE_TYPE.TABLE, DataFrame(result, columns=[x[0] for x in cur.description])
                        )
                    case _:
                        # Looks like SnowFlake always returns something in response, so this is suspicious
-                        logger.warning(
+                        logger.warning("Snowflake did not return any data in response.")
                        response = Response(RESPONSE_TYPE.OK)
        except Exception as e:
            logger.error(f"Error running query: {query} on {self.connection_data.get('database')}, {e}!")
-            response = Response(
-                RESPONSE_TYPE.ERROR,
-                error_code=0,
-                error_message=str(e)
-            )
+            response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))

        if need_to_close is True:
            self.disconnect()

-        if memory_pool is not None and memory_pool.backend_name ==
+        if memory_pool is not None and memory_pool.backend_name == "jemalloc":
            # This reduce memory consumption, but will slow down next query slightly.
            # Except pool type 'jemalloc': memory consumption do not change significantly
            # and next query processing time may be even lower.
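The memory guard in this hunk extrapolates from the batches fetched so far: it measures their size, scales by the ratio of remaining rows to fetched rows, and aborts if the estimate exceeds 90% of available memory. A worked sketch of that arithmetic with made-up numbers (all values below are illustrative, not measurements):

# Illustrative numbers only; the formula mirrors the hunk above.
batches_rowcount = 2_000          # rows fetched so far
batches_size_kb = 10_000          # measured size of those rows, in KiB
total_rowcount = 1_000_000        # cur.rowcount reported by Snowflake
available_memory_kb = 4_000_000   # psutil.virtual_memory().available >> 10

rest_rowcount = total_rowcount - batches_rowcount
rest_estimated_size_kb = int((rest_rowcount / batches_rowcount) * batches_size_kb)
print(rest_estimated_size_kb)  # 4990000 KiB estimated for the remaining rows

if (available_memory_kb * 0.9) < rest_estimated_size_kb:
    # 3,600,000 < 4,990,000, so the handler would raise MemoryError at this point.
    print("would raise MemoryError")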
@@ -385,7 +375,7 @@ class SnowflakeHandler(DatabaseHandler):
         quoted_columns = []
         if query.targets is not None:
             for column in query.targets:
-                if hasattr(column,
+                if hasattr(column, "alias") and column.alias is not None:
                     if column.alias.is_quoted[-1]:
                         quoted_columns.append(column.alias.parts[-1])
                 elif isinstance(column, Identifier):
@@ -455,3 +445,264 @@ class SnowflakeHandler(DatabaseHandler):
         result.to_columns_table_response(map_type_fn=_map_type)

         return result
+
+    def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
+        """
+        Retrieves metadata information about the tables in the Snowflake database to be stored in the data catalog.
+
+        Args:
+            table_names (list): A list of table names for which to retrieve metadata information.
+
+        Returns:
+            Response: A response object containing the metadata information, formatted as per the `Response` class.
+        """
+        query = """
+            SELECT
+                TABLE_CATALOG,
+                TABLE_SCHEMA,
+                TABLE_NAME,
+                TABLE_TYPE,
+                COMMENT AS TABLE_DESCRIPTION,
+                ROW_COUNT,
+                CREATED,
+                LAST_ALTERED
+            FROM INFORMATION_SCHEMA.TABLES
+            WHERE TABLE_SCHEMA = current_schema()
+            AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names])
+            query += f" AND TABLE_NAME IN ({table_names_str})"
+
+        result = self.native_query(query)
+        result.data_frame["ROW_COUNT"] = result.data_frame["ROW_COUNT"].astype(int)
+
+        return result
+
+    def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
+        """
+        Retrieves column metadata for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve column metadata.
+
+        Returns:
+            Response: A response object containing the column metadata.
+        """
+        query = """
+            SELECT
+                TABLE_NAME,
+                COLUMN_NAME,
+                DATA_TYPE,
+                COMMENT AS COLUMN_DESCRIPTION,
+                COLUMN_DEFAULT,
+                (IS_NULLABLE = 'YES') AS IS_NULLABLE,
+                CHARACTER_MAXIMUM_LENGTH,
+                CHARACTER_OCTET_LENGTH,
+                NUMERIC_PRECISION,
+                NUMERIC_SCALE,
+                DATETIME_PRECISION,
+                CHARACTER_SET_NAME,
+                COLLATION_NAME
+            FROM INFORMATION_SCHEMA.COLUMNS
+            WHERE TABLE_SCHEMA = current_schema()
+        """
+
+        if table_names is not None and len(table_names) > 0:
+            table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names])
+            query += f" AND TABLE_NAME IN ({table_names_str})"
+
+        result = self.native_query(query)
+        return result
+
+    def meta_get_column_statistics(self, table_names: Optional[List[str]] = None) -> Response:
+        """
+        Retrieves basic column statistics: null %, distinct count.
+        Due to Snowflake limitations, this runs per-table not per-column.
+        TODO: Add most_common_values and most_common_frequencies
+        """
+        columns_query = """
+            SELECT TABLE_NAME, COLUMN_NAME
+            FROM INFORMATION_SCHEMA.COLUMNS
+            WHERE TABLE_SCHEMA = current_schema()
+        """
+        if table_names:
+            table_names_str = ", ".join([f"'{t.upper()}'" for t in table_names])
+            columns_query += f" AND TABLE_NAME IN ({table_names_str})"
+
+        columns_result = self.native_query(columns_query)
+        if (
+            columns_result.type == RESPONSE_TYPE.ERROR
+            or columns_result.data_frame is None
+            or columns_result.data_frame.empty
+        ):
+            return Response(RESPONSE_TYPE.ERROR, error_message="No columns found.")
+
+        columns_df = columns_result.data_frame
+        grouped = columns_df.groupby("TABLE_NAME")
+        all_stats = []
+
+        for table_name, group in grouped:
+            select_parts = []
+            for _, row in group.iterrows():
+                col = row["COLUMN_NAME"]
+                # Ensure column names in the query are properly quoted if they contain special characters or are case-sensitive
+                quoted_col = f'"{col}"'
+                select_parts.extend(
+                    [
+                        f'COUNT_IF({quoted_col} IS NULL) AS "nulls_{col}"',
+                        f'APPROX_COUNT_DISTINCT({quoted_col}) AS "distincts_{col}"',
+                        f'MIN({quoted_col}) AS "min_{col}"',
+                        f'MAX({quoted_col}) AS "max_{col}"',
+                    ]
+                )
+
+            quoted_table_name = f'"{table_name}"'
+            stats_query = f"""
+                SELECT COUNT(*) AS "total_rows", {", ".join(select_parts)}
+                FROM {quoted_table_name}
+            """
+            try:
+                stats_res = self.native_query(stats_query)
+                if stats_res.type != RESPONSE_TYPE.TABLE or stats_res.data_frame is None or stats_res.data_frame.empty:
+                    logger.warning(
+                        f"Could not retrieve stats for table {table_name}. Query returned no data or an error: {stats_res.error_message if stats_res.type == RESPONSE_TYPE.ERROR else 'No data'}"
+                    )
+                    # Add placeholder stats if query fails or returns empty
+                    for _, row in group.iterrows():
+                        all_stats.append(
+                            {
+                                "table_name": table_name,
+                                "column_name": row["COLUMN_NAME"],
+                                "null_percentage": None,
+                                "distinct_values_count": None,
+                                "most_common_values": [],
+                                "most_common_frequencies": [],
+                                "minimum_value": None,
+                                "maximum_value": None,
+                            }
+                        )
+                    continue
+
+                stats_data = stats_res.data_frame.iloc[0]
+                total_rows = stats_data.get("total_rows", 0)
+
+                for _, row in group.iterrows():
+                    col = row["COLUMN_NAME"]
+                    # Keys for stats_data should match the aliases in stats_query (e.g., "nulls_COLNAME")
+                    nulls = stats_data.get(f"nulls_{col}", 0)
+                    distincts = stats_data.get(f"distincts_{col}", None)
+                    min_val = stats_data.get(f"min_{col}", None)
+                    max_val = stats_data.get(f"max_{col}", None)
+                    null_pct = (nulls / total_rows) * 100 if total_rows is not None and total_rows > 0 else None
+
+                    all_stats.append(
+                        {
+                            "table_name": table_name,
+                            "column_name": col,
+                            "null_percentage": null_pct,
+                            "distinct_values_count": distincts,
+                            "most_common_values": [],
+                            "most_common_frequencies": [],
+                            "minimum_value": min_val,
+                            "maximum_value": max_val,
+                        }
+                    )
+            except Exception as e:
+                logger.error(f"Exception while fetching statistics for table {table_name}: {e}")
+                for _, row in group.iterrows():
+                    all_stats.append(
+                        {
+                            "table_name": table_name,
+                            "column_name": row["COLUMN_NAME"],
+                            "null_percentage": None,
+                            "distinct_values_count": None,
+                            "most_common_values": [],
+                            "most_common_frequencies": [],
+                            "minimum_value": None,
+                            "maximum_value": None,
+                        }
+                    )
+
+        if not all_stats:
+            return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame())
+
+        return Response(RESPONSE_TYPE.TABLE, data_frame=pandas.DataFrame(all_stats))
+
+    def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response:
+        """
+        Retrieves primary key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve primary key information.
+
+        Returns:
+            Response: A response object containing the primary key information.
+        """
+        try:
+            query = """
+                SHOW PRIMARY KEYS IN TABLE;
+            """
+
+            response = self.native_query(query)
+            if response.type == RESPONSE_TYPE.ERROR and response.error_message:
+                logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}")
+
+            df = response.data_frame
+            if not df.empty:
+                if table_names:
+                    df = df[df["table_name"].isin(table_names)]
+
+                df = df[["table_name", "column_name", "key_sequence", "constraint_name"]]
+                df = df.rename(columns={"key_sequence": "ordinal_position"})
+
+            response.data_frame = df
+
+            return response
+
+        except Exception as e:
+            logger.error(f"Exception in meta_get_primary_keys: {e!r}")
+            return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}")
+
+    def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Response:
+        """
+        Retrieves foreign key information for the specified tables (or all tables if no list is provided).
+
+        Args:
+            table_names (list): A list of table names for which to retrieve foreign key information.
+
+        Returns:
+            Response: A response object containing the foreign key information.
+        """
+        try:
+            query = """
+                SHOW IMPORTED KEYS IN TABLE;
+            """
+
+            response = self.native_query(query)
+            if response.type == RESPONSE_TYPE.ERROR and response.error_message:
+                logger.error(f"Query error in meta_get_primary_keys: {response.error_message}\nQuery:\n{query}")
+
+            df = response.data_frame
+            if not df.empty:
+                if table_names:
+                    df = df[df["pk_table_name"].isin(table_names) & df["fk_table_name"].isin(table_names)]
+
+                df = df[["pk_table_name", "pk_column_name", "fk_table_name", "fk_column_name"]]
+                df = df.rename(
+                    columns={
+                        "pk_table_name": "child_table_name",
+                        "pk_column_name": "child_column_name",
+                        "fk_table_name": "parent_table_name",
+                        "fk_column_name": "parent_column_name",
+                    }
+                )
+
+            response.data_frame = df
+
+            return response
+
+        except Exception as e:
+            logger.error(f"Exception in meta_get_primary_keys: {e!r}")
+            return Response(RESPONSE_TYPE.ERROR, error_message=f"Exception querying primary keys: {e!r}")
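To make the per-table statistics query in meta_get_column_statistics above concrete: for a hypothetical table "T" with columns "ID" and "NAME", the same string-building would assemble roughly the SQL printed below. The table and column names are invented for illustration; the aliases mirror the ones the handler later reads back as "nulls_{col}", "distincts_{col}", "min_{col}", and "max_{col}".

# Hypothetical example reproducing the stats_query construction shown above.
select_parts = []
for col in ["ID", "NAME"]:
    quoted_col = f'"{col}"'
    select_parts.extend(
        [
            f'COUNT_IF({quoted_col} IS NULL) AS "nulls_{col}"',
            f'APPROX_COUNT_DISTINCT({quoted_col}) AS "distincts_{col}"',
            f'MIN({quoted_col}) AS "min_{col}"',
            f'MAX({quoted_col}) AS "max_{col}"',
        ]
    )

stats_query = f"""
    SELECT COUNT(*) AS "total_rows", {", ".join(select_parts)}
    FROM "T"
"""
print(stats_query)
# SELECT COUNT(*) AS "total_rows", COUNT_IF("ID" IS NULL) AS "nulls_ID", ... , MAX("NAME") AS "max_NAME"
# FROM "T"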