MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/agents/constants.py

@@ -15,7 +15,10 @@ OPEN_AI_CHAT_MODELS = (
     "gpt-4-32k",
     "gpt-4-1106-preview",
     "gpt-4-0125-preview",
+    "gpt-4.1",
+    "gpt-4.1-mini",
     "gpt-4o",
+    "o4-mini",
     "o3-mini",
     "o1-mini",
 )
@@ -216,8 +219,11 @@ You are an AI assistant powered by MindsDB. When answering questions, follow the
    - Finally use kb_query_tool to query the knowledge base for specific information
 
 2. For questions about database tables and their contents:
-   - Use the
+   - Use the sql_db_query to query the tables directly
    - You can join tables if needed to get comprehensive information
+   - **Important Rule for SQL Queries:** If you formulate an SQL query as part of answering a user's question, you *must* then use the `sql_db_query` tool to execute that query and get its results. The SQL query string itself is NOT the final answer to the user unless the user has specifically asked for the query. Your final AI response should be based on the *results* obtained from executing the query.
+
 
 For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
+
 """
mindsdb/interfaces/agents/langchain_agent.py

@@ -226,7 +226,7 @@ def process_chunk(chunk):
 
 
 class LangchainAgent:
-    def __init__(self, agent: db.Agents, model: dict = None):
+    def __init__(self, agent: db.Agents, model: dict = None, params: dict = None):
         self.agent = agent
         self.model = model
 
@@ -239,16 +239,35 @@ class LangchainAgent:
         self.mdb_langfuse_callback_handler: Optional[object] = None  # custom (see langfuse_callback_handler.py)
 
         self.langfuse_client_wrapper = LangfuseClientWrapper()
-        self.args = self._initialize_args()
+        self.args = self._initialize_args(params)
 
         # Back compatibility for old models
         self.provider = self.args.get("provider", get_llm_provider(self.args))
 
-    def _initialize_args(self) -> dict:
-        """
-
-
-
+    def _initialize_args(self, params: dict = None) -> dict:
+        """
+        Initialize the arguments for agent execution.
+
+        Takes the parameters passed during execution and sets necessary defaults.
+        The params are already merged with defaults by AgentsController.get_agent_llm_params.
+
+        Args:
+            params: Parameters for agent execution (already merged with defaults)
+
+        Returns:
+            dict: Final parameters for agent execution
+        """
+        # Use the parameters passed to the method (already merged with defaults by AgentsController)
+        # No fallback needed as AgentsController.get_agent_llm_params already handles this
+        args = params.copy() if params else {}
+
+        # Set model name and provider if given in create agent otherwise use global llm defaults
+        # AgentsController.get_agent_llm_params
+        if self.agent.model_name is not None:
+            args["model_name"] = self.agent.model_name
+        if self.agent.provider is not None:
+            args["provider"] = self.agent.provider
+
         args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
 
         # agent is using current langchain model
@@ -261,11 +280,20 @@ class LangchainAgent:
             # only update prompt_template if it is set on the model
             args["prompt_template"] = prompt_template
 
+        # Set default prompt template if not provided
         if args.get("prompt_template") is None:
+            # Default prompt template depends on agent mode
            if args.get("mode") == "retrieval":
                args["prompt_template"] = DEFAULT_RAG_PROMPT_TEMPLATE
+                logger.info(f"Using default retrieval prompt template: {DEFAULT_RAG_PROMPT_TEMPLATE[:50]}...")
            else:
-
+                # Set a default prompt template for non-retrieval mode
+                default_prompt = "you are an assistant, answer using the tables connected"
+                args["prompt_template"] = default_prompt
+                logger.info(f"Using default prompt template: {default_prompt}")
+
+        if "prompt_template" in args:
+            logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
 
         return args
 
@@ -318,7 +346,7 @@ class LangchainAgent:
         self.provider = args.get("provider", get_llm_provider(args))
 
         df = df.reset_index(drop=True)
-        agent = self.create_agent(df
+        agent = self.create_agent(df)
         # Use last message as prompt, remove other questions.
         user_column = args.get("user_column", USER_COLUMN)
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
@@ -348,14 +376,17 @@ class LangchainAgent:
         self.provider = args.get("provider", get_llm_provider(args))
 
         df = df.reset_index(drop=True)
-        agent = self.create_agent(df
+        agent = self.create_agent(df)
         # Use last message as prompt, remove other questions.
         user_column = args.get("user_column", USER_COLUMN)
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
         return self.stream_agent(df, agent, args)
 
-    def create_agent(self, df: pd.DataFrame
+    def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
         # Set up tools.
+
+        args = self.args
+
         llm = create_chat_model(args)
         self.llm = llm
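The net effect of these `langchain_agent.py` changes is that parameter merging now happens upstream in `AgentsController.get_agent_llm_params`, and `LangchainAgent` simply receives the merged dict. A minimal sketch of the new flow, assuming a MindsDB server context and an existing `db.Agents` row named `agent_db` (both placeholders; the controller call mirrors the one in the `mcp_client_agent.py` diff below):

```python
# Sketch only: assumes MindsDB is initialized and `agent_db` is a db.Agents row.
from mindsdb.interfaces.agents.agents_controller import AgentsController
from mindsdb.interfaces.agents.langchain_agent import LangchainAgent

agent_controller = AgentsController()

# Defaults and per-agent params are merged by the controller, not the agent.
merged_params = agent_controller.get_agent_llm_params(agent_db.params)

# The constructor now accepts the merged params; _initialize_args(params) only
# layers agent.model_name / agent.provider and prompt-template defaults on top.
agent = LangchainAgent(agent_db, params=merged_params)
```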
mindsdb/interfaces/agents/mcp_client_agent.py

@@ -63,11 +63,19 @@ class MCPQueryTool(BaseTool):
         return loop.run_until_complete(self._arun(query))
 
 
+# todo move instantiation to agent controller
 class MCPLangchainAgent(LangchainAgent):
     """Extension of LangchainAgent that delegates to MCP server"""
 
-    def __init__(
-
+    def __init__(
+        self,
+        agent: db.Agents,
+        model: dict = None,
+        params: dict = None,
+        mcp_host: str = "127.0.0.1",
+        mcp_port: int = 47337,
+    ):
+        super().__init__(agent, model, params)
         self.mcp_host = mcp_host
         self.mcp_port = mcp_port
         self.exit_stack = AsyncExitStack()
@@ -85,7 +93,7 @@ class MCPLangchainAgent(LangchainAgent):
         server_params = StdioServerParameters(
             command="python",
             args=["-m", "mindsdb", "--api=mcp"],
-            env={"MCP_HOST": self.mcp_host, "MCP_PORT": str(self.mcp_port)}
+            env={"MCP_HOST": self.mcp_host, "MCP_PORT": str(self.mcp_port)},
         )
 
         logger.info(f"Connecting to MCP server at {self.mcp_host}:{self.mcp_port}")
@@ -99,7 +107,9 @@ class MCPLangchainAgent(LangchainAgent):
 
             # Test the connection by listing tools
             tools_response = await self.session.list_tools()
-            logger.info(
+            logger.info(
+                f"Successfully connected to MCP server. Available tools: {[tool.name for tool in tools_response.tools]}"
+            )
 
         except Exception as e:
             logger.error(f"Failed to connect to MCP server: {str(e)}")
@@ -141,7 +151,7 @@ class MCPLangchainAgent(LangchainAgent):
         response = super().get_completion(messages, stream)
 
         # Ensure response is a string (not a DataFrame)
-        if hasattr(response,
+        if hasattr(response, "to_string"):  # It's a DataFrame
             return response.to_string()
 
         return response
@@ -167,7 +177,7 @@ class LiteLLMAgentWrapper:
         formatted_messages = [
             {
                 "question": msg["content"] if msg["role"] == "user" else "",
-                "answer": msg["content"] if msg["role"] == "assistant" else ""
+                "answer": msg["content"] if msg["role"] == "assistant" else "",
             }
             for msg in messages
         ]
@@ -177,23 +187,16 @@ class LiteLLMAgentWrapper:
 
         # Ensure response is a string
         if not isinstance(response, str):
-            if hasattr(response,
+            if hasattr(response, "to_string"):  # It's a DataFrame
                 response = response.to_string()
             else:
                 response = str(response)
 
         # Format response in LiteLLM expected format
         return {
-            "choices": [
-                {
-                    "message": {
-                        "role": "assistant",
-                        "content": response
-                    }
-                }
-            ],
+            "choices": [{"message": {"role": "assistant", "content": response}}],
             "model": self.agent.args["model_name"],
-            "object": "chat.completion"
+            "object": "chat.completion",
         }
 
     async def acompletion_stream(self, messages: List[Dict[str, str]], **kwargs) -> Iterator[Dict[str, Any]]:
@@ -202,7 +205,7 @@ class LiteLLMAgentWrapper:
         formatted_messages = [
             {
                 "question": msg["content"] if msg["role"] == "user" else "",
-                "answer": msg["content"] if msg["role"] == "assistant" else ""
+                "answer": msg["content"] if msg["role"] == "assistant" else "",
             }
             for msg in messages
         ]
@@ -217,7 +220,7 @@ class LiteLLMAgentWrapper:
                 yield {
                     "choices": [{"delta": {"role": "assistant", "content": content}}],
                     "model": model_name,
-                    "object": "chat.completion.chunk"
+                    "object": "chat.completion.chunk",
                 }
                 # Allow async context switch
                 await asyncio.sleep(0)
@@ -230,7 +233,9 @@ class LiteLLMAgentWrapper:
         await self.agent.cleanup()
 
 
-def create_mcp_agent(
+def create_mcp_agent(
+    agent_name: str, project_name: str, mcp_host: str = "127.0.0.1", mcp_port: int = 47337
+) -> LiteLLMAgentWrapper:
     """Create an MCP agent and wrap it for LiteLLM compatibility"""
     from mindsdb.interfaces.agents.agents_controller import AgentsController
     from mindsdb.interfaces.storage import db
@@ -245,8 +250,11 @@ def create_mcp_agent(agent_name: str, project_name: str, mcp_host: str = "127.0.
     if agent_db is None:
         raise ValueError(f"Agent {agent_name} not found in project {project_name}")
 
-    #
-
+    # Get merged parameters (defaults + agent params)
+    merged_params = agent_controller.get_agent_llm_params(agent_db.params)
+
+    # Create MCP agent with merged parameters
+    mcp_agent = MCPLangchainAgent(agent_db, params=merged_params, mcp_host=mcp_host, mcp_port=mcp_port)
 
     # Wrap for LiteLLM compatibility
    return LiteLLMAgentWrapper(mcp_agent)
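Putting the `mcp_client_agent.py` pieces together, `create_mcp_agent` now resolves the merged LLM params before building the MCP-backed agent, and returns a LiteLLM-compatible wrapper. A hedged usage sketch (the agent and project names are placeholders; `acompletion_stream` and its chunk shape are taken from the diff above):

```python
# Illustrative only: "my_agent" / "mindsdb" must exist on the server.
import asyncio

from mindsdb.interfaces.agents.mcp_client_agent import create_mcp_agent


async def main():
    wrapper = create_mcp_agent("my_agent", "mindsdb", mcp_host="127.0.0.1", mcp_port=47337)
    messages = [{"role": "user", "content": "How many rows are in the sales table?"}]
    async for chunk in wrapper.acompletion_stream(messages):
        # Chunks follow the LiteLLM format built above:
        # {"choices": [{"delta": {...}}], "model": ..., "object": "chat.completion.chunk"}
        print(chunk["choices"][0]["delta"].get("content", ""), end="")


asyncio.run(main())
```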
mindsdb/interfaces/data_catalog/__init__.py (file without changes)
mindsdb/interfaces/data_catalog/base_data_catalog.py (new file)

@@ -0,0 +1,54 @@
+from typing import List, Optional, Union
+
+from mindsdb.integrations.libs.api_handler import MetaAPIHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
+from mindsdb.utilities import log
+
+
+logger = log.getLogger("mindsdb")
+
+
+class BaseDataCatalog:
+    """
+    This is the base class for the Data Catalog interface.
+    """
+
+    def __init__(self, database_name: str, table_names: Optional[List[str]] = None) -> None:
+        """
+        Initialize the DataCatalogReader.
+
+        Args:
+            database_name (str): The data source to read/write metadata from.
+            table_names (Optional[List[str]]): The list of table names to read or write metadata for. If None, all tables will be read or written.
+        """
+        from mindsdb.api.executor.controllers.session_controller import (
+            SessionController,
+        )
+
+        session = SessionController()
+
+        self.database_name = database_name
+        self.data_handler: Union[MetaDatabaseHandler, MetaAPIHandler] = session.integration_controller.get_data_handler(
+            database_name
+        )
+        integration = session.integration_controller.get(database_name)
+        self.integration_id = integration["id"]
+        self.integration_engine = integration["engine"]
+        # TODO: Handle situations where a schema is provided along with the database name, e.g., 'schema.table'.
+        # TODO: Handle situations where a file path is provided with integrations like S3, e.g., 'dir/file.csv'.
+        self.table_names = table_names
+
+        self.logger = logger
+
+    def is_data_catalog_supported(self) -> bool:
+        """
+        Check if the data catalog is supported for the given database.
+
+        Returns:
+            bool: True if the data catalog is supported, False otherwise.
+        """
+        if not isinstance(self.data_handler, (MetaDatabaseHandler, MetaAPIHandler)):
+            self.logger.warning(f"Data catalog is not supported for the '{self.integration_engine}' integration'. ")
+            return False
+
+        return True
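Since `BaseDataCatalog` resolves the handler and integration record itself through a fresh `SessionController`, callers only supply the integration name. A minimal sketch, assuming an integration named `my_postgres` exists (the name and table list are placeholders):

```python
# Sketch only: "my_postgres" stands in for an existing integration.
from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog

catalog = BaseDataCatalog("my_postgres", table_names=["orders", "customers"])

# Only handlers that subclass MetaDatabaseHandler or MetaAPIHandler qualify;
# anything else logs a warning and returns False.
if catalog.is_data_catalog_supported():
    print(f"Catalog enabled for integration id {catalog.integration_id}")
```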
mindsdb/interfaces/data_catalog/data_catalog_loader.py (new file)

@@ -0,0 +1,359 @@
+from typing import List, Union
+
+import pandas as pd
+
+from mindsdb.integrations.libs.response import RESPONSE_TYPE
+from mindsdb.interfaces.data_catalog.base_data_catalog import BaseDataCatalog
+from mindsdb.interfaces.storage import db
+
+
+class DataCatalogLoader(BaseDataCatalog):
+    """
+    This class is responsible for loading the metadata from a data source (via the handler) and storing it in the data catalog.
+    """
+
+    def load_metadata(self) -> None:
+        """
+        Load the metadata from the handler and store it in the database.
+        """
+        if not self.is_data_catalog_supported():
+            return
+
+        loaded_table_names = self._get_loaded_table_names()
+
+        tables = self._load_table_metadata(loaded_table_names)
+
+        if tables:
+            columns = self._load_column_metadata(tables)
+
+            self._load_column_statistics(tables, columns)
+
+            self._load_primary_keys(tables, columns)
+
+            self._load_foreign_keys(tables, columns)
+
+        self.logger.info(f"Metadata loading completed for {self.database_name}.")
+
+    def _get_loaded_table_names(self) -> List[str]:
+        """
+        Retrieve the names of tables that are already present in the data catalog for the current integration.
+        If table_names are provided, only those tables will be checked.
+
+        Returns:
+            List[str]: Names of tables already loaded in the data catalog.
+        """
+        query = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id)
+        if self.table_names:
+            query = query.filter(db.MetaTables.name.in_(self.table_names))
+
+        tables = query.all()
+        table_names = [table.name for table in tables]
+
+        if table_names:
+            self.logger.info(f"Tables already loaded in the data catalog: {', '.join(table_names)}.")
+
+        return table_names
+
+    def _load_table_metadata(self, loaded_table_names: List[str] = None) -> List[Union[db.MetaTables, None]]:
+        """
+        Load the table metadata from the handler.
+        """
+        self.logger.info(f"Loading tables for {self.database_name}")
+        response = self.data_handler.meta_get_tables(self.table_names)
+        if response.resp_type != RESPONSE_TYPE.TABLE:
+            self.logger.error(f"Failed to load tables for {self.database_name}: {response.error_message}")
+            return []
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No tables to add for {self.database_name}.")
+            return []
+
+        # Filter out tables that are already loaded in the data catalog
+        if loaded_table_names:
+            df = df[~df["table_name"].isin(loaded_table_names)]
+
+        if df.empty:
+            self.logger.info(f"No new tables to load for {self.database_name}.")
+            return []
+
+        df.columns = df.columns.str.lower()
+        tables = self._add_table_metadata(df)
+        self.logger.info(f"Tables loaded for {self.database_name}.")
+        return tables
+
+    def _add_table_metadata(self, df: pd.DataFrame) -> List[db.MetaTables]:
+        """
+        Add the table metadata to the database.
+        """
+        tables = []
+        try:
+            for row in df.to_dict(orient="records"):
+                # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
+                val = row.get("row_count")
+                row_count = int(val) if pd.notna(val) else None
+
+                record = db.MetaTables(
+                    integration_id=self.integration_id,
+                    name=row.get("table_name") or row.get("name"),
+                    schema=row.get("table_schema"),
+                    description=row.get("table_description"),
+                    type=row.get("table_type"),
+                    row_count=row_count,
+                )
+                tables.append(record)
+
+            db.session.add_all(tables)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add tables: {e}")
+            db.session.rollback()
+            raise
+        return tables
+
+    def _load_column_metadata(self, tables: db.MetaTables) -> List[db.MetaColumns]:
+        """
+        Load the column metadata from the handler.
+        """
+        self.logger.info(f"Loading columns for {self.database_name}")
+        response = self.data_handler.meta_get_columns(self.table_names)
+        if response.resp_type != RESPONSE_TYPE.TABLE:
+            self.logger.error(f"Failed to load columns for {self.database_name}: {response.error_message}")
+            return []
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No columns to load for {self.database_name}.")
+            return []
+
+        df.columns = df.columns.str.lower()
+        columns = self._add_column_metadata(df, tables)
+        self.logger.info(f"Columns loaded for {self.database_name}.")
+        return columns
+
+    def _add_column_metadata(self, df: pd.DataFrame, tables: db.MetaTables) -> List[db.MetaColumns]:
+        """
+        Add the column metadata to the database.
+        """
+        columns = []
+        try:
+            for row in df.to_dict(orient="records"):
+                record = db.MetaColumns(
+                    table_id=next((table.id for table in tables if table.name == row.get("table_name"))),
+                    name=row.get("column_name"),
+                    data_type=row.get("data_type"),
+                    default_value=row.get("column_default"),
+                    description=row.get("description"),
+                    is_nullable=row.get("is_nullable"),
+                )
+                columns.append(record)
+
+            db.session.add_all(columns)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add columns: {e}")
+            db.session.rollback()
+            raise
+        return columns
+
+    def _load_column_statistics(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the column statistics metadata from the handler.
+        """
+        self.logger.info(f"Loading column statistics for {self.database_name}")
+        response = self.data_handler.meta_get_column_statistics(self.table_names)
+        if response.resp_type != RESPONSE_TYPE.TABLE:
+            self.logger.error(f"Failed to load column statistics for {self.database_name}: {response.error_message}")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No column statistics to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_column_statistics(df, tables, columns)
+        self.logger.info(f"Column statistics loaded for {self.database_name}.")
+
+    def _add_column_statistics(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the column statistics metadata to the database.
+        """
+        column_statistics = []
+        try:
+            for row in df.to_dict(orient="records"):
+                table_id = next((table.id for table in tables if table.name == row.get("table_name")))
+                column_id = next(
+                    (
+                        column.id
+                        for column in columns
+                        if column.name == row.get("column_name") and column.table_id == table_id
+                    )
+                )
+
+                # Convert the distinct_values_count to an integer if it is not NaN, otherwise set it to None.
+                val = row.get("distinct_values_count")
+                distinct_values_count = int(val) if pd.notna(val) else None
+
+                # Convert the most_common_frequencies to a list of strings.
+                most_common_frequencies = [str(val) for val in row.get("most_common_frequencies") or []]
+
+                record = db.MetaColumnStatistics(
+                    column_id=column_id,
+                    most_common_values=row.get("most_common_values"),
+                    most_common_frequencies=most_common_frequencies,
+                    null_percentage=row.get("null_percentage"),
+                    distinct_values_count=distinct_values_count,
+                    minimum_value=row.get("minimum_value"),
+                    maximum_value=row.get("maximum_value"),
+                )
+                column_statistics.append(record)
+
+            db.session.add_all(column_statistics)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add column statistics: {e}")
+            db.session.rollback()
+            raise
+
+    def _load_primary_keys(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the primary keys metadata from the handler.
+        """
+        self.logger.info(f"Loading primary keys for {self.database_name}")
+        response = self.data_handler.meta_get_primary_keys(self.table_names)
+        if response.resp_type != RESPONSE_TYPE.TABLE:
+            self.logger.error(f"Failed to load primary keys for {self.database_name}: {response.error_message}")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No primary keys to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_primary_keys(df, tables, columns)
+        self.logger.info(f"Primary keys loaded for {self.database_name}.")
+
+    def _add_primary_keys(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the primary keys metadata to the database.
+        """
+        primary_keys = []
+        try:
+            for row in df.to_dict(orient="records"):
+                table_id = next((table.id for table in tables if table.name == row.get("table_name")))
+                column_id = next(
+                    (
+                        column.id
+                        for column in columns
+                        if column.name == row.get("column_name") and column.table_id == table_id
+                    )
+                )
+
+                record = db.MetaPrimaryKeys(
+                    table_id=table_id,
+                    column_id=column_id,
+                    constraint_name=row.get("constraint_name"),
+                )
+                primary_keys.append(record)
+
+            db.session.add_all(primary_keys)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add primary keys: {e}")
+            db.session.rollback()
+            raise
+
+    def _load_foreign_keys(self, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Load the foreign keys metadata from the handler.
+        """
+        self.logger.info(f"Loading foreign keys for {self.database_name}")
+        response = self.data_handler.meta_get_foreign_keys(self.table_names)
+        if response.resp_type != RESPONSE_TYPE.TABLE:
+            self.logger.error(f"Failed to foreign keys for {self.database_name}: {response.error_message}")
+            return
+
+        df = response.data_frame
+        if df.empty:
+            self.logger.info(f"No foreign keys to load for {self.database_name}.")
+            return
+
+        df.columns = df.columns.str.lower()
+        self._add_foreign_keys(df, tables, columns)
+        self.logger.info(f"Foreign keys loaded for {self.database_name}.")
+
+    def _add_foreign_keys(self, df: pd.DataFrame, tables: db.MetaTables, columns: db.MetaColumns) -> None:
+        """
+        Add the foreign keys metadata to the database.
+        """
+        foreign_keys = []
+        try:
+            for row in df.to_dict(orient="records"):
+                try:
+                    parent_table_id = next((table.id for table in tables if table.name == row.get("parent_table_name")))
+                    parent_column_id = next(
+                        (
+                            column.id
+                            for column in columns
+                            if column.name == row.get("parent_column_name") and column.table_id == parent_table_id
+                        )
+                    )
+                    child_table_id = next((table.id for table in tables if table.name == row.get("child_table_name")))
+                    child_column_id = next(
+                        (
+                            column.id
+                            for column in columns
+                            if column.name == row.get("child_column_name") and column.table_id == child_table_id
+                        )
+                    )
+                except StopIteration:
+                    self.logger.warning(
+                        f"The foreign key relationship for {row.get('parent_table_name')} -> {row.get('child_table_name')} "
+                        f"could not be established. One or more tables or columns may not exist in the metadata."
+                    )
+                    continue
+
+                record = db.MetaForeignKeys(
+                    parent_table_id=parent_table_id,
+                    parent_column_id=parent_column_id,
+                    child_table_id=child_table_id,
+                    child_column_id=child_column_id,
+                    constraint_name=row.get("constraint_name"),
+                )
+                foreign_keys.append(record)
+
+            db.session.add_all(foreign_keys)
+            db.session.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to add foreign keys: {e}")
+            db.session.rollback()
+            raise
+
+    def unload_metadata(self) -> None:
+        """
+        Remove the metadata for the specified database from the data catalog.
+        """
+        if not self.is_data_catalog_supported():
+            return
+
+        meta_tables = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id).all()
+
+        if not meta_tables:
+            self.logger.info(f"No metadata found for {self.database_name}. Nothing to remove.")
+            return
+
+        for table in meta_tables:
+            db.session.query(db.MetaPrimaryKeys).filter_by(table_id=table.id).delete()
+            db.session.query(db.MetaForeignKeys).filter(
+                (db.MetaForeignKeys.parent_table_id == table.id) | (db.MetaForeignKeys.child_table_id == table.id)
+            ).delete()
+            meta_columns = db.session.query(db.MetaColumns).filter_by(table_id=table.id).all()
+            for col in meta_columns:
+                db.session.query(db.MetaColumnStatistics).filter_by(column_id=col.id).delete()
+                db.session.delete(col)
+
+            db.session.delete(table)
+        db.session.commit()
+        self.logger.info(f"Metadata for {self.database_name} removed successfully.")
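End to end, the loader walks tables, columns, column statistics, and key constraints in order, skipping tables already present in the catalog (via `_get_loaded_table_names`). A hedged sketch of the public API, again assuming a placeholder `my_postgres` integration:

```python
# Sketch only: "my_postgres" is a placeholder integration name.
from mindsdb.interfaces.data_catalog.data_catalog_loader import DataCatalogLoader

loader = DataCatalogLoader("my_postgres")  # pass table_names=[...] to narrow the scan
loader.load_metadata()    # tables -> columns -> column statistics -> primary/foreign keys

# Removal deletes keys and statistics first, then columns, then the table rows.
loader.unload_metadata()
```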