PyPI - MindsDB - Versions diffs - 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl - Mend

MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (76)

mindsdb/__about__.py +1 -1
mindsdb/api/a2a/agent.py +50 -26
mindsdb/api/a2a/common/server/server.py +32 -26
mindsdb/api/a2a/task_manager.py +68 -6
mindsdb/api/executor/command_executor.py +69 -14
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
mindsdb/api/executor/planner/plan_join.py +67 -77
mindsdb/api/executor/planner/query_planner.py +176 -155
mindsdb/api/executor/planner/steps.py +37 -12
mindsdb/api/executor/sql_query/result_set.py +45 -64
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
mindsdb/api/executor/utilities/sql.py +42 -48
mindsdb/api/http/namespaces/config.py +1 -1
mindsdb/api/http/namespaces/file.py +14 -23
mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
mindsdb/integrations/libs/api_handler.py +279 -57
mindsdb/integrations/libs/base.py +185 -30
mindsdb/integrations/utilities/files/file_reader.py +99 -73
mindsdb/integrations/utilities/handler_utils.py +23 -8
mindsdb/integrations/utilities/sql_utils.py +35 -40
mindsdb/interfaces/agents/agents_controller.py +226 -196
mindsdb/interfaces/agents/constants.py +8 -1
mindsdb/interfaces/agents/langchain_agent.py +42 -11
mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
mindsdb/interfaces/data_catalog/__init__.py +0 -0
mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
mindsdb/interfaces/database/database.py +81 -57
mindsdb/interfaces/database/integrations.py +222 -234
mindsdb/interfaces/database/log.py +72 -104
mindsdb/interfaces/database/projects.py +156 -193
mindsdb/interfaces/file/file_controller.py +21 -65
mindsdb/interfaces/knowledge_base/controller.py +66 -25
mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
mindsdb/interfaces/skills/skills_controller.py +31 -36
mindsdb/interfaces/skills/sql_agent.py +113 -86
mindsdb/interfaces/storage/db.py +242 -82
mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
mindsdb/utilities/config.py +13 -2
mindsdb/utilities/log.py +35 -26
mindsdb/utilities/ml_task_queue/task.py +19 -22
mindsdb/utilities/render/sqlalchemy_render.py +129 -181
mindsdb/utilities/starters.py +40 -0
{mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
{mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
{mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
{mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/agents/constants.py CHANGED Viewed

@@ -15,7 +15,10 @@ OPEN_AI_CHAT_MODELS = (
     "gpt-4-32k",
     "gpt-4-1106-preview",
     "gpt-4-0125-preview",
+    "gpt-4.1",
+    "gpt-4.1-mini",
     "gpt-4o",
+    "o4-mini",
     "o3-mini",
     "o1-mini",
 )
@@ -216,8 +219,12 @@ You are an AI assistant powered by MindsDB. When answering questions, follow the
    - Finally use kb_query_tool to query the knowledge base for specific information
 2. For questions about database tables and their contents:
-   - Use the sql_tool to query the tables directly
+   - Use the sql_db_query to query the tables directly
    - You can join tables if needed to get comprehensive information
+   - You are running on a federated query engine, so joins across multiple databases are allowed and supported
+   - **Important Rule for SQL Queries:** If you formulate an SQL query as part of answering a user's question, you *must* then use the `sql_db_query` tool to execute that query and get its results. The SQL query string itself is NOT the final answer to the user unless the user has specifically asked for the query. Your final AI response should be based on the *results* obtained from executing the query.
 For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
 """

mindsdb/interfaces/agents/langchain_agent.py CHANGED Viewed

@@ -226,7 +226,7 @@ def process_chunk(chunk):
 class LangchainAgent:
-    def __init__(self, agent: db.Agents, model: dict = None):
+    def __init__(self, agent: db.Agents, model: dict = None, params: dict = None):
         self.agent = agent
         self.model = model
@@ -239,16 +239,35 @@ class LangchainAgent:
         self.mdb_langfuse_callback_handler: Optional[object] = None  # custom (see langfuse_callback_handler.py)
         self.langfuse_client_wrapper = LangfuseClientWrapper()
-        self.args = self._initialize_args()
+        self.args = self._initialize_args(params)
         # Back compatibility for old models
         self.provider = self.args.get("provider", get_llm_provider(self.args))
-    def _initialize_args(self) -> dict:
-        """Initialize the arguments based on the agent's parameters."""
-        args = self.agent.params.copy()
-        args["model_name"] = self.agent.model_name
-        args["provider"] = self.agent.provider
+    def _initialize_args(self, params: dict = None) -> dict:
+        """
+        Initialize the arguments for agent execution.
+        Takes the parameters passed during execution and sets necessary defaults.
+        The params are already merged with defaults by AgentsController.get_agent_llm_params.
+        Args:
+            params: Parameters for agent execution (already merged with defaults)
+        Returns:
+            dict: Final parameters for agent execution
+        """
+        # Use the parameters passed to the method (already merged with defaults by AgentsController)
+        # No fallback needed as AgentsController.get_agent_llm_params already handles this
+        args = params.copy() if params else {}
+        # Set model name and provider if given in create agent otherwise use global llm defaults
+        # AgentsController.get_agent_llm_params
+        if self.agent.model_name is not None:
+            args["model_name"] = self.agent.model_name
+        if self.agent.provider is not None:
+            args["provider"] = self.agent.provider
         args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
         # agent is using current langchain model
@@ -261,11 +280,20 @@ class LangchainAgent:
                 # only update prompt_template if it is set on the model
                 args["prompt_template"] = prompt_template
+        # Set default prompt template if not provided
         if args.get("prompt_template") is None:
+            # Default prompt template depends on agent mode
             if args.get("mode") == "retrieval":
                 args["prompt_template"] = DEFAULT_RAG_PROMPT_TEMPLATE
+                logger.info(f"Using default retrieval prompt template: {DEFAULT_RAG_PROMPT_TEMPLATE[:50]}...")
             else:
-                raise ValueError("Please provide a `prompt_template` or set `mode=retrieval`")
+                # Set a default prompt template for non-retrieval mode
+                default_prompt = "you are an assistant, answer using the tables connected"
+                args["prompt_template"] = default_prompt
+                logger.info(f"Using default prompt template: {default_prompt}")
+        if "prompt_template" in args:
+            logger.info(f"Using prompt template: {args['prompt_template'][:50]}...")
         return args
@@ -318,7 +346,7 @@ class LangchainAgent:
         self.provider = args.get("provider", get_llm_provider(args))
         df = df.reset_index(drop=True)
-        agent = self.create_agent(df, args)
+        agent = self.create_agent(df)
         # Use last message as prompt, remove other questions.
         user_column = args.get("user_column", USER_COLUMN)
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
@@ -348,14 +376,17 @@ class LangchainAgent:
         self.provider = args.get("provider", get_llm_provider(args))
         df = df.reset_index(drop=True)
-        agent = self.create_agent(df, args)
+        agent = self.create_agent(df)
         # Use last message as prompt, remove other questions.
         user_column = args.get("user_column", USER_COLUMN)
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
         return self.stream_agent(df, agent, args)
-    def create_agent(self, df: pd.DataFrame, args: Dict = None) -> AgentExecutor:
+    def create_agent(self, df: pd.DataFrame) -> AgentExecutor:
         # Set up tools.
+        args = self.args
         llm = create_chat_model(args)
         self.llm = llm

mindsdb/interfaces/agents/mcp_client_agent.py CHANGED Viewed

@@ -63,11 +63,19 @@ class MCPQueryTool(BaseTool):
         return loop.run_until_complete(self._arun(query))
+# todo move instantiation to agent controller
 class MCPLangchainAgent(LangchainAgent):
     """Extension of LangchainAgent that delegates to MCP server"""
-    def __init__(self, agent: db.Agents, model: dict = None, mcp_host: str = "127.0.0.1", mcp_port: int = 47337):
-        super().__init__(agent, model)
+    def __init__(
+        self,
+        agent: db.Agents,
+        model: dict = None,
+        params: dict = None,
+        mcp_host: str = "127.0.0.1",
+        mcp_port: int = 47337,
+    ):
+        super().__init__(agent, model, params)
         self.mcp_host = mcp_host
         self.mcp_port = mcp_port
         self.exit_stack = AsyncExitStack()
@@ -85,7 +93,7 @@ class MCPLangchainAgent(LangchainAgent):
                 server_params = StdioServerParameters(
                     command="python",
                     args=["-m", "mindsdb", "--api=mcp"],
-                    env={"MCP_HOST": self.mcp_host, "MCP_PORT": str(self.mcp_port)}
+                    env={"MCP_HOST": self.mcp_host, "MCP_PORT": str(self.mcp_port)},
                 )
                 logger.info(f"Connecting to MCP server at {self.mcp_host}:{self.mcp_port}")
@@ -99,7 +107,9 @@ class MCPLangchainAgent(LangchainAgent):
                 # Test the connection by listing tools
                 tools_response = await self.session.list_tools()
-                logger.info(f"Successfully connected to MCP server. Available tools: {[tool.name for tool in tools_response.tools]}")
+                logger.info(
+                    f"Successfully connected to MCP server. Available tools: {[tool.name for tool in tools_response.tools]}"
+                )
             except Exception as e:
                 logger.error(f"Failed to connect to MCP server: {str(e)}")
@@ -141,7 +151,7 @@ class MCPLangchainAgent(LangchainAgent):
         response = super().get_completion(messages, stream)
         # Ensure response is a string (not a DataFrame)
-        if hasattr(response, 'to_string'):  # It's a DataFrame
+        if hasattr(response, "to_string"):  # It's a DataFrame
             return response.to_string()
         return response
@@ -167,7 +177,7 @@ class LiteLLMAgentWrapper:
         formatted_messages = [
             {
                 "question": msg["content"] if msg["role"] == "user" else "",
-                "answer": msg["content"] if msg["role"] == "assistant" else ""
+                "answer": msg["content"] if msg["role"] == "assistant" else "",
             }
             for msg in messages
         ]
@@ -177,23 +187,16 @@ class LiteLLMAgentWrapper:
         # Ensure response is a string
         if not isinstance(response, str):
-            if hasattr(response, 'to_string'):  # It's a DataFrame
+            if hasattr(response, "to_string"):  # It's a DataFrame
                 response = response.to_string()
             else:
                 response = str(response)
         # Format response in LiteLLM expected format
         return {
-            "choices": [
-                {
-                    "message": {
-                        "role": "assistant",
-                        "content": response
-                    }
-                }
-            ],
+            "choices": [{"message": {"role": "assistant", "content": response}}],
             "model": self.agent.args["model_name"],
-            "object": "chat.completion"
+            "object": "chat.completion",
         }
     async def acompletion_stream(self, messages: List[Dict[str, str]], **kwargs) -> Iterator[Dict[str, Any]]:
@@ -202,7 +205,7 @@ class LiteLLMAgentWrapper:
         formatted_messages = [
             {
                 "question": msg["content"] if msg["role"] == "user" else "",
-                "answer": msg["content"] if msg["role"] == "assistant" else ""
+                "answer": msg["content"] if msg["role"] == "assistant" else "",
             }
             for msg in messages
         ]
@@ -217,7 +220,7 @@ class LiteLLMAgentWrapper:
                     yield {
                         "choices": [{"delta": {"role": "assistant", "content": content}}],
                         "model": model_name,
-                        "object": "chat.completion.chunk"
+                        "object": "chat.completion.chunk",
                     }
                 # Allow async context switch
                 await asyncio.sleep(0)
@@ -230,7 +233,9 @@ class LiteLLMAgentWrapper:
         await self.agent.cleanup()
-def create_mcp_agent(agent_name: str, project_name: str, mcp_host: str = "127.0.0.1", mcp_port: int = 47337) -> LiteLLMAgentWrapper:
+def create_mcp_agent(
+    agent_name: str, project_name: str, mcp_host: str = "127.0.0.1", mcp_port: int = 47337
+) -> LiteLLMAgentWrapper:
     """Create an MCP agent and wrap it for LiteLLM compatibility"""
     from mindsdb.interfaces.agents.agents_controller import AgentsController
     from mindsdb.interfaces.storage import db
@@ -245,8 +250,11 @@ def create_mcp_agent(agent_name: str, project_name: str, mcp_host: str = "127.0.
     if agent_db is None:
         raise ValueError(f"Agent {agent_name} not found in project {project_name}")
-    # Create MCP agent
-    mcp_agent = MCPLangchainAgent(agent_db, mcp_host=mcp_host, mcp_port=mcp_port)
+    # Get merged parameters (defaults + agent params)
+    merged_params = agent_controller.get_agent_llm_params(agent_db.params)
+    # Create MCP agent with merged parameters
+    mcp_agent = MCPLangchainAgent(agent_db, params=merged_params, mcp_host=mcp_host, mcp_port=mcp_port)
     # Wrap for LiteLLM compatibility
     return LiteLLMAgentWrapper(mcp_agent)

mindsdb/interfaces/agents/mindsdb_database_agent.py CHANGED Viewed

@@ -1,7 +1,8 @@
 """
-    Wrapper around MindsDB's executor and integration controller following the implementation of the original
-    langchain.sql_database.SQLDatabase class to partly replicate its behavior.
+Wrapper around MindsDB's executor and integration controller following the implementation of the original
+langchain.sql_database.SQLDatabase class to partly replicate its behavior.
 """
 import traceback
 from typing import Any, Iterable, List, Optional
@@ -13,26 +14,25 @@ logger = log.getLogger(__name__)
 def extract_essential(input: str) -> str:
-    """ Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
-        'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
+    """Sometimes LLM include to input unnecessary data. We can't control stochastic nature of LLM, so we need to
+    'clean' input somehow. LLM prompt contains instruction to enclose input between '$START$' and '$STOP$'.
     """
-    if '$START$' in input:
-        input = input.partition('$START$')[-1]
-    if '$STOP$' in input:
-        input = input.partition('$STOP$')[0]
-    return input.strip(' ')
+    if "$START$" in input:
+        input = input.partition("$START$")[-1]
+    if "$STOP$" in input:
+        input = input.partition("$STOP$")[0]
+    return input.strip(" ")
 class MindsDBSQL(SQLDatabase):
     @staticmethod
-    def custom_init(
-        sql_agent: 'SQLAgent'
-    ) -> 'MindsDBSQL':
+    def custom_init(sql_agent: "SQLAgent") -> "MindsDBSQL":
         instance = MindsDBSQL()
         instance._sql_agent = sql_agent
         return instance
     """ Can't modify signature, as LangChain does a Pydantic check."""
     def __init__(
         self,
         engine: Optional[Any] = None,
@@ -51,7 +51,7 @@ class MindsDBSQL(SQLDatabase):
     @property
     def dialect(self) -> str:
-        return 'mindsdb'
+        return "mindsdb"
     @property
     def table_info(self) -> str:
@@ -93,23 +93,26 @@ class MindsDBSQL(SQLDatabase):
         command = extract_essential(command)
         try:
             # Log the query for debugging
             logger.info(f"Executing SQL query: {command}")
+            # Removing backticks causes in query execution.
             # remove backticks
-            command = command.replace('`', '')
+            # command = command.replace('`', '')
             # Parse the SQL string to an AST object first
             from mindsdb_sql_parser import parse_sql
             ast_query = parse_sql(command)
             # Now execute the parsed query
-            result = self._sql_agent.skill_tool.get_command_executor().execute_command(ast_query, database_name="mindsdb")
+            result = self._sql_agent.skill_tool.get_command_executor().execute_command(
+                ast_query, database_name="mindsdb"
+            )
             # Convert ExecuteAnswer to a DataFrame for easier manipulation
             df = None
-            if hasattr(result, 'data') and hasattr(result.data, 'data_frame'):
+            if hasattr(result, "data") and hasattr(result.data, "data_frame"):
                 df = result.data.data_frame
             else:
                 # Fallback to to_df when data_frame attr not available
@@ -130,7 +133,9 @@ class MindsDBSQL(SQLDatabase):
         except Exception as e:
             logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")
             # If this is a knowledge base query, provide a more helpful error message
-            if "knowledge_base" in command.lower() or any(kb in command for kb in self._sql_agent.get_usable_knowledge_base_names()):
+            if "knowledge_base" in command.lower() or any(
+                kb in command for kb in self._sql_agent.get_usable_knowledge_base_names()
+            ):
                 return f"Error executing knowledge base query: {str(e)}. Please check that the knowledge base exists and your query syntax is correct."
             return f"Error: {str(e)}"

mindsdb/interfaces/data_catalog/__init__.py ADDED Viewed

File without changes

mindsdb/interfaces/data_catalog/base_data_catalog.py ADDED Viewed

@@ -0,0 +1,54 @@
+from typing import List, Optional, Union
+from mindsdb.integrations.libs.api_handler import MetaAPIHandler
+from mindsdb.integrations.libs.base import MetaDatabaseHandler
+from mindsdb.utilities import log
+logger = log.getLogger("mindsdb")
+class BaseDataCatalog:
+    """
+    This is the base class for the Data Catalog interface.
+    """
+    def __init__(self, database_name: str, table_names: Optional[List[str]] = None) -> None:
+        """
+        Initialize the DataCatalogReader.
+        Args:
+            database_name (str): The data source to read/write metadata from.
+            table_names (Optional[List[str]]): The list of table names to read or write metadata for. If None, all tables will be read or written.
+        """
+        from mindsdb.api.executor.controllers.session_controller import (
+            SessionController,
+        )
+        session = SessionController()
+        self.database_name = database_name
+        self.data_handler: Union[MetaDatabaseHandler, MetaAPIHandler] = session.integration_controller.get_data_handler(
+            database_name
+        )
+        integration = session.integration_controller.get(database_name)
+        self.integration_id = integration["id"]
+        self.integration_engine = integration["engine"]
+        # TODO: Handle situations where a schema is provided along with the database name, e.g., 'schema.table'.
+        # TODO: Handle situations where a file path is provided with integrations like S3, e.g., 'dir/file.csv'.
+        self.table_names = table_names
+        self.logger = logger
+    def is_data_catalog_supported(self) -> bool:
+        """
+        Check if the data catalog is supported for the given database.
+        Returns:
+            bool: True if the data catalog is supported, False otherwise.
+        """
+        if not isinstance(self.data_handler, (MetaDatabaseHandler, MetaAPIHandler)):
+            self.logger.warning(f"Data catalog is not supported for the '{self.integration_engine}' integration'. ")
+            return False
+        return True

MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl