MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (70)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/run_a2a.py +1 -1
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  8. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  9. mindsdb/api/executor/planner/plan_join.py +67 -77
  10. mindsdb/api/executor/planner/query_planner.py +176 -155
  11. mindsdb/api/executor/planner/steps.py +37 -12
  12. mindsdb/api/executor/sql_query/result_set.py +45 -64
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  15. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  16. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  17. mindsdb/api/executor/utilities/sql.py +42 -48
  18. mindsdb/api/http/namespaces/config.py +1 -1
  19. mindsdb/api/http/namespaces/file.py +14 -23
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  23. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  24. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  25. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  26. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  27. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  28. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  29. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  30. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  32. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  33. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  34. mindsdb/integrations/libs/api_handler.py +261 -57
  35. mindsdb/integrations/libs/base.py +100 -29
  36. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  37. mindsdb/integrations/utilities/handler_utils.py +23 -8
  38. mindsdb/integrations/utilities/sql_utils.py +35 -40
  39. mindsdb/interfaces/agents/agents_controller.py +196 -192
  40. mindsdb/interfaces/agents/constants.py +7 -1
  41. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  42. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  43. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  44. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  46. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  47. mindsdb/interfaces/database/database.py +81 -57
  48. mindsdb/interfaces/database/integrations.py +220 -234
  49. mindsdb/interfaces/database/log.py +72 -104
  50. mindsdb/interfaces/database/projects.py +156 -193
  51. mindsdb/interfaces/file/file_controller.py +21 -65
  52. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  53. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  54. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  55. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  56. mindsdb/interfaces/skills/skills_controller.py +54 -36
  57. mindsdb/interfaces/skills/sql_agent.py +109 -86
  58. mindsdb/interfaces/storage/db.py +223 -79
  59. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  60. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  61. mindsdb/utilities/config.py +9 -2
  62. mindsdb/utilities/log.py +35 -26
  63. mindsdb/utilities/ml_task_queue/task.py +19 -22
  64. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  65. mindsdb/utilities/starters.py +49 -1
  66. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
  67. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
  68. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  69. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  70. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
 __title__ = "MindsDB"
 __package_name__ = "mindsdb"
-__version__ = "25.5.4.1"
+__version__ = "25.6.2.0"
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = "MindsDB Inc"
mindsdb/api/a2a/agent.py CHANGED
@@ -28,9 +28,7 @@ class MindsDBAgent:
         self.host = host
         self.port = port
         self.base_url = f"http://{host}:{port}"
-        self.agent_url = (
-            f"{self.base_url}/api/projects/{project_name}/agents/{agent_name}"
-        )
+        self.agent_url = f"{self.base_url}/api/projects/{project_name}/agents/{agent_name}"
         self.sql_url = f"{self.base_url}/api/sql/query"
         logger.info(f"Initialized MindsDB agent connector to {self.base_url}")
 
@@ -65,9 +63,7 @@ class MindsDBAgent:
         for column in ["response", "result", "answer", "completion", "output"]:
             if column in result_row:
                 content = result_row[column]
-                logger.info(
-                    f"Found result in column '{column}': {content[:100]}..."
-                )
+                logger.info(f"Found result in column '{column}': {content[:100]}...")
                 return {
                     "content": content,
                     "parts": [{"type": "text", "text": content}],
@@ -122,9 +118,7 @@ class MindsDBAgent:
             "parts": [{"type": "text", "text": error_msg}],
         }
 
-    def streaming_invoke(
-        self, messages: List[dict], timeout: int = DEFAULT_STREAM_TIMEOUT
-    ) -> Iterator[Dict[str, Any]]:
+    def streaming_invoke(self, messages: List[dict], timeout: int = DEFAULT_STREAM_TIMEOUT) -> Iterator[Dict[str, Any]]:
        """Stream responses from the MindsDB agent using the direct API endpoint.
 
        Args:
@@ -140,15 +134,11 @@ class MindsDBAgent:
         url = f"{self.base_url}/api/projects/{self.project_name}/agents/{self.agent_name}/completions/stream"
 
         # Log request for debugging
-        logger.info(
-            f"Sending streaming request to MindsDB agent: {self.agent_name}"
-        )
+        logger.info(f"Sending streaming request to MindsDB agent: {self.agent_name}")
         logger.debug(f"Request messages: {json.dumps(messages)[:200]}...")
 
         # Send the request to MindsDB streaming API with timeout
-        stream = requests.post(
-            url, json={"messages": messages}, stream=True, timeout=timeout
-        )
+        stream = requests.post(url, json={"messages": messages}, stream=True, timeout=timeout)
         stream.raise_for_status()
 
         # Process the streaming response directly
@@ -165,9 +155,7 @@ class MindsDBAgent:
                         # Pass through the chunk with minimal modifications
                         yield chunk
                     except json.JSONDecodeError as e:
-                        logger.warning(
-                            f"Failed to parse JSON from line: {data}. Error: {str(e)}"
-                        )
+                        logger.warning(f"Failed to parse JSON from line: {data}. Error: {str(e)}")
                         # Yield error information but continue processing
                         yield {
                             "error": f"JSON parse error: {str(e)}",
@@ -186,9 +174,7 @@ class MindsDBAgent:
                     logger.debug(f"Received non-data line: {line}")
 
                     # If it looks like a raw text response (not SSE format), wrap it
-                    if not line.startswith("event:") and not line.startswith(
-                        ":"
-                    ):
+                    if not line.startswith("event:") and not line.startswith(":"):
                         yield {"content": line, "is_task_complete": False}
                 except UnicodeDecodeError as e:
                     logger.warning(f"Failed to decode line: {str(e)}")
@@ -252,13 +238,30 @@ class MindsDBAgent:
         # Send a final completion message
         yield {"is_task_complete": True, "metadata": {"complete": True}}
 
-    async def stream(self, query, session_id) -> AsyncIterable[Dict[str, Any]]:
-        """Stream responses from the MindsDB agent (uses streaming API endpoint)."""
+    async def stream(
+        self,
+        query: str,
+        session_id: str,
+        history: List[dict] | None = None,
+    ) -> AsyncIterable[Dict[str, Any]]:
+        """Stream responses from the MindsDB agent (uses streaming API endpoint).
+
+        Args:
+            query: The current query to send to the agent.
+            session_id: Unique identifier for the conversation session.
+            history: Optional list of previous messages in the conversation.
+
+        Returns:
+            AsyncIterable yielding chunks of the streaming response.
+        """
         try:
             logger.info(f"Using streaming API for query: {query[:100]}...")
 
-            # Format the query into the message structure expected by streaming_invoke
-            messages = [{"question": query, "answer": None}]
+            # Start with history if provided, otherwise empty list
+            messages = history or []
+
+            # Add the current query to the messages
+            messages.append({"question": query, "answer": None})
 
             # Use the streaming_invoke method to get real streaming responses
             streaming_response = self.streaming_invoke(messages)
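
Note: the reworked stream() signature above now threads conversation history into streaming_invoke. A minimal consumption sketch, assuming a running MindsDB instance and keyword arguments matching the constructor in this file (agent name, project, host, and port below are placeholders):

import asyncio

from mindsdb.api.a2a.agent import MindsDBAgent


async def main():
    # Hypothetical arguments; match them to your deployment.
    agent = MindsDBAgent(agent_name="my_agent", project_name="mindsdb", host="127.0.0.1", port=47334)
    # Prior turns are passed through as-is; stream() appends the current question.
    history = [{"question": "What tables exist?", "answer": "demo.sales"}]
    async for chunk in agent.stream("Summarize demo.sales", session_id="abc123", history=history):
        print(chunk)


asyncio.run(main())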
mindsdb/api/a2a/common/server/server.py CHANGED
@@ -20,7 +20,8 @@ from ...common.types import (
 )
 from pydantic import ValidationError
 import json
-from typing import AsyncIterable, Any
+import time
+from typing import AsyncIterable, Any, Dict
 from ...common.server.task_manager import TaskManager
 
 import logging
@@ -44,9 +45,9 @@ class A2AServer:
         self.agent_card = agent_card
         self.app = Starlette()
         self.app.add_route(self.endpoint, self._process_request, methods=["POST"])
-        self.app.add_route(
-            "/.well-known/agent.json", self._get_agent_card, methods=["GET"]
-        )
+        self.app.add_route("/.well-known/agent.json", self._get_agent_card, methods=["GET"])
+        # Add status endpoint
+        self.app.add_route("/status", self._get_status, methods=["GET"])
         # TODO: Remove this when we have a proper CORS policy
         self.app.add_middleware(
             CORSMiddleware,
@@ -55,6 +56,7 @@ class A2AServer:
             allow_methods=["*"],
             allow_headers=["*"],
         )
+        self.start_time = time.time()
 
     def start(self):
         if self.agent_card is None:
@@ -66,18 +68,30 @@ class A2AServer:
         import uvicorn
 
         # Configure uvicorn with optimized settings for streaming
-        uvicorn.run(
-            self.app,
-            host=self.host,
-            port=self.port,
-            http="h11",
-            timeout_keep_alive=65,
-            log_level="info"
-        )
+        uvicorn.run(self.app, host=self.host, port=self.port, http="h11", timeout_keep_alive=65, log_level="info")
 
     def _get_agent_card(self, request: Request) -> JSONResponse:
         return JSONResponse(self.agent_card.model_dump(exclude_none=True))
 
+    def _get_status(self, request: Request) -> JSONResponse:
+        """
+        Status endpoint that returns basic server information.
+        This endpoint can be used by the frontend to check if the A2A server is running.
+        """
+        uptime_seconds = time.time() - self.start_time
+
+        status_info: Dict[str, Any] = {
+            "status": "ok",
+            "service": "mindsdb-a2a",
+            "uptime_seconds": round(uptime_seconds, 2),
+            "host": self.host,
+            "port": self.port,
+            "agent_name": self.agent_card.name if self.agent_card else None,
+            "version": self.agent_card.version if self.agent_card else "unknown",
+        }
+
+        return JSONResponse(status_info)
+
     async def _process_request(self, request: Request):
         try:
             body = await request.json()
@@ -89,23 +103,15 @@ class A2AServer:
                 result = await self.task_manager.on_send_task(json_rpc_request)
             elif isinstance(json_rpc_request, SendTaskStreamingRequest):
                 # Don't await the async generator, just pass it to _create_response
-                result = self.task_manager.on_send_task_subscribe(
-                    json_rpc_request
-                )
+                result = self.task_manager.on_send_task_subscribe(json_rpc_request)
             elif isinstance(json_rpc_request, CancelTaskRequest):
                 result = await self.task_manager.on_cancel_task(json_rpc_request)
             elif isinstance(json_rpc_request, SetTaskPushNotificationRequest):
-                result = await self.task_manager.on_set_task_push_notification(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_set_task_push_notification(json_rpc_request)
             elif isinstance(json_rpc_request, GetTaskPushNotificationRequest):
-                result = await self.task_manager.on_get_task_push_notification(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_get_task_push_notification(json_rpc_request)
             elif isinstance(json_rpc_request, TaskResubscriptionRequest):
-                result = await self.task_manager.on_resubscribe_to_task(
-                    json_rpc_request
-                )
+                result = await self.task_manager.on_resubscribe_to_task(json_rpc_request)
             else:
                 logger.warning(f"Unexpected request type: {type(json_rpc_request)}")
                 raise ValueError(f"Unexpected request type: {type(request)}")
@@ -152,10 +158,10 @@ class A2AServer:
                     "X-Accel-Buffering": "no",
                     "Connection": "keep-alive",
                     "Content-Type": "text/event-stream",
-                    "Transfer-Encoding": "chunked"
+                    "Transfer-Encoding": "chunked",
                 },
                 # Explicitly set media_type
-                media_type="text/event-stream"
+                media_type="text/event-stream",
             )
         elif isinstance(result, JSONRPCResponse):
             return JSONResponse(result.model_dump(exclude_none=True))
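
Note: the new /status route is a lightweight liveness probe. A hedged client-side sketch (the port is an assumption; use whatever the A2A server was started with):

import requests

resp = requests.get("http://127.0.0.1:10002/status", timeout=5)
resp.raise_for_status()
info = resp.json()
# Fields mirror status_info in _get_status() above.
print(info["status"], info["uptime_seconds"], info.get("agent_name"))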
mindsdb/api/a2a/run_a2a.py CHANGED
@@ -57,7 +57,7 @@ def main(config_override: Optional[Dict[str, Any]] = None, *args, **kwargs):
     logger.info("Successfully imported a2a module")
 
     # Get configuration from config system or use provided override
-    a2a_config = config_override if config_override is not None else config.get("a2a", {})
+    a2a_config = config_override if config_override is not None else config.get("api", {}).get("a2a", {})
 
     # Set log level if specified
     if a2a_config.get("log_level"):
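
Note: A2A settings now live under the top-level "api" key, so config overrides need the nested shape. An illustrative sketch of the lookup change (the dict below is a stand-in, not the full MindsDB config schema):

config = {"api": {"a2a": {"log_level": "DEBUG"}}}

old = config.get("a2a", {})                 # 25.5.x lookup: misses the nested section
new = config.get("api", {}).get("a2a", {})  # 25.6.x lookup: finds it

assert old == {} and new == {"log_level": "DEBUG"}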
mindsdb/api/executor/command_executor.py CHANGED
@@ -6,6 +6,7 @@ from functools import reduce
 
 import pandas as pd
 from mindsdb_sql_parser import parse_sql
+from mindsdb_sql_parser.ast.mindsdb import AlterDatabase
 from mindsdb_sql_parser.ast import (
     Alter,
     ASTNode,
@@ -39,6 +40,7 @@ from mindsdb_sql_parser.ast import (
 
 # typed models
 from mindsdb_sql_parser.ast.mindsdb import (
+    AlterView,
     CreateAgent,
     CreateAnomalyDetectionModel,
     CreateChatBot,
@@ -51,6 +53,7 @@ from mindsdb_sql_parser.ast.mindsdb import (
     CreateTrigger,
     CreateView,
     CreateKnowledgeBaseIndex,
+    EvaluateKnowledgeBase,
     DropAgent,
     DropChatBot,
     DropDatasource,
@@ -189,6 +192,8 @@ class ExecuteCommands:
             return self.answer_drop_tables(statement, database_name)
         elif statement_type is DropDatasource or statement_type is DropDatabase:
             return self.answer_drop_database(statement)
+        elif statement_type is AlterDatabase:
+            return self.answer_alter_database(statement)
         elif statement_type is Describe:
             # NOTE in sql 'describe table' is same as 'show columns'
             obj_type = statement.type
@@ -551,7 +556,9 @@ class ExecuteCommands:
         ):
             return self.answer_create_predictor(statement, database_name)
         elif statement_type is CreateView:
-            return self.answer_create_view(statement, database_name)
+            return self.answer_create_or_alter_view(statement, database_name)
+        elif statement_type is AlterView:
+            return self.answer_create_or_alter_view(statement, database_name)
         elif statement_type is DropView:
             return self.answer_drop_view(statement, database_name)
         elif statement_type is Delete:
@@ -618,6 +625,8 @@ class ExecuteCommands:
             return self.answer_evaluate_metric(statement, database_name)
         elif statement_type is CreateKnowledgeBaseIndex:
             return self.answer_create_kb_index(statement, database_name)
+        elif statement_type is EvaluateKnowledgeBase:
+            return self.answer_evaluate_kb(statement, database_name)
         else:
             logger.warning(f"Unknown SQL statement: {sql}")
             raise NotSupportedYet(f"Unknown SQL statement: {sql}")
@@ -906,6 +915,14 @@ class ExecuteCommands:
         self.session.kb_controller.create_index(table_name=table_name, project_name=project_name)
         return ExecuteAnswer()
 
+    def answer_evaluate_kb(self, statement: EvaluateKnowledgeBase, database_name):
+        table_name = statement.name.parts[-1]
+        project_name = statement.name.parts[0] if len(statement.name.parts) > 1 else database_name
+        scores = self.session.kb_controller.evaluate(
+            table_name=table_name, project_name=project_name, params=statement.params
+        )
+        return ExecuteAnswer(data=ResultSet.from_df(scores))
+
     def _get_model_info(self, identifier, except_absent=True, database_name=None):
         if len(identifier.parts) == 1:
             identifier.parts = [database_name, identifier.parts[0]]
@@ -1181,6 +1198,13 @@ class ExecuteCommands:
                 raise
         return ExecuteAnswer()
 
+    def answer_alter_database(self, statement):
+        if len(statement.name.parts) != 1:
+            raise Exception("Database name should contain only 1 part.")
+        db_name = statement.name.parts[0]
+        self.session.database_controller.update(db_name, data=statement.params)
+        return ExecuteAnswer()
+
     def answer_drop_tables(self, statement, database_name):
         """answer on 'drop table [if exists] {name}'
         Args:
@@ -1214,17 +1238,35 @@ class ExecuteCommands:
 
         return ExecuteAnswer()
 
-    def answer_create_view(self, statement, database_name):
+    def answer_create_or_alter_view(self, statement: ASTNode, database_name: str) -> ExecuteAnswer:
+        """Process CREATE and ALTER VIEW commands
+
+        Args:
+            statement (ASTNode): data for creating or altering view
+            database_name (str): name of the current database
+
+        Returns:
+            ExecuteAnswer: answer for the command
+        """
         project_name = database_name
-        # TEMP
-        if isinstance(statement.name, Identifier):
+
+        if isinstance(statement.name, str):
+            parts = statement.name.split(".")
+        elif isinstance(statement.name, Identifier):
             parts = statement.name.parts
         else:
-            parts = statement.name.split(".")
+            raise ValueError(f"Unknown type of view name: {statement.name}")
 
-        view_name = parts[-1]
-        if len(parts) == 2:
-            project_name = parts[0]
+        match parts:
+            case [project_name, view_name]:
+                pass
+            case [view_name]:
+                pass
+            case _:
+                raise ValueError(
+                    'View name should be in the form "project_name.view_name" '
+                    f'or "view_name", got {statement.name.parts}'
+                )
 
         query_str = statement.query_str
 
@@ -1233,7 +1275,7 @@ class ExecuteCommands:
                 targets=[Star()],
                 from_table=NativeQuery(integration=statement.from_table, query=statement.query_str),
             )
-            query_str = str(query)
+            query_str = query.to_string()
         else:
             query = parse_sql(query_str)
 
@@ -1248,11 +1290,21 @@ class ExecuteCommands:
             query_context_controller.release_context(query_context_controller.IGNORE_CONTEXT)
 
         project = self.session.database_controller.get_project(project_name)
-        try:
-            project.create_view(view_name, query=query_str)
-        except EntityExistsError:
-            if getattr(statement, "if_not_exists", False) is False:
-                raise
+
+        if isinstance(statement, CreateView):
+            try:
+                project.create_view(view_name, query=query_str)
+            except EntityExistsError:
+                if getattr(statement, "if_not_exists", False) is False:
+                    raise
+        elif isinstance(statement, AlterView):
+            try:
+                project.update_view(view_name, query=query_str)
+            except EntityNotExistsError:
+                raise ExecutorException(f"View {view_name} does not exist in {project_name}")
+        else:
+            raise ValueError(f"Unknown view DDL statement: {statement}")
+
         return ExecuteAnswer()
 
     def answer_drop_view(self, statement, database_name):
@@ -1467,6 +1519,9 @@ class ExecuteCommands:
         is_full=False,
         database_name=None,
     ):
+        if isinstance(target, Identifier) is False:
+            raise TableNotExistError("The table name is required for the query.")
+
         if len(target.parts) > 1:
             db = target.parts[0]
         elif isinstance(database_name, str) and len(database_name) > 0:
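
Note: the dispatcher above routes the new AlterDatabase, AlterView, and EvaluateKnowledgeBase statement types. A hedged sketch of how such a statement reaches the executor (the SQL text is illustrative; the exact grammar is defined by mindsdb_sql_parser, and whether this form parses to AlterView is an assumption):

from mindsdb_sql_parser import parse_sql

# Illustrative statement; adjust to the grammar your parser version accepts.
stmt = parse_sql("ALTER VIEW my_project.my_view AS (SELECT * FROM demo.sales)")
# ExecuteCommands would route AlterView (like CreateView) to answer_create_or_alter_view(),
# which calls project.update_view() for ALTER and project.create_view() for CREATE.
print(type(stmt).__name__)  # expected: AlterView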
mindsdb/api/executor/datahub/datanodes/integration_datanode.py CHANGED
@@ -1,13 +1,11 @@
 import time
 import inspect
 from dataclasses import astuple
-from typing import Iterable
+from typing import Iterable, List
 
 import numpy as np
 import pandas as pd
-from sqlalchemy.types import (
-    Integer, Float
-)
+from sqlalchemy.types import Integer, Float
 
 from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb_sql_parser.ast import Insert, Identifier, CreateTable, TableColumn, DropTables
@@ -32,7 +30,7 @@ class DBHandlerException(Exception):
 
 
 class IntegrationDataNode(DataNode):
-    type = 'integration'
+    type = "integration"
 
     def __init__(self, integration_name, ds_type, integration_controller):
         self.integration_name = integration_name
@@ -46,15 +44,17 @@ class IntegrationDataNode(DataNode):
     def get_tables(self):
         response = self.integration_handler.get_tables()
         if response.type == RESPONSE_TYPE.TABLE:
-            result_dict = response.data_frame.to_dict(orient='records')
+            result_dict = response.data_frame.to_dict(orient="records")
             result = []
             for row in result_dict:
-
                 result.append(TablesRow.from_dict(row))
             return result
         else:
             raise Exception(f"Can't get tables: {response.error_message}")
 
+        result_dict = response.data_frame.to_dict(orient="records")
+        return [TablesRow.from_dict(row) for row in result_dict]
+
     def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame:
         """Get a DataFrame containing representation of information_schema.columns for the specified table.
 
@@ -66,7 +66,7 @@ class IntegrationDataNode(DataNode):
             pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table.
                 The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES.
         """
-        if 'schema_name' in inspect.signature(self.integration_handler.get_columns).parameters:
+        if "schema_name" in inspect.signature(self.integration_handler.get_columns).parameters:
             response = self.integration_handler.get_columns(table_name, schema_name)
         else:
             response = self.integration_handler.get_columns(table_name)
@@ -81,18 +81,18 @@ class IntegrationDataNode(DataNode):
         # region fallback for old handlers
         df = response.data_frame
         df.columns = [name.upper() for name in df.columns]
-        if 'FIELD' not in df.columns or 'TYPE' not in df.columns:
+        if "FIELD" not in df.columns or "TYPE" not in df.columns:
             logger.warning(
                 f"Response from the handler's `get_columns` call does not contain required columns: f{df.columns}"
             )
             return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES))
 
-        new_df = df[['FIELD', 'TYPE']]
-        new_df.columns = ['COLUMN_NAME', 'DATA_TYPE']
+        new_df = df[["FIELD", "TYPE"]]
+        new_df.columns = ["COLUMN_NAME", "DATA_TYPE"]
 
-        new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[
-            INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE
-        ].apply(lambda x: infer_mysql_type(x).value)
+        new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE].apply(
+            lambda x: infer_mysql_type(x).value
+        )
 
         for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
             if column_name in new_df.columns:
@@ -116,54 +116,50 @@ class IntegrationDataNode(DataNode):
         return df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list()
 
     def drop_table(self, name: Identifier, if_exists=False):
-        drop_ast = DropTables(
-            tables=[name],
-            if_exists=if_exists
-        )
+        drop_ast = DropTables(tables=[name], if_exists=if_exists)
         self.query(drop_ast)
 
-    def create_table(self, table_name: Identifier, result_set: ResultSet = None, columns=None,
-                     is_replace=False, is_create=False, **kwargs) -> DataHubResponse:
+    def create_table(
+        self,
+        table_name: Identifier,
+        result_set: ResultSet = None,
+        columns: List[TableColumn] = None,
+        is_replace: bool = False,
+        is_create: bool = False,
+        raise_if_exists: bool = True,
+        **kwargs,
+    ) -> DataHubResponse:
         # is_create - create table
+        # if !raise_if_exists: error will be skipped
         # is_replace - drop table if exists
         # is_create==False and is_replace==False: just insert
 
         table_columns_meta = {}
 
         if columns is None:
-            columns = []
-
-            df = result_set.get_raw_df()
-
             columns: list[TableColumn] = result_set.get_ast_columns()
-            table_columns_meta = {
-                column.name: column.type
-                for column in columns
-            }
+            table_columns_meta = {column.name: column.type for column in columns}
 
         if is_replace:
             # drop
-            drop_ast = DropTables(
-                tables=[table_name],
-                if_exists=True
-            )
+            drop_ast = DropTables(tables=[table_name], if_exists=True)
             self.query(drop_ast)
             is_create = True
 
         if is_create:
-            create_table_ast = CreateTable(
-                name=table_name,
-                columns=columns,
-                is_replace=is_replace
-            )
-            self.query(create_table_ast)
+            create_table_ast = CreateTable(name=table_name, columns=columns, is_replace=is_replace)
+            try:
+                self.query(create_table_ast)
+            except Exception as e:
+                if raise_if_exists:
+                    raise e
 
         if result_set is None:
             # it is just a 'create table'
             return DataHubResponse()
 
         # native insert
-        if hasattr(self.integration_handler, 'insert'):
+        if hasattr(self.integration_handler, "insert"):
             df = result_set.to_df()
 
             result: HandlerResponse = self.integration_handler.insert(table_name.parts[-1], df)
@@ -176,9 +172,9 @@ class IntegrationDataNode(DataNode):
                 column_type = table_columns_meta[col.alias]
 
                 if column_type == Integer:
-                    type_name = 'int'
+                    type_name = "int"
                 elif column_type == Float:
-                    type_name = 'float'
+                    type_name = "float"
                 else:
                     continue
 
@@ -193,24 +189,19 @@ class IntegrationDataNode(DataNode):
             # not need to insert
             return DataHubResponse()
 
-        insert_ast = Insert(
-            table=table_name,
-            columns=insert_columns,
-            values=values,
-            is_plain=True
-        )
+        insert_ast = Insert(table=table_name, columns=insert_columns, values=values, is_plain=True)
 
         try:
             result: DataHubResponse = self.query(insert_ast)
         except Exception as e:
-            msg = f'[{self.ds_type}/{self.integration_name}]: {str(e)}'
+            msg = f"[{self.ds_type}/{self.integration_name}]: {str(e)}"
             raise DBHandlerException(msg) from e
 
         return DataHubResponse(affected_rows=result.affected_rows)
 
     def has_support_stream(self) -> bool:
         # checks if data handler has query_stream method
-        return hasattr(self.integration_handler, 'query_stream') and callable(self.integration_handler.query_stream)
+        return hasattr(self.integration_handler, "query_stream") and callable(self.integration_handler.query_stream)
 
     @profiler.profile()
     def query_stream(self, query: ASTNode, fetch_size: int = None) -> Iterable:
@@ -230,24 +221,26 @@ class IntegrationDataNode(DataNode):
             # metrics
             elapsed_seconds = time.perf_counter() - time_before_query
             query_time_with_labels = metrics.INTEGRATION_HANDLER_QUERY_TIME.labels(
-                get_class_name(self.integration_handler), result.type)
+                get_class_name(self.integration_handler), result.type
+            )
             query_time_with_labels.observe(elapsed_seconds)
 
             num_rows = 0
             if result.data_frame is not None:
                 num_rows = len(result.data_frame.index)
             response_size_with_labels = metrics.INTEGRATION_HANDLER_RESPONSE_SIZE.labels(
-                get_class_name(self.integration_handler), result.type)
+                get_class_name(self.integration_handler), result.type
+            )
             response_size_with_labels.observe(num_rows)
         except Exception as e:
             msg = str(e).strip()
-            if msg == '':
+            if msg == "":
                 msg = e.__class__.__name__
-            msg = f'[{self.ds_type}/{self.integration_name}]: {msg}'
+            msg = f"[{self.ds_type}/{self.integration_name}]: {msg}"
             raise DBHandlerException(msg) from e
 
         if result.type == RESPONSE_TYPE.ERROR:
-            raise Exception(f'Error in {self.integration_name}: {result.error_message}')
+            raise Exception(f"Error in {self.integration_name}: {result.error_message}")
         if result.type == RESPONSE_TYPE.OK:
             return DataHubResponse(affected_rows=result.affected_rows)
 
@@ -265,17 +258,8 @@ class IntegrationDataNode(DataNode):
             logger.error(f"Issue with clearing DF from NaN values: {e}")
         # endregion
 
-        columns_info = [
-            {
-                'name': k,
-                'type': v
-            }
-            for k, v in df.dtypes.items()
-        ]
+        columns_info = [{"name": k, "type": v} for k, v in df.dtypes.items()]
 
         return DataHubResponse(
-            data_frame=df,
-            columns=columns_info,
-            affected_rows=result.affected_rows,
-            mysql_types=result.mysql_types
+            data_frame=df, columns=columns_info, affected_rows=result.affected_rows, mysql_types=result.mysql_types
         )
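
Note: the new raise_if_exists flag lets create_table() tolerate a pre-existing table and fall through to the insert. A hedged usage sketch (node and result_set are assumed to be an already-initialized IntegrationDataNode and a populated ResultSet; constructing them is elided):

from mindsdb_sql_parser.ast import Identifier

# `node` is assumed to be an IntegrationDataNode wired to a live handler.
response = node.create_table(
    table_name=Identifier(parts=["my_schema", "my_table"]),
    result_set=result_set,
    is_create=True,
    raise_if_exists=False,  # swallow "table already exists" and proceed to insert rows
)
print(response.affected_rows)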