MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
- mindsdb/api/executor/planner/plan_join.py +1 -1
- mindsdb/api/executor/planner/query_planner.py +7 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/api_handler.py +6 -7
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/constants.py +44 -0
- mindsdb/interfaces/agents/langchain_agent.py +15 -6
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
- mindsdb/interfaces/knowledge_base/controller.py +121 -102
- mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
- mindsdb/interfaces/skills/skill_tool.py +91 -88
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/planner/query_prepare.py
CHANGED

@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils
 
 
 def to_string(identifier):
     # alternative to AST.to_string() but without quoting
-    return '.'.join(identifier.parts)
+    return ".".join(identifier.parts)
 
 
 class Table:
@@ -32,7 +32,6 @@ class Column:
     def __init__(self, node=None, table=None, name=None, type=None):
        alias = None
        if node is not None:
-
            if isinstance(node, ast.Identifier):
                # set name
                name = node.parts[-1]  # ???
@@ -67,26 +66,25 @@ class Statement:
        self.offset = 0
 
 
-class PreparedStatementPlanner():
-
+class PreparedStatementPlanner:
    def __init__(self, planner):
        self.planner = planner
 
    def get_type_of_var(self, v):
        if isinstance(v, str):
-            return 'str'
+            return "str"
        elif isinstance(v, float):
-            return 'float'
+            return "float"
        elif isinstance(v, int):
-            return 'integer'
+            return "integer"
 
-        return 'str'
+        return "str"
 
    def get_statement_info(self):
        stmt = self.planner.statement
 
        if stmt is None:
-            raise PlanningException('Statement is not prepared')
+            raise PlanningException("Statement is not prepared")
 
        columns_result = []
 
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
            if column.table is not None:
                table = column.table.name
                ds = column.table.ds
-            columns_result.append({
-                'alias': column.alias,
-                'type': column.type,
-                'name': column.name,
-                'table_name': table,
-                'table_alias': table,
-                'ds': ds
-            })
+            columns_result.append(
+                dict(
+                    alias=column.alias,
+                    type=column.type,
+                    name=column.name,
+                    table_name=table,
+                    table_alias=table,
+                    ds=ds,
+                )
+            )
 
        parameters = []
        for param in stmt.params:
-            name = '?'
-            parameters.append({
-                'alias': name,
-                'type': 'str',
-                'name': name,
-            })
+            name = "?"
+            parameters.append(
+                dict(
+                    alias=name,
+                    type="str",
+                    name=name,
+                )
+            )
 
-        return {
-            'parameters': parameters,
-            'columns': columns_result
-        }
+        return {"parameters": parameters, "columns": columns_result}
 
    def get_table_of_column(self, t):
        tables_map = self.planner.statement.tables_map
 
        # get tables to check
        if len(t.parts) > 1:
            # try to find table
            table_parts = t.parts[:-1]
-            table_name = '.'.join(table_parts)
+            table_name = ".".join(table_parts)
            if table_name in tables_map:
                return tables_map[table_name]
 
            elif len(table_parts) > 1:
                # maybe datasource is 1st part
                table_parts = table_parts[1:]
-                table_name = '.'.join(table_parts)
+                table_name = ".".join(table_parts)
                if table_name in tables_map:
                    return tables_map[table_name]
 
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
        # in reverse order
        for p in table.parts[::-1]:
            parts.insert(0, p)
-            keys.append('.'.join(parts))
+            keys.append(".".join(parts))
 
        # remember table
-        tbl = Table(
-            ds=ds,
-            node=table,
-            is_predictor=is_predictor
-        )
+        tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
        tbl.keys = keys
 
        return tbl
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
        stmt.tables_map = {}
        stmt.tables_lvl1 = []
        if query.from_table is not None:
-
            if isinstance(query.from_table, ast.Join):
                # get all tables
                join_tables = utils.convert_join_to_list(query.from_table)
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():
 
            if isinstance(query.from_table, ast.Select):
                # nested select, get only last select
-                join_tables = [
-                    dict(
-                        table=utils.get_deepest_select(query.from_table).from_table
-                    )
-                ]
+                join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]
 
            for i, join_table in enumerate(join_tables):
-                table = join_table['table']
+                table = join_table["table"]
                if isinstance(table, ast.Identifier):
                    tbl = self.table_from_identifier(table)
 
                    if tbl.is_predictor:
                        # Is the last table?
                        if i + 1 < len(join_tables):
-                            raise PlanningException('Predictor must be last table in query')
+                            raise PlanningException("Predictor must be last table in query")
 
                    stmt.tables_lvl1.append(tbl)
                    for key in tbl.keys:
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
        # is there any predictors at other levels?
        lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
        if len(query_predictors) != len(lvl1_predictors):
-            raise PlanningException('Predictor is not at first level')
+            raise PlanningException("Predictor is not at first level")
 
        # === get targets ===
        columns = []
        get_all_tables = False
        for t in query.targets:
-
            column = Column(t)
 
            # column alias
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
                column.type = self.get_type_of_var(t.value)
            elif isinstance(t, ast.Function):
                # mysql function
-                if t.op == 'connection_id':
-                    column.type = 'integer'
+                if t.op == "connection_id":
+                    column.type = "integer"
                else:
-                    column.type = 'str'
+                    column.type = "str"
            else:
                # TODO go down into lower level.
                # It can be function, operation, select.
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
                # TODO add several known types for function, i.e ABS-int
 
                # TODO TypeCast - as casted type
-                column.type = 'str'
+                column.type = "str"
 
            if alias is not None:
                column.alias = alias
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
        if step.result_data is not None:
            # save results
 
-            if len(step.result_data['tables']) > 0:
-                table_info = step.result_data['tables'][0]
-                columns_info = step.result_data['columns'][table_info]
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]
 
                table.columns = []
                table.ds = table_info[0]
                for col in columns_info:
                    if isinstance(col, tuple):
                        # is predictor
-                        col = dict(name=col[0], type='str')
+                        col = dict(name=col[0], type="str")
                    table.columns.append(
                        Column(
-                            name=col['name'],
-                            type=col['type'],
+                            name=col["name"],
+                            type=col["type"],
                        )
                    )
 
                # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}
 
        # === create columns list ===
        columns_result = []
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
        # add data from all tables
        for table in stmt.tables_lvl1:
            if table.columns is None:
-                raise PlanningException(f'Table is not found {table.name}')
+                raise PlanningException(f"Table is not found {table.name}")
 
            for col in table.columns:
                # col = {name: 'col', type: 'str'}
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
                    column.type = table.columns_map[col_name].type
                else:
                    # continue
-                    raise PlanningException(f'Column not found {col_name}')
+                    raise PlanningException(f"Column not found {col_name}")
 
            else:
                # table is not found, looking for in all tables
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():
 
            # forcing alias
            if column.alias is None:
-                column.alias = f'column_{i}'
+                column.alias = f"column_{i}"
 
            # forcing type
            if column.type is None:
-                column.type = 'str'
+                column.type = "str"
 
            columns_result.append(column)
 
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
        if step.result_data is not None:
            # save results
 
-            if len(step.result_data['tables']) > 0:
-                table_info = step.result_data['tables'][0]
-                columns_info = step.result_data['columns'][table_info]
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]
 
                table.columns = []
                table.ds = table_info[0]
                for col in columns_info:
                    if isinstance(col, tuple):
                        # is predictor
-                        col = dict(name=col[0], type='str')
+                        col = dict(name=col[0], type="str")
                    table.columns.append(
                        Column(
-                            name=col['name'],
-                            type=col['type'],
+                            name=col["name"],
+                            type=col["type"],
                        )
                    )
 
                # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}
 
        # save results
        columns_result = []
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():
 
            if column.type is None:
                # forcing type
-                column.type = 'str'
+                column.type = "str"
 
            columns_result.append(column)
 
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
        stmt = self.planner.statement
 
        stmt.columns = [
-            Column(name='Variable_name', type='str'),
-            Column(name='Value', type='str'),
+            Column(name="Variable_name", type="str"),
+            Column(name="Value", type="str"),
        ]
        return []
 
    def prepare_steps(self, query):
-
        stmt = Statement()
        self.planner.statement = stmt
 
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
        if isinstance(query, ast.Show):
            return self.prepare_show(query)
        else:
-
            # do nothing
            return []
            # raise NotImplementedError(query.__name__)
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
        query = self.planner.query
 
        if params is not None:
-
            if len(params) != len(stmt.params):
                raise PlanningException("Count of execution parameters don't match prepared statement")
 
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
        stmt.params = None
 
        if (
-            (6 deleted lines: the same isinstance checks, without ast.Intersect and ast.Except; original formatting not recovered)
+            isinstance(query, ast.Select)
+            or isinstance(query, ast.Union)
+            or isinstance(query, ast.CreateTable)
+            or isinstance(query, ast.Insert)
+            or isinstance(query, ast.Update)
+            or isinstance(query, ast.Delete)
+            or isinstance(query, ast.Intersect)
+            or isinstance(query, ast.Except)
        ):
            return self.plan_query(query)
        else:
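Most of this file's changes are quote normalization and line joining, but the behavior of get_type_of_var is worth restating. The sketch below is a standalone re-statement of that mapping for illustration only (the real method lives on PreparedStatementPlanner and takes self; the free function here is hypothetical):

def get_type_of_var(v):
    # str is checked first, then float, then int; anything else falls back to "str".
    # Note that bool is a subclass of int in Python, so True/False map to "integer".
    if isinstance(v, str):
        return "str"
    elif isinstance(v, float):
        return "float"
    elif isinstance(v, int):
        return "integer"
    return "str"

assert get_type_of_var("a") == "str"
assert get_type_of_var(1.5) == "float"
assert get_type_of_var(True) == "integer"
assert get_type_of_var(None) == "str"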
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py
CHANGED

@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
     Parameter,
     BinaryOperation,
     Tuple,
+    Union,
+    Intersect,
 )
 
 from mindsdb.api.executor.planner.steps import FetchDataframeStep
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
            response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
            df = response.data_frame
        else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
+            if isinstance(step.query, (Union, Intersect)):
+                table_alias = ["", "", ""]
+            else:
+                table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
 
            # TODO for information_schema we have 'database' = 'mindsdb'
 
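The new branch special-cases set operations: a Union or Intersect node has no single from_table, so the step substitutes a blank three-part alias instead of calling get_table_alias. A minimal stand-in showing just that dispatch (Select, Union, and Intersect here are dummy classes, not the mindsdb_sql_parser.ast ones, and resolve_table_alias is a hypothetical helper):

class Select:
    def __init__(self, from_table):
        self.from_table = from_table

class Union:
    pass

class Intersect:
    pass

def resolve_table_alias(query, database):
    # Set operations get an empty alias triple; a plain select would go
    # through get_table_alias(query.from_table, database) in the real step.
    if isinstance(query, (Union, Intersect)):
        return ["", "", ""]
    return [database, query.from_table, query.from_table]

print(resolve_table_alias(Union(), "mindsdb"))       # ['', '', '']
print(resolve_table_alias(Select("t1"), "mindsdb"))  # ['mindsdb', 't1', 't1']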
mindsdb/api/executor/sql_query/steps/union_step.py
CHANGED

@@ -9,7 +9,6 @@ from .base import BaseStepCall
 
 
 class UnionStepCall(BaseStepCall):
-
    bind = UnionStep
 
    def call(self, step):
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
        # count of columns have to match
        if len(left_result.columns) != len(right_result.columns):
            raise WrongArgumentError(
-                f'UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)}')
+                f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
+            )
 
        # types have to match
        # TODO: return checking type later
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
        table_a, names = left_result.to_df_cols()
        table_b, _ = right_result.to_df_cols()
 
-        op = 'UNION'
-        if step.unique is not True:
-            op += ' ALL'
+        if step.operation.lower() == "intersect":
+            op = "INTERSECT"
+        else:
+            op = "UNION"
+
+        if step.unique is not True:
+            op += " ALL"
+
        query = f"""
            SELECT * FROM table_a
            {op}
            SELECT * FROM table_b
        """
 
-        resp_df, _description = query_df_with_type_infer_fallback(query, {
-            'table_a': table_a,
-            'table_b': table_b
-        })
+        resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
        resp_df.replace({np.nan: None}, inplace=True)
 
        return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
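The rewritten call builds the set operator from step attributes and runs both result sets through an in-memory SQL engine. The sketch below reproduces the operator selection with duckdb over pandas frames; query_df_with_type_infer_fallback is MindsDB's own helper, so querying duckdb directly here is an assumption made purely for illustration:

import duckdb
import pandas as pd

table_a = pd.DataFrame({"x": [1, 2, 2, 3]})
table_b = pd.DataFrame({"x": [2, 3, 4]})

def combine(operation: str, unique: bool) -> pd.DataFrame:
    # Same selection logic as UnionStepCall.call: INTERSECT vs UNION,
    # with " ALL" appended when duplicate rows should be kept.
    op = "INTERSECT" if operation.lower() == "intersect" else "UNION"
    if unique is not True:
        op += " ALL"
    # duckdb resolves table_a / table_b from the surrounding Python scope.
    return duckdb.query(f"SELECT * FROM table_a {op} SELECT * FROM table_b").to_df()

print(combine("union", unique=True))      # distinct rows from both frames
print(combine("intersect", unique=True))  # rows that appear in both frames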
mindsdb/api/http/namespaces/file.py
CHANGED

@@ -3,6 +3,7 @@ import shutil
 import tarfile
 import tempfile
 import zipfile
+from urllib.parse import urlparse
 
 import multipart
 import requests
@@ -13,7 +14,7 @@ from flask_restx import Resource
 from mindsdb.api.http.namespaces.configs.files import ns_conf
 from mindsdb.api.http.utils import http_error
 from mindsdb.metrics.metrics import api_endpoint_metrics
-from mindsdb.utilities.config import (imported name not recovered)
+from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
@@ -105,31 +106,55 @@ class File(Resource):
 
        if data.get("source_type") == "url":
            url = data["source"]
-            (4 deleted lines not recovered)
+            try:
+                url = urlparse(url)
+                if not (url.scheme and url.netloc):
+                    raise ValueError()
+                url = url.geturl()
+            except Exception:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    f"The URL is not valid: {data['source']}",
+                )
+
+            url_file_upload_enabled = config["url_file_upload"]["enabled"]
+            if url_file_upload_enabled is False:
+                return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")
+
+            allowed_origins = config["url_file_upload"]["allowed_origins"]
+            disallowed_origins = config["url_file_upload"]["disallowed_origins"]
+
+            if validate_urls(url, allowed_origins, disallowed_origins) is False:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    "URL is not allowed for security reasons. Allowed hosts are: "
+                    f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.",
+                )
+
            data["file"] = clear_filename(data["name"])
            is_cloud = config.get("cloud", False)
-            if is_cloud
-            (18 more deleted lines not recovered)
+            if is_cloud:
+                if is_private_url(url):
+                    return http_error(400, f"URL is private: {url}")
+
+                if ctx.user_class != 1:
+                    info = requests.head(url, timeout=30)
+                    file_size = info.headers.get("Content-Length")
+                    try:
+                        file_size = int(file_size)
+                    except Exception:
+                        pass
+
+                    if file_size is None:
+                        return http_error(
+                            400,
+                            "Error getting file info",
+                            "Сan't determine remote file size",
+                        )
+                    if file_size > MAX_FILE_SIZE:
+                        return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
            with requests.get(url, stream=True) as r:
                if r.status_code != 200:
                    return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
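The upload endpoint now rejects a URL before fetching it: the URL must parse with a scheme and host, URL uploads must be enabled in config, and the origin must pass the allow/deny lists. A condensed sketch of that gating follows; the exact matching rules of validate_urls are not visible in this diff, so the host-based comparison below is an assumption, and is_acceptable_url is a hypothetical helper:

from urllib.parse import urlparse

def is_acceptable_url(url: str, allowed_origins: list, disallowed_origins: list) -> bool:
    parsed = urlparse(url)
    if not (parsed.scheme and parsed.netloc):
        return False  # mirrors the try/urlparse/ValueError check above
    host = parsed.hostname or ""
    if disallowed_origins and host in disallowed_origins:
        return False
    if allowed_origins and host not in allowed_origins:
        return False
    return True

assert is_acceptable_url("https://example.com/data.csv", [], [])
assert not is_acceptable_url("not-a-url", [], [])
assert not is_acceptable_url("https://evil.test/x", [], ["evil.test"])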
mindsdb/api/mcp/start.py
CHANGED
@@ -1,7 +1,8 @@
 import os
+from typing import Any
+from textwrap import dedent
 from contextlib import asynccontextmanager
 from collections.abc import AsyncIterator
-from typing import Optional, Dict, Any
 from dataclasses import dataclass
 
 import uvicorn
@@ -41,16 +42,32 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
 mcp = FastMCP(
     "MindsDB",
     lifespan=app_lifespan,
-    dependencies=["mindsdb"]  # Add any additional dependencies
+    dependencies=["mindsdb"],  # Add any additional dependencies
 )
 # MCP Queries
 LISTING_QUERY = "SHOW DATABASES"
 
 
-@mcp.tool()
-def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
-    """Execute a SQL query against MindsDB
+query_tool_description = dedent("""\
+    Executes a SQL query against MindsDB.
+
+    A database must be specified either in the `context` parameter or directly in the query string (e.g., `SELECT * FROM my_database.my_table`). Queries like `SELECT * FROM my_table` will fail without a `context`.
+
+    Args:
+        query (str): The SQL query to execute.
+        context (dict, optional): The default database context. For example, `{"db": "my_postgres"}`.
+
+    Returns:
+        A dictionary describing the result.
+        - For a successful query with no data to return (e.g., an `UPDATE` statement), the response is `{"type": "ok"}`.
+        - If the query returns tabular data, the response is a dictionary containing `data` (a list of rows) and `column_names` (a list of column names). For example: `{"type": "table", "data": [[1, "a"], [2, "b"]], "column_names": ["column_a", "column_b"]}`.
+        - In case of an error, a response is `{"type": "error", "error_message": "the error message"}`.
+""")
+
+
+@mcp.tool(name="query", description=query_tool_description)
+def query(query: str, context: dict | None = None) -> dict[str, Any]:
+    """Execute a SQL query against MindsDB
 
     Args:
         query: The SQL query to execute
@@ -63,7 +80,7 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
     if context is None:
         context = {}
 
-    logger.debug(f'Incoming MCP query: {query}')
+    logger.debug(f"Incoming MCP query: {query}")
 
     mysql_proxy = FakeMysqlProxy()
     mysql_proxy.set_context(context)
@@ -78,34 +95,30 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
            return {
                "type": SQL_RESPONSE_TYPE.TABLE,
                "data": result.result_set.to_lists(json_types=True),
-                "column_names": [
-                    column.alias or column.name
-                    for column in result.result_set.columns
-                ],
+                "column_names": [column.alias or column.name for column in result.result_set.columns],
            }
        else:
-            return {
-                "type": SQL_RESPONSE_TYPE.ERROR,
-                "error_code": 0,
-                "error_message": "Unknown response type"
-            }
+            return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": "Unknown response type"}
 
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}")
-        return {
-            "type": SQL_RESPONSE_TYPE.ERROR,
-            "error_code": 0,
-            "error_message": str(e)
-        }
+        return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": str(e)}
 
 
-@mcp.tool()
-def list_databases() -> Dict[str, Any]:
+list_databases_tool_description = (
+    "Returns a list of all database connections currently available in MindsDB. "
+    + "The tool takes no parameters and responds with a list of database names, "
+    + 'for example: ["my_postgres", "my_mysql", "test_db"].'
+)
+
+
+@mcp.tool(name="list_databases", description=list_databases_tool_description)
+def list_databases() -> list[str]:
    """
-    List all databases in MindsDB
+    List all databases in MindsDB
 
    Returns:
-        Dict[str, Any]: list of databases
+        list[str]: list of databases
    """
 
    mysql_proxy = FakeMysqlProxy()
@@ -124,6 +137,7 @@ def list_databases() -> Dict[str, Any]:
 
        elif result.type == SQL_RESPONSE_TYPE.TABLE:
            data = result.result_set.to_lists(json_types=True)
+            data = [val[0] for val in data]
            return data
 
    except Exception as e:
@@ -135,12 +149,12 @@ def list_databases() -> Dict[str, Any]:
 
 
 class CustomAuthMiddleware(BaseHTTPMiddleware):
-    """Custom middleware to handle authentication basing on header 'Authorization'
-    """
+    """Custom middleware to handle authentication basing on header 'Authorization'"""
+
    async def dispatch(self, request: Request, call_next):
-        mcp_access_token = os.environ.get('MINDSDB_MCP_ACCESS_TOKEN')
+        mcp_access_token = os.environ.get("MINDSDB_MCP_ACCESS_TOKEN")
        if mcp_access_token is not None:
-            auth_token = request.headers.get('Authorization', '').partition('Bearer ')[-1]
+            auth_token = request.headers.get("Authorization", "").partition("Bearer ")[-1]
            if mcp_access_token != auth_token:
                return Response(status_code=401, content="Unauthorized", media_type="text/plain")
 
@@ -171,8 +185,8 @@ def start(*args, **kwargs):
        port (int): Port to listen on
    """
    config = Config()
-    port = int(config['api'].get('mcp', {}).get('port', 47337))
-    host = config['api'].get('mcp', {}).get('host', '127.0.0.1')
+    port = int(config["api"].get("mcp", {}).get("port", 47337))
+    host = config["api"].get("mcp", {}).get("host", "127.0.0.1")
 
    logger.info(f"Starting MCP server on {host}:{port}")
    mcp.settings.host = host