MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/run_a2a.py +1 -1
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +49 -1
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/planner/steps.py:

```diff
@@ -9,7 +9,9 @@ class PlanStep:
     @property
     def result(self):
         if self.step_num is None:
-            raise PlanningException(
+            raise PlanningException(
+                f"Can't reference a step with no assigned step number. Tried to reference: {type(self)}"
+            )
         return Result(self.step_num)

     def __eq__(self, other):
@@ -18,7 +20,7 @@ class PlanStep:

         for k in vars(self):
             # skip result comparison
-            if k ==
+            if k == "result_data":
                 continue

             if getattr(self, k) != getattr(other, k):
@@ -28,8 +30,8 @@ class PlanStep:

     def __repr__(self):
         attrs_dict = vars(self)
-        attrs_str =
-        return f
+        attrs_str = ", ".join([f"{k}={str(v)}" for k, v in attrs_dict.items()])
+        return f"{self.__class__.__name__}({attrs_str})"

     def set_result(self, result):
         self.result_data = result
@@ -37,6 +39,7 @@ class PlanStep:

 class ProjectStep(PlanStep):
     """Selects columns from a dataframe"""
+
     def __init__(self, columns, dataframe, ignore_doubles=False, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.columns = columns
@@ -47,6 +50,7 @@ class ProjectStep(PlanStep):
 # TODO remove
 class FilterStep(PlanStep):
     """Filters some dataframe according to a query"""
+
     def __init__(self, dataframe, query, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dataframe = dataframe
@@ -66,6 +70,7 @@ class GroupByStep(PlanStep):

 class JoinStep(PlanStep):
     """Joins two dataframes, producing a new dataframe"""
+
     def __init__(self, left, right, query, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.left = left
@@ -75,7 +80,8 @@ class JoinStep(PlanStep):

 class UnionStep(PlanStep):
     """Union of two dataframes, producing a new dataframe"""
-
+
+    def __init__(self, left, right, unique, operation="union", *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.left = left
         self.right = right
@@ -95,6 +101,7 @@ class OrderByStep(PlanStep):

 class LimitOffsetStep(PlanStep):
     """Applies limit and offset to a dataframe"""
+
     def __init__(self, dataframe, limit=None, offset=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dataframe = dataframe
@@ -104,6 +111,7 @@ class LimitOffsetStep(PlanStep):

 class FetchDataframeStep(PlanStep):
     """Fetches a dataframe from external integration"""
+
     def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.integration = integration
@@ -114,15 +122,28 @@ class FetchDataframeStep(PlanStep):

 class FetchDataframeStepPartition(FetchDataframeStep):
     """Fetches a dataframe from external integration in partitions"""
-
+
+    def __init__(self, steps=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
-
+        if steps is None:
+            steps = []
+        self.steps = steps


 class ApplyPredictorStep(PlanStep):
     """Applies a mindsdb predictor on some dataframe and returns a new dataframe with predictions"""
-
-
+
+    def __init__(
+        self,
+        namespace,
+        predictor,
+        dataframe,
+        params: dict = None,
+        row_dict: dict = None,
+        columns_map: dict = None,
+        *args,
+        **kwargs,
+    ):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
         self.predictor = predictor
@@ -149,6 +170,7 @@ class ApplyTimeseriesPredictorStep(ApplyPredictorStep):

 class ApplyPredictorRowStep(PlanStep):
     """Applies a mindsdb predictor to one row of values and returns a dataframe of one row, the predictor."""
+
     def __init__(self, namespace, predictor, row_dict, params=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -159,6 +181,7 @@ class ApplyPredictorRowStep(PlanStep):

 class GetPredictorColumns(PlanStep):
     """Returns an empty dataframe of shape and columns like predictor results."""
+
     def __init__(self, namespace, predictor, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -167,6 +190,7 @@ class GetPredictorColumns(PlanStep):

 class GetTableColumns(PlanStep):
     """Returns an empty dataframe of shape and columns like select from table."""
+
     def __init__(self, namespace, table, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -175,7 +199,8 @@ class GetTableColumns(PlanStep):

 class MapReduceStep(PlanStep):
     """Applies a step for each value in a list, and then reduces results to a single dataframe"""
-
+
+    def __init__(self, values, step, reduce="union", partition=None, *args, **kwargs):
         """
         :param values: input step data
         :param step: step to be applied
@@ -202,8 +227,8 @@ class MultipleSteps(PlanStep):
 class SaveToTable(PlanStep):
     def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs):
         """
-
-
+        Creates table if not exists and fills it with content of dataframe
+        is_replace - to drop table beforehand
         """
         super().__init__(*args, **kwargs)
         self.table = table
```
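Most of the churn in `steps.py` is formatter-driven (double-quoted strings, blank lines after class docstrings), but the `PlanStep.result` hunk also spells out the error raised when a step with no assigned number is referenced, naming the offending step type. A minimal, self-contained sketch of that contract; `PlanningException`, `Result`, and `PlanStep` below are stand-ins rather than the real planner imports:

```python
# Minimal sketch of the PlanStep.result contract from the diff above.
# PlanningException, Result and PlanStep are stand-ins for the real
# planner classes; only the raise-on-missing-step_num behavior is shown.


class PlanningException(Exception):
    pass


class Result:
    def __init__(self, step_num: int):
        self.step_num = step_num


class PlanStep:
    def __init__(self, step_num: int | None = None):
        self.step_num = step_num

    @property
    def result(self) -> Result:
        # step_num is assigned when the step is added to a query plan;
        # referencing an unassigned step is a planning error
        if self.step_num is None:
            raise PlanningException(
                f"Can't reference a step with no assigned step number. Tried to reference: {type(self)}"
            )
        return Result(self.step_num)


try:
    PlanStep().result
except PlanningException as e:
    print(e)
```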
mindsdb/api/executor/sql_query/result_set.py:

```diff
@@ -1,4 +1,6 @@
 import copy
+from array import array
+from typing import Any
 from dataclasses import dataclass, field, MISSING

 import numpy as np
@@ -49,6 +51,12 @@ def get_mysql_data_type_from_series(series: pd.Series, do_infer: bool = False) -
     return MYSQL_DATA_TYPE.TEXT


+def _dump_vector(value: Any) -> Any:
+    if isinstance(value, array):
+        return value.tolist()
+    return value
+
+
 @dataclass(kw_only=True, slots=True)
 class Column:
     name: str = field(default=MISSING)
@@ -70,7 +78,7 @@ class Column:
         table_name = self.table_name if self.table_alias is None else self.table_alias
         name = self.name if self.alias is None else self.alias

-        name = f
+        name = f"{prefix}_{table_name}_{name}"
         return name


@@ -95,7 +103,7 @@ class ResultSet:
         df: pd.DataFrame | None = None,
         affected_rows: int | None = None,
         is_prediction: bool = False,
-        mysql_types: list[MYSQL_DATA_TYPE] | None = None
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
     ):
         """
         Args:
@@ -122,9 +130,9 @@ class ResultSet:
         self.mysql_types = mysql_types

     def __repr__(self):
-        col_names =
+        col_names = ", ".join([col.name for col in self._columns])

-        return f
+        return f"{self.__class__.__name__}({self.length()} rows, cols: {col_names})"

     def __len__(self) -> int:
         if self._df is None:
@@ -140,38 +148,30 @@ class ResultSet:

     @classmethod
     def from_df(
-        cls,
-
+        cls,
+        df: pd.DataFrame,
+        database=None,
+        table_name=None,
+        table_alias=None,
+        is_prediction: bool = False,
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
     ):
         match mysql_types:
             case None:
                 mysql_types = [None] * len(df.columns)
             case list() if len(mysql_types) != len(df.columns):
-                raise WrongArgumentError(
-                    f'Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}'
-                )
+                raise WrongArgumentError(f"Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}")

         columns = [
-            Column(
-
-                table_name=table_name,
-                table_alias=table_alias,
-                database=database,
-                type=mysql_type
-            ) for column_name, mysql_type
-            in zip(df.columns, mysql_types)
+            Column(name=column_name, table_name=table_name, table_alias=table_alias, database=database, type=mysql_type)
+            for column_name, mysql_type in zip(df.columns, mysql_types)
         ]

         rename_df_columns(df)
-        return cls(
-            df=df,
-            columns=columns,
-            is_prediction=is_prediction,
-            mysql_types=mysql_types
-        )
+        return cls(df=df, columns=columns, is_prediction=is_prediction, mysql_types=mysql_types)

     @classmethod
-    def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) ->
+    def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) -> "ResultSet":
         """Create ResultSet from dataframe and dictionary of columns

         Args:
```
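The reworked `ResultSet.from_df` signature makes the parameters explicit and keeps the `match`-based check that `mysql_types`, when given, must line up one-to-one with the dataframe columns. A hedged sketch of just that validation step, with a stand-in `WrongArgumentError` in place of the real import:

```python
# Sketch of the mysql_types length validation in ResultSet.from_df above,
# with a stand-in exception class in place of WrongArgumentError.
import pandas as pd


class WrongArgumentError(ValueError):
    pass


def check_mysql_types(df: pd.DataFrame, mysql_types: list | None) -> list:
    match mysql_types:
        case None:
            # no types supplied: one None placeholder per dataframe column
            mysql_types = [None] * len(df.columns)
        case list() if len(mysql_types) != len(df.columns):
            raise WrongArgumentError(f"Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}")
    return mysql_types


df = pd.DataFrame({"a": [1], "b": [2]})
print(check_mysql_types(df, None))            # [None, None]
print(check_mysql_types(df, ["INT", "INT"]))  # ['INT', 'INT']
```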
The remainder of the `result_set.py` diff:

```diff
@@ -185,29 +185,18 @@ class ResultSet:
         Raises:
             ValueError: if a column is not found in columns_dict and strict is True
         """
-        alias_idx = {
-            column.alias: column
-            for column in columns_dict.values()
-            if column.alias is not None
-        }
+        alias_idx = {column.alias: column for column in columns_dict.values() if column.alias is not None}

         columns = []
         for column_name in df.columns:
             if strict and column_name not in columns_dict:
-                raise ValueError(f
-            column = (
-                columns_dict.get(column_name)
-                or alias_idx.get(column_name)
-                or Column(name=column_name)
-            )
+                raise ValueError(f"Column {column_name} not found in columns_dict")
+            column = columns_dict.get(column_name) or alias_idx.get(column_name) or Column(name=column_name)
             columns.append(column)

         rename_df_columns(df)

-        return cls(
-            columns=columns,
-            df=df
-        )
+        return cls(columns=columns, df=df)

     def to_df(self):
         columns_names = self.get_column_names()
@@ -215,7 +204,7 @@ class ResultSet:
         rename_df_columns(df, columns_names)
         return df

-    def to_df_cols(self, prefix: str =
+    def to_df_cols(self, prefix: str = "") -> tuple[pd.DataFrame, dict[str, Column]]:
         # returns dataframe and dict of columns
         # can be restored to ResultSet by from_df_cols method

@@ -235,7 +224,7 @@ class ResultSet:
     def get_tables(self):
         tables_idx = []
         tables = []
-        cols = [
+        cols = ["database", "table_name", "table_alias"]
         for col in self._columns:
             table = (col.database, col.table_name, col.table_alias)
             if table not in tables_idx:
@@ -258,7 +247,7 @@ class ResultSet:
                 col_idx = i
                 break
         if col_idx is None:
-            raise WrongArgumentError(f
+            raise WrongArgumentError(f"Column is not found: {col}")
         return col_idx

     def add_column(self, col, values=None):
@@ -281,10 +270,7 @@ class ResultSet:
         return self._columns

     def get_column_names(self):
-        columns = [
-            col.name if col.alias is None else col.alias
-            for col in self._columns
-        ]
+        columns = [col.name if col.alias is None else col.alias for col in self._columns]
         return columns

     def find_columns(self, alias=None, table_alias=None):
@@ -324,7 +310,7 @@ class ResultSet:

     def add_raw_df(self, df):
         if len(df.columns) != len(self._columns):
-            raise WrongArgumentError(f
+            raise WrongArgumentError(f"Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}")

         rename_df_columns(df)

@@ -340,7 +326,7 @@ class ResultSet:
             convert_floating=True,
             infer_objects=False,
             convert_string=False,
-            convert_boolean=False
+            convert_boolean=False,
         )
         self.add_raw_df(df)

@@ -367,9 +353,9 @@ class ResultSet:
             MYSQL_DATA_TYPE.BOOLEAN: sqlalchemy_types.BOOLEAN,
             MYSQL_DATA_TYPE.FLOAT: sqlalchemy_types.FLOAT,
             MYSQL_DATA_TYPE.DOUBLE: sqlalchemy_types.FLOAT,
-            MYSQL_DATA_TYPE.TIME: sqlalchemy_types.
-            MYSQL_DATA_TYPE.DATE: sqlalchemy_types.
-            MYSQL_DATA_TYPE.DATETIME: sqlalchemy_types.
+            MYSQL_DATA_TYPE.TIME: sqlalchemy_types.Time,
+            MYSQL_DATA_TYPE.DATE: sqlalchemy_types.Date,
+            MYSQL_DATA_TYPE.DATETIME: sqlalchemy_types.DateTime,
             MYSQL_DATA_TYPE.TIMESTAMP: sqlalchemy_types.TIMESTAMP,
         }

@@ -379,7 +365,7 @@ class ResultSet:
         # infer MYSQL_DATA_TYPE if not set
         if isinstance(column_type, MYSQL_DATA_TYPE) is False:
             if column_type is not None:
-                logger.warning(f
+                logger.warning(f"Unexpected column type: {column_type}")
             if self._df is None:
                 column_type = MYSQL_DATA_TYPE.TEXT
             else:
@@ -387,12 +373,7 @@ class ResultSet:

         sqlalchemy_type = type_mapping.get(column_type, sqlalchemy_types.TEXT)

-        columns.append(
-            TableColumn(
-                name=column.alias,
-                type=sqlalchemy_type
-            )
-        )
+        columns.append(TableColumn(name=column.alias, type=sqlalchemy_type))
         return columns

     def to_lists(self, json_types=False):
@@ -410,12 +391,15 @@ class ResultSet:
             for name, dtype in df.dtypes.to_dict().items():
                 if pd.api.types.is_datetime64_any_dtype(dtype):
                     df[name] = df[name].dt.strftime("%Y-%m-%d %H:%M:%S.%f")
-
+            for i, column in enumerate(self.columns):
+                if column.type == MYSQL_DATA_TYPE.VECTOR:
+                    df[i] = df[i].apply(_dump_vector)
+
             df.replace({np.nan: None}, inplace=True)
             return df.to_records(index=False).tolist()

         # slower but keep timestamp type
         df = self._df.replace({np.nan: None})  # TODO rework
-        return df.to_dict(
+        return df.to_dict("split")["data"]

     def get_column_values(self, col_idx):
         # get by column index
@@ -434,14 +418,11 @@ class ResultSet:
         self._df[col_idx] = values

     def add_from_result_set(self, rs):
-
         source_names = rs.get_column_names()

         col_sequence = []
         for name in self.get_column_names():
-            col_sequence.append(
-                source_names.index(name)
-            )
+            col_sequence.append(source_names.index(name))

         raw_df = rs.get_raw_df()[col_sequence]
```
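The other functional change in this file is VECTOR handling in `to_lists()`: the new `_dump_vector` helper unwraps `array.array` cell values into plain Python lists so they serialize cleanly. A standalone sketch of the conversion, assuming only `pandas`:

```python
# Standalone sketch of the VECTOR handling added to to_lists() above:
# array.array cell values become plain lists, anything else passes
# through unchanged.
from array import array
from typing import Any

import pandas as pd


def _dump_vector(value: Any) -> Any:
    if isinstance(value, array):
        return value.tolist()
    return value


df = pd.DataFrame({"embedding": [array("d", [0.1, 0.2]), None]})
print(df["embedding"].apply(_dump_vector).tolist())  # [[0.1, 0.2], None]
```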
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py:

```diff
@@ -28,7 +28,7 @@ def get_table_alias(table_obj, default_db_name):
     elif isinstance(table_obj, Select):
         # it is subquery
         if table_obj.alias is None:
-            name =
+            name = "t"
         else:
             name = table_obj.alias.parts[0]
         name = (default_db_name, name)
@@ -37,10 +37,10 @@ def get_table_alias(table_obj, default_db_name):
         return get_table_alias(table_obj.left, default_db_name)
     else:
         # unknown yet object
-        return default_db_name,
+        return default_db_name, "t", "t"

     if table_obj.alias is not None:
-        name = name + (
+        name = name + (".".join(table_obj.alias.parts),)
     else:
         name = name + (name[1],)
     return name
@@ -57,7 +57,7 @@ def get_fill_param_fnc(steps_data):
             node_prev = callstack[0]
             if isinstance(node_prev, BinaryOperation):
                 # Check case: 'something IN Parameter()'
-                if node_prev.op.lower() ==
+                if node_prev.op.lower() == "in" and node_prev.args[1] is node:
                     is_single_item = False

         if is_single_item and len(items) == 1:
@@ -71,32 +71,28 @@ def get_fill_param_fnc(steps_data):
             rs = steps_data[node.value.step_num]
             items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
             return Tuple(items)
+
     return fill_params


 class FetchDataframeStepCall(BaseStepCall):
-
     bind = FetchDataframeStep

     def call(self, step):
-
         dn = self.session.datahub.get(step.integration)
         query = step.query

         if dn is None:
-            raise UnknownError(f
+            raise UnknownError(f"Unknown integration name: {step.integration}")

         if query is None:
-            table_alias = (self.context.get(
+            table_alias = (self.context.get("database"), "result", "result")

             # fetch raw_query
-            response: DataHubResponse = dn.query(
-                native_query=step.raw_query,
-                session=self.session
-            )
+            response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
             df = response.data_frame
         else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get(
+            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))

             # TODO for information_schema we have 'database' = 'mindsdb'

@@ -106,19 +102,19 @@ class FetchDataframeStepCall(BaseStepCall):

         query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session)

-        response: DataHubResponse = dn.query(
-            query=query,
-            session=self.session
-        )
+        response: DataHubResponse = dn.query(query=query, session=self.session)
         df = response.data_frame

         if context_callback:
             context_callback(df, response.columns)

+        # if query registered, set progress
+        if self.sql_query.run_query is not None:
+            self.sql_query.run_query.set_progress(df, None)
         return ResultSet.from_df(
             df,
             table_name=table_alias[1],
             table_alias=table_alias[2],
             database=table_alias[0],
-            mysql_types=response.mysql_types
+            mysql_types=response.mysql_types,
         )
```
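`get_table_alias` resolves every FROM object to a `(database, table_name, alias)` triple, and the fallback for anonymous subqueries and unrecognized nodes is the literal name `"t"`. A simplified sketch of that convention; the `Identifier` dataclass below stands in for the mindsdb_sql AST node, and the real function handles more node types:

```python
# Simplified sketch of the (database, table_name, alias) convention used
# by get_table_alias above. Identifier is a stand-in for the AST node.
from dataclasses import dataclass


@dataclass
class Identifier:
    parts: list
    alias: "Identifier | None" = None


def get_table_alias(table_obj, default_db_name: str) -> tuple:
    if isinstance(table_obj, Identifier):
        parts = table_obj.parts
        name = (default_db_name, parts[-1]) if len(parts) == 1 else (parts[0], parts[-1])
    else:
        # unknown node: anonymous subqueries and the like default to "t"
        return default_db_name, "t", "t"

    if table_obj.alias is not None:
        name = name + (".".join(table_obj.alias.parts),)
    else:
        # no explicit alias: reuse the table name as its own alias
        name = name + (name[1],)
    return name


print(get_table_alias(Identifier(parts=["sales"]), "mindsdb"))
# ('mindsdb', 'sales', 'sales')
print(get_table_alias(Identifier(parts=["pg", "sales"], alias=Identifier(parts=["s"])), "mindsdb"))
# ('pg', 'sales', 's')
```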
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py:

```diff
@@ -57,21 +57,21 @@ class FetchDataframePartitionCall(BaseStepCall):
         # get query record
         run_query = self.sql_query.run_query
         if run_query is None:
-            raise RuntimeError(
+            raise RuntimeError("Error with partitioning of the query")
         run_query.set_params(step.params)

-        self.table_alias = get_table_alias(step.query.from_table, self.context.get(
+        self.table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
         self.current_step_num = step.step_num
         self.substeps = step.steps

         # ml task queue enabled?
         use_threads, thread_count = False, None
-        if config[
+        if config["ml_task_queue"]["type"] == "redis":
             use_threads = True

         # use threads?
-        if
-            threads = step.params[
+        if "threads" in step.params:
+            threads = step.params["threads"]
             if isinstance(threads, int):
                 thread_count = threads
                 use_threads = True
@@ -81,7 +81,7 @@ class FetchDataframePartitionCall(BaseStepCall):
             # disable even with ml task queue
             use_threads = False

-        on_error = step.params.get(
+        on_error = step.params.get("error", "raise")
         if use_threads:
             return self.fetch_threads(run_query, query, thread_count=thread_count, on_error=on_error)
         else:
@@ -89,7 +89,7 @@ class FetchDataframePartitionCall(BaseStepCall):

     def fetch_iterate(self, run_query: RunningQuery, query: ASTNode, on_error: str = None) -> ResultSet:
         """
-
+        Process batches one by one in circle
         """

         results = []
@@ -99,7 +99,7 @@ class FetchDataframePartitionCall(BaseStepCall):
                 sub_data = self.exec_sub_steps(df)
                 results.append(sub_data)
             except Exception as e:
-                if on_error ==
+                if on_error == "skip":
                     logger.error(e)
                 else:
                     raise e
@@ -131,12 +131,12 @@ class FetchDataframePartitionCall(BaseStepCall):
         - the final result is returned and used outside to concatenate with results of other's batches
         """
         input_data = ResultSet.from_df(
-            df,
-            table_name=self.table_alias[1],
-            table_alias=self.table_alias[2],
-            database=self.table_alias[0]
+            df, table_name=self.table_alias[1], table_alias=self.table_alias[2], database=self.table_alias[0]
         )

+        if len(self.substeps) == 0:
+            return input_data
+
         # execute with modified previous results
         steps_data2 = self.steps_data.copy()
         steps_data2[self.current_step_num] = input_data
@@ -147,8 +147,9 @@ class FetchDataframePartitionCall(BaseStepCall):
             steps_data2[substep.step_num] = sub_data
         return sub_data

-    def fetch_threads(
-
+    def fetch_threads(
+        self, run_query: RunningQuery, query: ASTNode, thread_count: int = None, on_error: str = None
+    ) -> ResultSet:
         """
         Process batches in threads
         - spawn required count of threads
@@ -170,9 +171,7 @@ class FetchDataframePartitionCall(BaseStepCall):
         results = []

         with ContextThreadPoolExecutor(max_workers=thread_count) as executor:
-
             for df in run_query.get_partitions(self.dn, self, query):
-
                 # split into chunks and send to workers
                 futures = []
                 for df2 in split_data_frame(df, partition_size):
@@ -182,13 +181,13 @@ class FetchDataframePartitionCall(BaseStepCall):
                 try:
                     results.append(future.result())
                 except Exception as e:
-                    if on_error ==
+                    if on_error == "skip":
                         logger.error(e)
                     else:
                         executor.shutdown()
                         raise e
                 if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
                     executor.shutdown()
-                    raise RuntimeError(
+                    raise RuntimeError("Query is interrupted")

         return self.concat_results(results)
```