MindsDB 25.7.3.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +215 -150
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +18 -44
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +246 -149
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +11 -6
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +174 -23
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -6
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +111 -145
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +5 -3
- mindsdb/utilities/fs.py +54 -17
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py:

@@ -26,38 +26,28 @@ from .base import BaseStepCall
 
 
 def get_preditor_alias(step, mindsdb_database):
-    predictor_name = '.'.join(step.predictor.parts)
-    predictor_alias = '.'.join(step.predictor.alias.parts) if step.predictor.alias is not None else predictor_name
+    predictor_name = ".".join(step.predictor.parts)
+    predictor_alias = ".".join(step.predictor.alias.parts) if step.predictor.alias is not None else predictor_name
     return (mindsdb_database, predictor_name, predictor_alias)
 
 
 class ApplyPredictorBaseCall(BaseStepCall):
-
     def apply_predictor(self, project_name, predictor_name, df, version, params):
         # is it an agent?
        agent = self.session.agents_controller.get_agent(predictor_name, project_name)
         if agent is not None:
-
-            messages = df.to_dict('records')
+            messages = df.to_dict("records")
             predictions = self.session.agents_controller.get_completion(
-                agent,
-                messages=messages,
-                project_name=project_name,
+                agent, messages=messages, project_name=project_name, params=params
             )
 
         else:
             project_datanode = self.session.datahub.get(project_name)
-            predictions = project_datanode.predict(
-                model_name=predictor_name,
-                df=df,
-                version=version,
-                params=params
-            )
+            predictions = project_datanode.predict(model_name=predictor_name, df=df, version=version, params=params)
         return predictions
 
 
 class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
-
     bind = ApplyPredictorRowStep
 
     def call(self, step):
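Aside: the rewritten `get_preditor_alias` resolves a dotted model name and falls back to it when no alias is given. A minimal sketch of that rule, with `resolve_alias` as a hypothetical stand-in for the step/AST plumbing:

    def resolve_alias(parts, alias_parts=None):
        # Dotted model name, e.g. ["proj", "my_model"] -> "proj.my_model"
        name = ".".join(parts)
        # Fall back to the model name when no alias was given
        alias = ".".join(alias_parts) if alias_parts is not None else name
        return name, alias

    assert resolve_alias(["proj", "model"]) == ("proj.model", "proj.model")
    assert resolve_alias(["proj", "model"], ["m"]) == ("proj.model", "m")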
@@ -89,7 +79,7 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
         for k, v in where_data.items():
             predictions[k] = v
 
-        table_name = get_preditor_alias(step, self.context.get('database'))
+        table_name = get_preditor_alias(step, self.context.get("database"))
 
         if len(predictions) == 0:
             columns_names = project_datanode.get_table_columns_names(predictor_name)
@@ -100,12 +90,11 @@ class ApplyPredictorRowStepCall(ApplyPredictorBaseCall):
             database=table_name[0],
             table_name=table_name[1],
             table_alias=table_name[2],
-            is_prediction=True
+            is_prediction=True,
         )
 
 
 class ApplyPredictorStepCall(ApplyPredictorBaseCall):
-
     bind = ApplyPredictorStep
 
     def call(self, step):
@@ -115,20 +104,20 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         params = step.params or {}
 
         # adding __mindsdb_row_id, use first table if exists
-        if len(data.find_columns('__mindsdb_row_id')) == 0:
+        if len(data.find_columns("__mindsdb_row_id")) == 0:
             table = data.get_tables()[0] if len(data.get_tables()) > 0 else None
 
             row_id_col = Column(
-                name='__mindsdb_row_id',
-                database=table['database'] if table is not None else None,
-                table_name=table['table_name'] if table is not None else None,
-                table_alias=table['table_alias'] if table is not None else None,
+                name="__mindsdb_row_id",
+                database=table["database"] if table is not None else None,
+                table_name=table["table_name"] if table is not None else None,
+                table_alias=table["table_alias"] if table is not None else None,
             )
 
-            row_id = self.context.get('row_id')
+            row_id = self.context.get("row_id")
             values = range(row_id, row_id + data.length())
             data.add_column(row_id_col, values)
-            self.context['row_id'] += data.length()
+            self.context["row_id"] += data.length()
 
         project_name = step.namespace
         predictor_name = step.predictor.parts[0]
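Aside: the `__mindsdb_row_id` block keeps a running counter in the executor context so row ids stay unique across batches. The pattern in isolation, with a plain dict standing in for the context (`add_row_ids` is a hypothetical helper):

    def add_row_ids(context, rows):
        # Assign consecutive ids starting from the current counter,
        # then advance the counter so the next batch continues the sequence.
        start = context["row_id"]
        ids = list(range(start, start + len(rows)))
        context["row_id"] += len(rows)
        return ids

    ctx = {"row_id": 0}
    print(add_row_ids(ctx, ["a", "b", "c"]))  # [0, 1, 2]
    print(add_row_ids(ctx, ["d"]))            # [3]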
@@ -143,47 +132,46 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             data.set_column_values(k, v)
 
         predictor_metadata = {}
-        for pm in self.context['predictor_metadata']:
-            if pm['name'] == predictor_name and pm['integration_name'].lower() == project_name:
+        for pm in self.context["predictor_metadata"]:
+            if pm["name"] == predictor_name and pm["integration_name"].lower() == project_name:
                 predictor_metadata = pm
                 break
-        is_timeseries = predictor_metadata['timeseries']
+        is_timeseries = predictor_metadata["timeseries"]
         _mdb_forecast_offset = None
         if is_timeseries:
-            if '> LATEST' in self.context['query_str']:
+            if "> LATEST" in self.context["query_str"]:
                 # stream mode -- if > LATEST, forecast starts on inferred next timestamp
                 _mdb_forecast_offset = 1
-            elif '= LATEST' in self.context['query_str']:
+            elif "= LATEST" in self.context["query_str"]:
                 # override: when = LATEST, forecast starts on last provided timestamp instead of inferred next time
                 _mdb_forecast_offset = 0
             else:
                 # normal mode -- emit a forecast ($HORIZON data points on each) for each provided timestamp
-                params['force_ts_infer'] = True
+                params["force_ts_infer"] = True
                 _mdb_forecast_offset = None
 
-            data.add_column(Column(name='__mdb_forecast_offset'), _mdb_forecast_offset)
+            data.add_column(Column(name="__mdb_forecast_offset"), _mdb_forecast_offset)
 
-        table_name = get_preditor_alias(step, self.context['database'])
+        table_name = get_preditor_alias(step, self.context["database"])
 
         project_datanode = self.session.datahub.get(project_name)
         if len(data) == 0:
-            columns_names = project_datanode.get_table_columns_names(predictor_name) + ['__mindsdb_row_id']
+            columns_names = project_datanode.get_table_columns_names(predictor_name) + ["__mindsdb_row_id"]
             result = ResultSet(is_prediction=True)
             for column_name in columns_names:
-                result.add_column(Column(
-                    name=column_name,
-                    database=table_name[0],
-                    table_name=table_name[1],
-                    table_alias=table_name[2]
-                ))
+                result.add_column(
+                    Column(
+                        name=column_name, database=table_name[0], table_name=table_name[1], table_alias=table_name[2]
+                    )
+                )
         else:
-            predictor_id = predictor_metadata['id']
+            predictor_id = predictor_metadata["id"]
             table_df = data.to_df()
 
             if self.session.predictor_cache is not False:
-                key = f'{predictor_name}_{predictor_id}_{dataframe_checksum(table_df)}'
+                key = f"{predictor_name}_{predictor_id}_{dataframe_checksum(table_df)}"
 
-                predictor_cache = get_cache('predict')
+                predictor_cache = get_cache("predict")
                 predictions = predictor_cache.get(key)
             else:
                 predictions = None
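Aside: the cache branch keys predictions on model name, model id, and a checksum of the input frame, so a repeated call with identical input can reuse cached predictions. A sketch of that keying, assuming a content hash; MindsDB's actual `dataframe_checksum` may hash differently:

    import hashlib
    import pandas as pd

    def dataframe_checksum(df: pd.DataFrame) -> str:
        # Hash a stable serialization of the frame's content.
        return hashlib.sha256(df.to_csv(index=False).encode()).hexdigest()

    def cache_key(predictor_name: str, predictor_id: int, df: pd.DataFrame) -> str:
        return f"{predictor_name}_{predictor_id}_{dataframe_checksum(df)}"

    df = pd.DataFrame({"x": [1, 2, 3]})
    key = cache_key("my_model", 42, df)
    # Same content -> same key -> cached predictions can be reused.
    assert key == cache_key("my_model", 42, df.copy())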
@@ -221,7 +209,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
         # apply filter
         if is_timeseries:
-            pred_data = predictions.to_dict(orient='records')
+            pred_data = predictions.to_dict(orient="records")
             where_data = list(data.get_records())
             pred_data = self.apply_ts_filter(pred_data, where_data, step, predictor_metadata)
             predictions = pd.DataFrame(pred_data)
@@ -231,37 +219,33 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             database=table_name[0],
             table_name=table_name[1],
             table_alias=table_name[2],
-            is_prediction=True
+            is_prediction=True,
         )
 
         return result
 
     def apply_ts_filter(self, predictor_data, table_data, step, predictor_metadata):
-
         if step.output_time_filter is None:
             # no filter, exit
             return predictor_data
 
         # apply filter
-        group_cols = predictor_metadata['group_by_columns']
-        order_col = predictor_metadata['order_by_column']
+        group_cols = predictor_metadata["group_by_columns"]
+        order_col = predictor_metadata["order_by_column"]
 
         filter_args = step.output_time_filter.args
         filter_op = step.output_time_filter.op
 
         # filter field must be order column
-        if not (
-            isinstance(filter_args[0], Identifier)
-            and filter_args[0].parts[-1] == order_col
-        ):
+        if not (isinstance(filter_args[0], Identifier) and filter_args[0].parts[-1] == order_col):
             # exit otherwise
             return predictor_data
 
         def get_date_format(samples):
             # Try common formats first with explicit patterns
             for date_format, pattern in (
-                ('%Y-%m-%d', r'[\d]{4}-[\d]{2}-[\d]{2}'),
-                ('%Y-%m-%d %H:%M:%S', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}'),
+                ("%Y-%m-%d", r"[\d]{4}-[\d]{2}-[\d]{2}"),
+                ("%Y-%m-%d %H:%M:%S", r"[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}"),
                 # ('%Y-%m-%d %H:%M:%S%z', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}\+[\d]{2}:[\d]{2}'),
                 # ('%Y', '[\d]{4}')
             ):
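Aside: `get_date_format` tries explicit regex patterns before falling back to `dateparser`. The pattern-first step in isolation (`guess_date_format` is hypothetical; the original's matching details may differ):

    import re

    def guess_date_format(sample: str):
        # Try common formats first with explicit patterns.
        for date_format, pattern in (
            ("%Y-%m-%d", r"\d{4}-\d{2}-\d{2}"),
            ("%Y-%m-%d %H:%M:%S", r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"),
        ):
            if re.fullmatch(pattern, sample):
                return date_format
        return None  # caller falls back to dateparser-based inference

    assert guess_date_format("2024-01-31") == "%Y-%m-%d"
    assert guess_date_format("2024-01-31 12:00:00") == "%Y-%m-%d %H:%M:%S"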
@@ -281,6 +265,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             # Parse the first sample to get its format
             # The import is heavy, so we do it here on-demand
             import dateparser
+
             parsed_date = dateparser.parse(samples[0])
             if parsed_date is None:
                 raise ValueError("Could not parse date")
@@ -290,25 +275,21 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                     if dateparser.parse(sample) is None:
                         raise ValueError("Inconsistent date formats in samples")
                 # Convert to strftime format based on the input
-                if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
-                    return '%Y-%m-%d %H:%M:%S'
-                return '%Y-%m-%d'
+                if re.search(r"\d{2}:\d{2}:\d{2}", samples[0]):
+                    return "%Y-%m-%d %H:%M:%S"
+                return "%Y-%m-%d"
             except (ValueError, AttributeError):
                 # If dateparser fails, return a basic format as last resort
-                return '%Y-%m-%d'
+                return "%Y-%m-%d"
 
-        model_types = predictor_metadata['model_types']
-        if model_types.get(order_col) in ('float', 'integer'):
+        model_types = predictor_metadata["model_types"]
+        if model_types.get(order_col) in ("float", "integer"):
             # convert strings to digits
-            fnc = {
-                'integer': int,
-                'float': float
-            }[model_types[order_col]]
+            fnc = {"integer": int, "float": float}[model_types[order_col]]
 
             # convert predictor_data
             if len(predictor_data) > 0:
                 if isinstance(predictor_data[0][order_col], str):
-
                     for row in predictor_data:
                         row[order_col] = fnc(row[order_col])
                 elif isinstance(predictor_data[0][order_col], dt.date):
@@ -318,7 +299,6 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
             # convert predictor_data
             if isinstance(table_data[0][order_col], str):
-
                 for row in table_data:
                     row[order_col] = fnc(row[order_col])
             elif isinstance(table_data[0][order_col], dt.date):
@@ -327,18 +307,13 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                     row[order_col] = fnc(row[order_col])
 
             # convert args to date
-            samples = [
-                arg.value
-                for arg in filter_args
-                if isinstance(arg, Constant) and isinstance(arg.value, str)
-            ]
+            samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)]
             if len(samples) > 0:
-
                 for arg in filter_args:
                     if isinstance(arg, Constant) and isinstance(arg.value, str):
                         arg.value = fnc(arg.value)
 
-        if model_types.get(order_col) in ('date', 'datetime') or isinstance(predictor_data[0][order_col], pd.Timestamp):
+        if model_types.get(order_col) in ("date", "datetime") or isinstance(predictor_data[0][order_col], pd.Timestamp):  # noqa
             # convert strings to date
             # it is making side effect on original data by changing it but let it be
 
@@ -364,11 +339,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             _cast_samples(table_data, order_col)
 
             # convert args to date
-            samples = [
-                arg.value
-                for arg in filter_args
-                if isinstance(arg, Constant) and isinstance(arg.value, str)
-            ]
+            samples = [arg.value for arg in filter_args if isinstance(arg, Constant) and isinstance(arg.value, str)]
             if len(samples) > 0:
                 date_format = get_date_format(samples)
 
@@ -380,7 +351,6 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
         # first pass: get max values for Latest in table data
         latest_vals = {}
         if Latest() in filter_args:
-
             for row in table_data:
                 if group_cols is None:
                     key = 0  # the same for any value
@@ -400,11 +370,11 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
                 data2.append(row)
         elif isinstance(step.output_time_filter, BinaryOperation):
             op_map = {
-                '<': '__lt__',
-                '<=': '__le__',
-                '>': '__gt__',
-                '>=': '__ge__',
-                '=': '__eq__',
+                "<": "__lt__",
+                "<=": "__le__",
+                ">": "__gt__",
+                ">=": "__ge__",
+                "=": "__eq__",
             }
             arg = filter_args[1]
             if isinstance(arg, Latest):
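Aside: `op_map` maps SQL comparison operators to Python dunder method names, so the time filter can be applied with a plain attribute lookup. The dispatch in isolation:

    op_map = {
        "<": "__lt__",
        "<=": "__le__",
        ">": "__gt__",
        ">=": "__ge__",
        "=": "__eq__",
    }

    def apply_op(left, op, right):
        # getattr(left, "__lt__")(right) is equivalent to left < right
        return getattr(left, op_map[op])(right)

    rows = [{"t": 1}, {"t": 2}, {"t": 3}]
    filtered = [r for r in rows if apply_op(r["t"], ">=", 2)]
    print(filtered)  # [{'t': 2}, {'t': 3}]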
@@ -435,5 +405,4 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
 
 
 class ApplyTimeseriesPredictorStepCall(ApplyPredictorStepCall):
-
     bind = ApplyTimeseriesPredictorStep
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py:

@@ -50,29 +50,26 @@ def get_table_alias(table_obj, default_db_name):
 
 def get_fill_param_fnc(steps_data):
     def fill_params(node, callstack=None, **kwargs):
-        if isinstance(node, Parameter):
-            …
-            rs = steps_data[node.value.step_num]
-            items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
-            return Tuple(items)
+        if not isinstance(node, Parameter):
+            return
+
+        rs = steps_data[node.value.step_num]
+        items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
+
+        is_single_item = True
+        if callstack:
+            node_prev = callstack[0]
+            if isinstance(node_prev, BinaryOperation):
+                # Check case: 'something IN Parameter()'
+                if node_prev.op.lower() == "in" and node_prev.args[1] is node:
+                    is_single_item = False
+
+        if is_single_item and len(items) == 1:
+            # extract one value for option 'col=(subselect)'
+            node = items[0]
+        else:
+            node = Tuple(items)
+        return node
 
     return fill_params
 
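Aside: the rewritten `fill_params` collapses a one-row subselect into a scalar constant unless the parameter sits on the right side of an `IN`, which must stay a tuple. The decision rule sketched with plain values instead of AST nodes (`substitute` is hypothetical):

    def substitute(values, parent_op=None, is_rhs_of_in=False):
        # One value and not inside "col IN (...)": use the scalar,
        # so "col = (subselect)" compares against a single constant.
        if len(values) == 1 and not (parent_op == "in" and is_rhs_of_in):
            return values[0]
        # Otherwise keep a tuple, as "col IN (subselect)" requires.
        return tuple(values)

    print(substitute([5], parent_op="="))                      # 5
    print(substitute([5], parent_op="in", is_rhs_of_in=True))  # (5,)
    print(substitute([1, 2], parent_op="="))                   # (1, 2)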
@@ -115,7 +112,7 @@ class FetchDataframeStepCall(BaseStepCall):
 
         # if query registered, set progress
         if self.sql_query.run_query is not None:
-            self.sql_query.run_query.set_progress(df…
+            self.sql_query.run_query.set_progress(processed_rows=len(df))
         return ResultSet.from_df(
             df,
             table_name=table_alias[1],
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py:

@@ -97,6 +97,7 @@ class FetchDataframePartitionCall(BaseStepCall):
         for df in run_query.get_partitions(self.dn, self, query):
             try:
                 sub_data = self.exec_sub_steps(df)
+                run_query.set_progress(processed_rows=len(df))
                 results.append(sub_data)
             except Exception as e:
                 if on_error == "skip":
@@ -175,17 +176,22 @@ class FetchDataframePartitionCall(BaseStepCall):
         # split into chunks and send to workers
         futures = []
         for df2 in split_data_frame(df, partition_size):
-            futures.append(executor.submit(self.exec_sub_steps, df2))
+            futures.append([executor.submit(self.exec_sub_steps, df2), len(df2)])
 
-        for future in futures:
+        error = None
+        for future, rows_count in futures:
             try:
                 results.append(future.result())
+                run_query.set_progress(processed_rows=rows_count)
             except Exception as e:
                 if on_error == "skip":
                     logger.error(e)
                 else:
                     executor.shutdown()
-                    raise e
+                    error = e
+
+        if error:
+            raise error
         if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
             executor.shutdown()
             raise RuntimeError("Query is interrupted")
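Aside: the worker loop now drains every future, recording progress per finished chunk, and re-raises only after the loop, so one failed partition no longer aborts progress accounting for the others. The pattern in isolation (`run_all` is hypothetical):

    from concurrent.futures import ThreadPoolExecutor

    def run_all(tasks, on_error="raise"):
        results, error, processed = [], None, 0
        with ThreadPoolExecutor() as executor:
            futures = [(executor.submit(fn), n) for fn, n in tasks]
            for future, rows_count in futures:
                try:
                    results.append(future.result())
                    processed += rows_count  # progress counted per finished chunk
                except Exception as e:
                    if on_error == "skip":
                        continue
                    error = e  # remember, but keep draining remaining futures
        if error:
            raise error  # surface the failure only after the loop
        return results, processed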
mindsdb/api/executor/sql_query/steps/subselect_step.py:

@@ -2,7 +2,15 @@ from collections import defaultdict
 
 import pandas as pd
 
-from mindsdb_sql_parser.ast import …
+from mindsdb_sql_parser.ast import (
+    Identifier,
+    Select,
+    Star,
+    Constant,
+    Function,
+    Variable,
+    BinaryOperation,
+)
 
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES
 from mindsdb.api.executor.planner.step_result import Result
@@ -52,13 +60,8 @@ class SubSelectStepCall(BaseStepCall):
 
         # inject previous step values
         if isinstance(query, Select):
-
-            def inject_values(node, **kwargs):
-                if isinstance(node, Parameter) and isinstance(node.value, Result):
-                    prev_result = self.steps_data[node.value.step_num]
-                    return Constant(prev_result.get_column_values(col_idx=0)[0])
-
-            query_traversal(query, inject_values)
+            fill_params = get_fill_param_fnc(self.steps_data)
+            query_traversal(query, fill_params)
 
         df = result.to_df()
         res = query_df(df, query, session=self.session)