MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (70)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/run_a2a.py +1 -1
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  8. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  9. mindsdb/api/executor/planner/plan_join.py +67 -77
  10. mindsdb/api/executor/planner/query_planner.py +176 -155
  11. mindsdb/api/executor/planner/steps.py +37 -12
  12. mindsdb/api/executor/sql_query/result_set.py +45 -64
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  15. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  16. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  17. mindsdb/api/executor/utilities/sql.py +42 -48
  18. mindsdb/api/http/namespaces/config.py +1 -1
  19. mindsdb/api/http/namespaces/file.py +14 -23
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  23. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  24. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  25. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  26. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  27. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  28. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  29. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  30. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  32. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  33. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  34. mindsdb/integrations/libs/api_handler.py +261 -57
  35. mindsdb/integrations/libs/base.py +100 -29
  36. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  37. mindsdb/integrations/utilities/handler_utils.py +23 -8
  38. mindsdb/integrations/utilities/sql_utils.py +35 -40
  39. mindsdb/interfaces/agents/agents_controller.py +196 -192
  40. mindsdb/interfaces/agents/constants.py +7 -1
  41. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  42. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  43. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  44. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  46. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  47. mindsdb/interfaces/database/database.py +81 -57
  48. mindsdb/interfaces/database/integrations.py +220 -234
  49. mindsdb/interfaces/database/log.py +72 -104
  50. mindsdb/interfaces/database/projects.py +156 -193
  51. mindsdb/interfaces/file/file_controller.py +21 -65
  52. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  53. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  54. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  55. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  56. mindsdb/interfaces/skills/skills_controller.py +54 -36
  57. mindsdb/interfaces/skills/sql_agent.py +109 -86
  58. mindsdb/interfaces/storage/db.py +223 -79
  59. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  60. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  61. mindsdb/utilities/config.py +9 -2
  62. mindsdb/utilities/log.py +35 -26
  63. mindsdb/utilities/ml_task_queue/task.py +19 -22
  64. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  65. mindsdb/utilities/starters.py +49 -1
  66. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
  67. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
  68. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  69. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  70. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/planner/steps.py

@@ -9,7 +9,9 @@ class PlanStep:
     @property
     def result(self):
         if self.step_num is None:
-            raise PlanningException(f'Can\'t reference a step with no assigned step number. Tried to reference: {type(self)}')
+            raise PlanningException(
+                f"Can't reference a step with no assigned step number. Tried to reference: {type(self)}"
+            )
         return Result(self.step_num)

     def __eq__(self, other):
@@ -18,7 +20,7 @@ class PlanStep:

         for k in vars(self):
             # skip result comparison
-            if k == 'result_data':
+            if k == "result_data":
                 continue

             if getattr(self, k) != getattr(other, k):
@@ -28,8 +30,8 @@ class PlanStep:

     def __repr__(self):
         attrs_dict = vars(self)
-        attrs_str = ', '.join([f'{k}={str(v)}' for k, v in attrs_dict.items()])
-        return f'{self.__class__.__name__}({attrs_str})'
+        attrs_str = ", ".join([f"{k}={str(v)}" for k, v in attrs_dict.items()])
+        return f"{self.__class__.__name__}({attrs_str})"

     def set_result(self, result):
         self.result_data = result
@@ -37,6 +39,7 @@ class PlanStep:

 class ProjectStep(PlanStep):
     """Selects columns from a dataframe"""
+
     def __init__(self, columns, dataframe, ignore_doubles=False, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.columns = columns
@@ -47,6 +50,7 @@ class ProjectStep(PlanStep):
 # TODO remove
 class FilterStep(PlanStep):
     """Filters some dataframe according to a query"""
+
     def __init__(self, dataframe, query, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dataframe = dataframe
@@ -66,6 +70,7 @@ class GroupByStep(PlanStep):

 class JoinStep(PlanStep):
     """Joins two dataframes, producing a new dataframe"""
+
     def __init__(self, left, right, query, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.left = left
@@ -75,7 +80,8 @@ class JoinStep(PlanStep):

 class UnionStep(PlanStep):
     """Union of two dataframes, producing a new dataframe"""
-    def __init__(self, left, right, unique, operation='union', *args, **kwargs):
+
+    def __init__(self, left, right, unique, operation="union", *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.left = left
         self.right = right
@@ -95,6 +101,7 @@ class OrderByStep(PlanStep):

 class LimitOffsetStep(PlanStep):
     """Applies limit and offset to a dataframe"""
+
     def __init__(self, dataframe, limit=None, offset=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dataframe = dataframe
@@ -104,6 +111,7 @@ class LimitOffsetStep(PlanStep):

 class FetchDataframeStep(PlanStep):
     """Fetches a dataframe from external integration"""
+
     def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.integration = integration
@@ -114,15 +122,28 @@ class FetchDataframeStep(PlanStep):

 class FetchDataframeStepPartition(FetchDataframeStep):
     """Fetches a dataframe from external integration in partitions"""
-    def __init__(self, *args, **kwargs):
+
+    def __init__(self, steps=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.steps = []
+        if steps is None:
+            steps = []
+        self.steps = steps


 class ApplyPredictorStep(PlanStep):
     """Applies a mindsdb predictor on some dataframe and returns a new dataframe with predictions"""
-    def __init__(self, namespace, predictor, dataframe, params: dict = None,
-                 row_dict: dict = None, columns_map: dict = None, *args, **kwargs):
+
+    def __init__(
+        self,
+        namespace,
+        predictor,
+        dataframe,
+        params: dict = None,
+        row_dict: dict = None,
+        columns_map: dict = None,
+        *args,
+        **kwargs,
+    ):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
         self.predictor = predictor
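
Worth noting in the FetchDataframeStepPartition change above: the new `steps` argument defaults to None and is swapped for a fresh list inside the body. That is the idiomatic way to accept an optional list parameter in Python, since a literal `steps=[]` default is evaluated once at definition time and shared by every call. A minimal sketch of the pitfall this pattern avoids (illustrative classes, not MindsDB code):

    class Bad:
        def __init__(self, steps=[]):  # one list, created once at definition time
            self.steps = steps

    class Good:
        def __init__(self, steps=None):  # fresh list per instance when omitted
            if steps is None:
                steps = []
            self.steps = steps

    a, b = Bad(), Bad()
    a.steps.append("x")
    print(b.steps)  # ['x'] -- the default list is shared across instances

    c, d = Good(), Good()
    c.steps.append("x")
    print(d.steps)  # [] -- each instance owns its list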
@@ -149,6 +170,7 @@ class ApplyTimeseriesPredictorStep(ApplyPredictorStep):

 class ApplyPredictorRowStep(PlanStep):
     """Applies a mindsdb predictor to one row of values and returns a dataframe of one row, the predictor."""
+
     def __init__(self, namespace, predictor, row_dict, params=None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -159,6 +181,7 @@ class ApplyPredictorRowStep(PlanStep):

 class GetPredictorColumns(PlanStep):
     """Returns an empty dataframe of shape and columns like predictor results."""
+
     def __init__(self, namespace, predictor, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -167,6 +190,7 @@ class GetPredictorColumns(PlanStep):

 class GetTableColumns(PlanStep):
     """Returns an empty dataframe of shape and columns like select from table."""
+
     def __init__(self, namespace, table, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.namespace = namespace
@@ -175,7 +199,8 @@ class GetTableColumns(PlanStep):

 class MapReduceStep(PlanStep):
     """Applies a step for each value in a list, and then reduces results to a single dataframe"""
-    def __init__(self, values, step, reduce='union', partition=None, *args, **kwargs):
+
+    def __init__(self, values, step, reduce="union", partition=None, *args, **kwargs):
         """
         :param values: input step data
         :param step: step to be applied
@@ -202,8 +227,8 @@ class MultipleSteps(PlanStep):
 class SaveToTable(PlanStep):
     def __init__(self, table, dataframe, is_replace=False, params=None, *args, **kwargs):
         """
-            Creates table if not exists and fills it with content of dataframe
-            is_replace - to drop table beforehand
+        Creates table if not exists and fills it with content of dataframe
+        is_replace - to drop table beforehand
         """
         super().__init__(*args, **kwargs)
         self.table = table
mindsdb/api/executor/sql_query/result_set.py

@@ -1,4 +1,6 @@
 import copy
+from array import array
+from typing import Any
 from dataclasses import dataclass, field, MISSING

 import numpy as np
@@ -49,6 +51,12 @@ def get_mysql_data_type_from_series(series: pd.Series, do_infer: bool = False) -
         return MYSQL_DATA_TYPE.TEXT


+def _dump_vector(value: Any) -> Any:
+    if isinstance(value, array):
+        return value.tolist()
+    return value
+
+
 @dataclass(kw_only=True, slots=True)
 class Column:
     name: str = field(default=MISSING)
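
The new `_dump_vector` helper above converts vector values that arrive as `array.array` objects into plain Python lists, which serialize cleanly when rows are dumped (see the VECTOR handling in `to_lists` further down); anything else passes through untouched. A quick illustration of the same logic in isolation:

    from array import array

    def _dump_vector(value):
        if isinstance(value, array):
            return value.tolist()
        return value

    print(_dump_vector(array("d", [0.1, 0.2])))  # [0.1, 0.2] -- plain list
    print(_dump_vector([0.1, 0.2]))              # non-array values pass through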
@@ -70,7 +78,7 @@ class Column:
         table_name = self.table_name if self.table_alias is None else self.table_alias
         name = self.name if self.alias is None else self.alias

-        name = f'{prefix}_{table_name}_{name}'
+        name = f"{prefix}_{table_name}_{name}"
         return name


@@ -95,7 +103,7 @@ class ResultSet:
         df: pd.DataFrame | None = None,
         affected_rows: int | None = None,
         is_prediction: bool = False,
-        mysql_types: list[MYSQL_DATA_TYPE] | None = None
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
     ):
         """
         Args:
@@ -122,9 +130,9 @@ class ResultSet:
         self.mysql_types = mysql_types

     def __repr__(self):
-        col_names = ', '.join([col.name for col in self._columns])
+        col_names = ", ".join([col.name for col in self._columns])

-        return f'{self.__class__.__name__}({self.length()} rows, cols: {col_names})'
+        return f"{self.__class__.__name__}({self.length()} rows, cols: {col_names})"

     def __len__(self) -> int:
         if self._df is None:
@@ -140,38 +148,30 @@ class ResultSet:

     @classmethod
     def from_df(
-        cls, df: pd.DataFrame, database=None, table_name=None, table_alias=None,
-        is_prediction: bool = False, mysql_types: list[MYSQL_DATA_TYPE] | None = None
+        cls,
+        df: pd.DataFrame,
+        database=None,
+        table_name=None,
+        table_alias=None,
+        is_prediction: bool = False,
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
     ):
         match mysql_types:
             case None:
                 mysql_types = [None] * len(df.columns)
             case list() if len(mysql_types) != len(df.columns):
-                raise WrongArgumentError(
-                    f'Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}'
-                )
+                raise WrongArgumentError(f"Mysql types length mismatch: {len(mysql_types)} != {len(df.columns)}")

         columns = [
-            Column(
-                name=column_name,
-                table_name=table_name,
-                table_alias=table_alias,
-                database=database,
-                type=mysql_type
-            ) for column_name, mysql_type
-            in zip(df.columns, mysql_types)
+            Column(name=column_name, table_name=table_name, table_alias=table_alias, database=database, type=mysql_type)
+            for column_name, mysql_type in zip(df.columns, mysql_types)
         ]

         rename_df_columns(df)
-        return cls(
-            df=df,
-            columns=columns,
-            is_prediction=is_prediction,
-            mysql_types=mysql_types
-        )
+        return cls(df=df, columns=columns, is_prediction=is_prediction, mysql_types=mysql_types)

     @classmethod
-    def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) -> 'ResultSet':
+    def from_df_cols(cls, df: pd.DataFrame, columns_dict: dict[str, Column], strict: bool = True) -> "ResultSet":
         """Create ResultSet from dataframe and dictionary of columns

         Args:
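
`from_df` now validates `mysql_types` with a structural `match` statement (Python 3.10+): `case None` fills in one untyped slot per column, while `case list() if ...` uses a guard to reject a length mismatch. A standalone sketch of that pattern, with an illustrative function name rather than MindsDB's API:

    def normalize_types(mysql_types, n_columns):
        match mysql_types:
            case None:
                mysql_types = [None] * n_columns  # one untyped slot per column
            case list() if len(mysql_types) != n_columns:
                raise ValueError(f"Mysql types length mismatch: {len(mysql_types)} != {n_columns}")
        return mysql_types

    print(normalize_types(None, 3))     # [None, None, None]
    print(normalize_types(["INT"], 1))  # ['INT'] -- matching length falls through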
@@ -185,29 +185,18 @@ class ResultSet:
         Raises:
             ValueError: if a column is not found in columns_dict and strict is True
         """
-        alias_idx = {
-            column.alias: column
-            for column in columns_dict.values()
-            if column.alias is not None
-        }
+        alias_idx = {column.alias: column for column in columns_dict.values() if column.alias is not None}

         columns = []
         for column_name in df.columns:
             if strict and column_name not in columns_dict:
-                raise ValueError(f'Column {column_name} not found in columns_dict')
-            column = (
-                columns_dict.get(column_name)
-                or alias_idx.get(column_name)
-                or Column(name=column_name)
-            )
+                raise ValueError(f"Column {column_name} not found in columns_dict")
+            column = columns_dict.get(column_name) or alias_idx.get(column_name) or Column(name=column_name)
             columns.append(column)

         rename_df_columns(df)

-        return cls(
-            columns=columns,
-            df=df
-        )
+        return cls(columns=columns, df=df)

     def to_df(self):
         columns_names = self.get_column_names()
@@ -215,7 +204,7 @@ class ResultSet:
         rename_df_columns(df, columns_names)
         return df

-    def to_df_cols(self, prefix: str = '') -> tuple[pd.DataFrame, dict[str, Column]]:
+    def to_df_cols(self, prefix: str = "") -> tuple[pd.DataFrame, dict[str, Column]]:
         # returns dataframe and dict of columns
         # can be restored to ResultSet by from_df_cols method

@@ -235,7 +224,7 @@ class ResultSet:
     def get_tables(self):
         tables_idx = []
         tables = []
-        cols = ['database', 'table_name', 'table_alias']
+        cols = ["database", "table_name", "table_alias"]
         for col in self._columns:
             table = (col.database, col.table_name, col.table_alias)
             if table not in tables_idx:
@@ -258,7 +247,7 @@ class ResultSet:
                 col_idx = i
                 break
         if col_idx is None:
-            raise WrongArgumentError(f'Column is not found: {col}')
+            raise WrongArgumentError(f"Column is not found: {col}")
         return col_idx

     def add_column(self, col, values=None):
@@ -281,10 +270,7 @@ class ResultSet:
         return self._columns

     def get_column_names(self):
-        columns = [
-            col.name if col.alias is None else col.alias
-            for col in self._columns
-        ]
+        columns = [col.name if col.alias is None else col.alias for col in self._columns]
         return columns

     def find_columns(self, alias=None, table_alias=None):
@@ -324,7 +310,7 @@ class ResultSet:

     def add_raw_df(self, df):
         if len(df.columns) != len(self._columns):
-            raise WrongArgumentError(f'Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}')
+            raise WrongArgumentError(f"Record length mismatch columns length: {len(df.columns)} != {len(self.columns)}")

         rename_df_columns(df)

@@ -340,7 +326,7 @@
             convert_floating=True,
             infer_objects=False,
             convert_string=False,
-            convert_boolean=False
+            convert_boolean=False,
         )
         self.add_raw_df(df)

@@ -367,9 +353,9 @@ class ResultSet:
             MYSQL_DATA_TYPE.BOOLEAN: sqlalchemy_types.BOOLEAN,
             MYSQL_DATA_TYPE.FLOAT: sqlalchemy_types.FLOAT,
             MYSQL_DATA_TYPE.DOUBLE: sqlalchemy_types.FLOAT,
-            MYSQL_DATA_TYPE.TIME: sqlalchemy_types.TIME,
-            MYSQL_DATA_TYPE.DATE: sqlalchemy_types.DATE,
-            MYSQL_DATA_TYPE.DATETIME: sqlalchemy_types.DATETIME,
+            MYSQL_DATA_TYPE.TIME: sqlalchemy_types.Time,
+            MYSQL_DATA_TYPE.DATE: sqlalchemy_types.Date,
+            MYSQL_DATA_TYPE.DATETIME: sqlalchemy_types.DateTime,
             MYSQL_DATA_TYPE.TIMESTAMP: sqlalchemy_types.TIMESTAMP,
         }

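
The `TIME`/`DATE`/`DATETIME` entries above switch from SQLAlchemy's uppercase types to the CamelCase generics. Uppercase types emit their SQL keyword verbatim, while generics like `DateTime` are compiled per dialect, which matters when the same mapping serves multiple backends. A small demonstration, assuming SQLAlchemy is installed (the postgresql dialect is just an example):

    from sqlalchemy import types
    from sqlalchemy.dialects import postgresql

    pg = postgresql.dialect()
    print(types.DATETIME().compile(dialect=pg))  # DATETIME -- emitted verbatim
    print(types.DateTime().compile(dialect=pg))  # TIMESTAMP WITHOUT TIME ZONE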
@@ -379,7 +365,7 @@ class ResultSet:
         # infer MYSQL_DATA_TYPE if not set
         if isinstance(column_type, MYSQL_DATA_TYPE) is False:
             if column_type is not None:
-                logger.warning(f'Unexpected column type: {column_type}')
+                logger.warning(f"Unexpected column type: {column_type}")
             if self._df is None:
                 column_type = MYSQL_DATA_TYPE.TEXT
             else:
@@ -387,12 +373,7 @@ class ResultSet:

         sqlalchemy_type = type_mapping.get(column_type, sqlalchemy_types.TEXT)

-        columns.append(
-            TableColumn(
-                name=column.alias,
-                type=sqlalchemy_type
-            )
-        )
+        columns.append(TableColumn(name=column.alias, type=sqlalchemy_type))
         return columns

     def to_lists(self, json_types=False):
@@ -410,12 +391,15 @@ class ResultSet:
             for name, dtype in df.dtypes.to_dict().items():
                 if pd.api.types.is_datetime64_any_dtype(dtype):
                     df[name] = df[name].dt.strftime("%Y-%m-%d %H:%M:%S.%f")
-            df = df.replace({np.nan: None})
+            for i, column in enumerate(self.columns):
+                if column.type == MYSQL_DATA_TYPE.VECTOR:
+                    df[i] = df[i].apply(_dump_vector)
+            df.replace({np.nan: None}, inplace=True)
             return df.to_records(index=False).tolist()

         # slower but keep timestamp type
         df = self._df.replace({np.nan: None})  # TODO rework
-        return df.to_dict('split')['data']
+        return df.to_dict("split")["data"]

     def get_column_values(self, col_idx):
         # get by column index
@@ -434,14 +418,11 @@ class ResultSet:
         self._df[col_idx] = values

     def add_from_result_set(self, rs):
-
         source_names = rs.get_column_names()

         col_sequence = []
         for name in self.get_column_names():
-            col_sequence.append(
-                source_names.index(name)
-            )
+            col_sequence.append(source_names.index(name))

         raw_df = rs.get_raw_df()[col_sequence]

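
In `add_from_result_set` above, the incoming frame's columns are realigned by position: after `rename_df_columns` the frame's labels are integer positions, so indexing with `col_sequence` reorders the source columns to match this result set's layout. A toy version of the same realignment (the names are illustrative):

    import pandas as pd

    source_names = ["b", "a"]  # column order of the incoming result set
    target_names = ["a", "b"]  # column order of this result set

    col_sequence = [source_names.index(name) for name in target_names]  # [1, 0]

    df = pd.DataFrame([[1, 2]])  # labels are already positions: 0, 1
    print(df[col_sequence])      # columns swapped into the target order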
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py

@@ -28,7 +28,7 @@ def get_table_alias(table_obj, default_db_name):
     elif isinstance(table_obj, Select):
         # it is subquery
         if table_obj.alias is None:
-            name = 't'
+            name = "t"
         else:
             name = table_obj.alias.parts[0]
         name = (default_db_name, name)
@@ -37,10 +37,10 @@ def get_table_alias(table_obj, default_db_name):
         return get_table_alias(table_obj.left, default_db_name)
     else:
         # unknown yet object
-        return default_db_name, 't', 't'
+        return default_db_name, "t", "t"

     if table_obj.alias is not None:
-        name = name + ('.'.join(table_obj.alias.parts),)
+        name = name + (".".join(table_obj.alias.parts),)
     else:
         name = name + (name[1],)
     return name
@@ -57,7 +57,7 @@ def get_fill_param_fnc(steps_data):
             node_prev = callstack[0]
             if isinstance(node_prev, BinaryOperation):
                 # Check case: 'something IN Parameter()'
-                if node_prev.op.lower() == 'in' and node_prev.args[1] is node:
+                if node_prev.op.lower() == "in" and node_prev.args[1] is node:
                     is_single_item = False

         if is_single_item and len(items) == 1:
@@ -71,32 +71,28 @@ def get_fill_param_fnc(steps_data):
             rs = steps_data[node.value.step_num]
             items = [Constant(i) for i in rs.get_column_values(col_idx=0)]
             return Tuple(items)
+
     return fill_params


 class FetchDataframeStepCall(BaseStepCall):
-
     bind = FetchDataframeStep

     def call(self, step):
-
         dn = self.session.datahub.get(step.integration)
         query = step.query

         if dn is None:
-            raise UnknownError(f'Unknown integration name: {step.integration}')
+            raise UnknownError(f"Unknown integration name: {step.integration}")

         if query is None:
-            table_alias = (self.context.get('database'), 'result', 'result')
+            table_alias = (self.context.get("database"), "result", "result")

             # fetch raw_query
-            response: DataHubResponse = dn.query(
-                native_query=step.raw_query,
-                session=self.session
-            )
+            response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
             df = response.data_frame
         else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get('database'))
+            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))

             # TODO for information_schema we have 'database' = 'mindsdb'

@@ -106,19 +102,19 @@ class FetchDataframeStepCall(BaseStepCall):

             query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session)

-            response: DataHubResponse = dn.query(
-                query=query,
-                session=self.session
-            )
+            response: DataHubResponse = dn.query(query=query, session=self.session)
             df = response.data_frame

             if context_callback:
                 context_callback(df, response.columns)

+        # if query registered, set progress
+        if self.sql_query.run_query is not None:
+            self.sql_query.run_query.set_progress(df, None)
         return ResultSet.from_df(
             df,
             table_name=table_alias[1],
             table_alias=table_alias[2],
             database=table_alias[0],
-            mysql_types=response.mysql_types
+            mysql_types=response.mysql_types,
         )
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py

@@ -57,21 +57,21 @@ class FetchDataframePartitionCall(BaseStepCall):
         # get query record
         run_query = self.sql_query.run_query
         if run_query is None:
-            raise RuntimeError('Error with partitioning of the query')
+            raise RuntimeError("Error with partitioning of the query")
         run_query.set_params(step.params)

-        self.table_alias = get_table_alias(step.query.from_table, self.context.get('database'))
+        self.table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
         self.current_step_num = step.step_num
         self.substeps = step.steps

         # ml task queue enabled?
         use_threads, thread_count = False, None
-        if config['ml_task_queue']['type'] == 'redis':
+        if config["ml_task_queue"]["type"] == "redis":
             use_threads = True

         # use threads?
-        if 'threads' in step.params:
-            threads = step.params['threads']
+        if "threads" in step.params:
+            threads = step.params["threads"]
             if isinstance(threads, int):
                 thread_count = threads
                 use_threads = True
@@ -81,7 +81,7 @@ class FetchDataframePartitionCall(BaseStepCall):
                 # disable even with ml task queue
                 use_threads = False

-        on_error = step.params.get('error', 'raise')
+        on_error = step.params.get("error", "raise")
         if use_threads:
             return self.fetch_threads(run_query, query, thread_count=thread_count, on_error=on_error)
         else:
@@ -89,7 +89,7 @@ class FetchDataframePartitionCall(BaseStepCall):

     def fetch_iterate(self, run_query: RunningQuery, query: ASTNode, on_error: str = None) -> ResultSet:
         """
-            Process batches one by one in circle
+        Process batches one by one in circle
         """

         results = []
@@ -99,7 +99,7 @@ class FetchDataframePartitionCall(BaseStepCall):
                 sub_data = self.exec_sub_steps(df)
                 results.append(sub_data)
             except Exception as e:
-                if on_error == 'skip':
+                if on_error == "skip":
                     logger.error(e)
                 else:
                     raise e
@@ -131,12 +131,12 @@ class FetchDataframePartitionCall(BaseStepCall):
         - the final result is returned and used outside to concatenate with results of other's batches
         """
         input_data = ResultSet.from_df(
-            df,
-            table_name=self.table_alias[1],
-            table_alias=self.table_alias[2],
-            database=self.table_alias[0]
+            df, table_name=self.table_alias[1], table_alias=self.table_alias[2], database=self.table_alias[0]
         )

+        if len(self.substeps) == 0:
+            return input_data
+
         # execute with modified previous results
         steps_data2 = self.steps_data.copy()
         steps_data2[self.current_step_num] = input_data
@@ -147,8 +147,9 @@ class FetchDataframePartitionCall(BaseStepCall):
             steps_data2[substep.step_num] = sub_data
         return sub_data

-    def fetch_threads(self, run_query: RunningQuery, query: ASTNode,
-                      thread_count: int = None, on_error: str = None) -> ResultSet:
+    def fetch_threads(
+        self, run_query: RunningQuery, query: ASTNode, thread_count: int = None, on_error: str = None
+    ) -> ResultSet:
         """
         Process batches in threads
         - spawn required count of threads
@@ -170,9 +171,7 @@ class FetchDataframePartitionCall(BaseStepCall):
         results = []

         with ContextThreadPoolExecutor(max_workers=thread_count) as executor:
-
             for df in run_query.get_partitions(self.dn, self, query):
-
                 # split into chunks and send to workers
                 futures = []
                 for df2 in split_data_frame(df, partition_size):
@@ -182,13 +181,13 @@ class FetchDataframePartitionCall(BaseStepCall):
                     try:
                         results.append(future.result())
                     except Exception as e:
-                        if on_error == 'skip':
+                        if on_error == "skip":
                             logger.error(e)
                         else:
                             executor.shutdown()
                             raise e
                 if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
                     executor.shutdown()
-                    raise RuntimeError('Query is interrupted')
+                    raise RuntimeError("Query is interrupted")

         return self.concat_results(results)
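
The threaded path shown above fans partition chunks out to a pool and collects futures in order; with `on_error="skip"` a failed chunk is logged and dropped instead of aborting the whole query. A self-contained sketch of that submit/collect pattern, using the stdlib ThreadPoolExecutor in place of MindsDB's context-propagating ContextThreadPoolExecutor (the batch data and worker function are illustrative):

    import logging
    from concurrent.futures import ThreadPoolExecutor

    logger = logging.getLogger(__name__)

    def process_batch(batch):
        if batch is None:
            raise ValueError("bad partition")
        return sum(batch)

    def run(batches, on_error="raise", thread_count=4):
        results = []
        with ThreadPoolExecutor(max_workers=thread_count) as executor:
            futures = [executor.submit(process_batch, b) for b in batches]
            for future in futures:
                try:
                    results.append(future.result())
                except Exception as e:
                    if on_error == "skip":
                        logger.error(e)  # drop the failed chunk, keep going
                    else:
                        raise
        return results

    print(run([[1, 2], None, [3]], on_error="skip"))  # [3, 3] -- failed batch skipped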