MindsDB 25.4.1.0__py3-none-any.whl → 25.4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic; see the package registry's advisory for more details.

Files changed (63):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +91 -61
  3. mindsdb/api/executor/data_types/answer.py +9 -12
  4. mindsdb/api/executor/datahub/classes/response.py +11 -0
  5. mindsdb/api/executor/datahub/datanodes/datanode.py +4 -4
  6. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +10 -11
  7. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +22 -16
  8. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +43 -1
  9. mindsdb/api/executor/datahub/datanodes/project_datanode.py +20 -20
  10. mindsdb/api/executor/planner/plan_join.py +2 -2
  11. mindsdb/api/executor/planner/query_plan.py +1 -0
  12. mindsdb/api/executor/planner/query_planner.py +86 -14
  13. mindsdb/api/executor/planner/steps.py +11 -2
  14. mindsdb/api/executor/sql_query/result_set.py +10 -7
  15. mindsdb/api/executor/sql_query/sql_query.py +69 -84
  16. mindsdb/api/executor/sql_query/steps/__init__.py +1 -0
  17. mindsdb/api/executor/sql_query/steps/delete_step.py +2 -3
  18. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +5 -3
  19. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +288 -0
  20. mindsdb/api/executor/sql_query/steps/insert_step.py +2 -2
  21. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -2
  22. mindsdb/api/executor/sql_query/steps/subselect_step.py +20 -8
  23. mindsdb/api/executor/sql_query/steps/update_step.py +4 -6
  24. mindsdb/api/http/namespaces/sql.py +4 -1
  25. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/ok_packet.py +1 -1
  26. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +4 -27
  27. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +1 -0
  28. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +38 -37
  29. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +23 -13
  30. mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +17 -16
  31. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -0
  32. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +1 -1
  33. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -2
  34. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +4 -4
  35. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +26 -16
  36. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +36 -7
  37. mindsdb/integrations/handlers/redshift_handler/redshift_handler.py +1 -1
  38. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +18 -11
  39. mindsdb/integrations/libs/llm/config.py +11 -1
  40. mindsdb/integrations/libs/llm/utils.py +12 -0
  41. mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -2
  42. mindsdb/integrations/libs/response.py +9 -4
  43. mindsdb/integrations/libs/vectordatabase_handler.py +17 -5
  44. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +8 -98
  45. mindsdb/interfaces/agents/constants.py +12 -1
  46. mindsdb/interfaces/agents/langchain_agent.py +6 -0
  47. mindsdb/interfaces/database/log.py +8 -9
  48. mindsdb/interfaces/database/projects.py +1 -5
  49. mindsdb/interfaces/functions/controller.py +59 -17
  50. mindsdb/interfaces/functions/to_markdown.py +194 -0
  51. mindsdb/interfaces/jobs/jobs_controller.py +3 -3
  52. mindsdb/interfaces/knowledge_base/controller.py +223 -97
  53. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +3 -14
  54. mindsdb/interfaces/query_context/context_controller.py +224 -1
  55. mindsdb/interfaces/storage/db.py +23 -0
  56. mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py +45 -0
  57. mindsdb/utilities/context_executor.py +1 -1
  58. mindsdb/utilities/partitioning.py +35 -20
  59. {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/METADATA +227 -224
  60. {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/RECORD +63 -59
  61. {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/WHEEL +0 -0
  62. {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/licenses/LICENSE +0 -0
  63. {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@ from mindsdb_sql_parser.ast import (
14
14
  from mindsdb.utilities.exception import EntityNotExistsError
15
15
  from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
16
16
  from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
17
+ from mindsdb.api.executor.datahub.classes.response import DataHubResponse
17
18
  from mindsdb.utilities.partitioning import process_dataframe_in_partitions
18
19
 
19
20
 
@@ -45,10 +46,6 @@ class ProjectDataNode(DataNode):
45
46
  result = [TablesRow.from_dict(row) for row in tables]
46
47
  return result
47
48
 
48
- def has_table(self, table_name):
49
- tables = self.project.get_tables()
50
- return table_name in tables
51
-
52
49
  def get_table_columns(self, table_name, schema_name=None):
53
50
  return [
54
51
  {'name': name}
@@ -71,7 +68,7 @@ class ProjectDataNode(DataNode):
71
68
 
72
69
  return ml_handler.predict(model_name, df, project_name=self.project.name, version=version, params=params)
73
70
 
74
- def query(self, query=None, native_query=None, session=None):
71
+ def query(self, query=None, native_query=None, session=None) -> DataHubResponse:
75
72
  if query is None and native_query is not None:
76
73
  query = parse_sql(native_query)
77
74
 
@@ -81,7 +78,7 @@ class ProjectDataNode(DataNode):
81
78
  if kb_table:
82
79
  # this is the knowledge db
83
80
  kb_table.update_query(query)
84
- return pd.DataFrame(), []
81
+ return DataHubResponse()
85
82
 
86
83
  raise NotImplementedError(f"Can't update object: {query_table}")
87
84
 
@@ -91,7 +88,7 @@ class ProjectDataNode(DataNode):
91
88
  if kb_table:
92
89
  # this is the knowledge db
93
90
  kb_table.delete_query(query)
94
- return pd.DataFrame(), []
91
+ return DataHubResponse()
95
92
 
96
93
  raise NotImplementedError(f"Can't delete object: {query_table}")
97
94
 
@@ -111,8 +108,7 @@ class ProjectDataNode(DataNode):
111
108
  new_query.where,
112
109
  project_filter
113
110
  ])
114
- df, columns_info = self.information_schema.query(new_query)
115
- return df, columns_info
111
+ return self.information_schema.query(new_query)
116
112
  # endregion
117
113
 
118
114
  # other table from project
@@ -121,15 +117,15 @@ class ProjectDataNode(DataNode):
121
117
  # this is the view
122
118
  df = self.project.query_view(query, session)
123
119
 
124
- columns_info = [
125
- {
126
- 'name': k,
127
- 'type': v
128
- }
129
- for k, v in df.dtypes.items()
130
- ]
120
+ columns_info = [{
121
+ 'name': k,
122
+ 'type': v
123
+ } for k, v in df.dtypes.items()]
131
124
 
132
- return df, columns_info
125
+ return DataHubResponse(
126
+ data_frame=df,
127
+ columns=columns_info
128
+ )
133
129
 
134
130
  kb_table = session.kb_controller.get_table(query_table, self.project.id)
135
131
  if kb_table:
@@ -143,13 +139,16 @@ class ProjectDataNode(DataNode):
143
139
  for k, v in df.dtypes.items()
144
140
  ]
145
141
 
146
- return df, columns_info
142
+ return DataHubResponse(
143
+ data_frame=df,
144
+ columns=columns_info
145
+ )
147
146
 
148
147
  raise EntityNotExistsError(f"Can't select from {query_table} in project")
149
148
  else:
150
149
  raise NotImplementedError(f"Query not supported {query}")
151
150
 
152
- def create_table(self, table_name: Identifier, result_set=None, is_replace=False, **kwargs):
151
+ def create_table(self, table_name: Identifier, result_set=None, is_replace=False, **kwargs) -> DataHubResponse:
153
152
  # is_create - create table
154
153
  # is_replace - drop table if exists
155
154
  # is_create==False and is_replace==False: just insert
@@ -165,5 +164,6 @@ class ProjectDataNode(DataNode):
165
164
  kb_table.clear()
166
165
 
167
166
  df = result_set.to_df()
168
- return kb_table.insert(df)
167
+ kb_table.insert(df)
168
+ return DataHubResponse()
169
169
  raise NotImplementedError(f"Can't create table {table_name}")
@@ -119,7 +119,7 @@ class PlanJoinTablesQuery:
119
119
  query2.from_table = None
120
120
  query2.using = None
121
121
  query2.cte = None
122
- sup_select = QueryStep(query2, from_table=join_step.result)
122
+ sup_select = QueryStep(query2, from_table=join_step.result, strict_where=False)
123
123
  self.planner.plan.add_step(sup_select)
124
124
  return sup_select
125
125
  return join_step
@@ -423,7 +423,7 @@ class PlanJoinTablesQuery:
423
423
  else:
424
424
  query2.where = cond
425
425
 
426
- step = self.planner.get_integration_select_step(query2)
426
+ step = self.planner.get_integration_select_step(query2, params=query_in.using)
427
427
  self.tables_fetch_step[item.index] = step
428
428
 
429
429
  self.add_plan_step(step)
@@ -2,6 +2,7 @@
2
2
  class QueryPlan:
3
3
  def __init__(self, steps=None, **kwargs):
4
4
  self.steps = []
5
+ self.is_resumable = False
5
6
 
6
7
  if steps:
7
8
  for step in steps:
@@ -12,14 +12,13 @@ from mindsdb.api.executor.planner.exceptions import PlanningException
12
12
  from mindsdb.api.executor.planner import utils
13
13
  from mindsdb.api.executor.planner.query_plan import QueryPlan
14
14
  from mindsdb.api.executor.planner.steps import (
15
- FetchDataframeStep, ProjectStep, ApplyPredictorStep,
15
+ PlanStep, FetchDataframeStep, ProjectStep, ApplyPredictorStep,
16
16
  ApplyPredictorRowStep, UnionStep, GetPredictorColumns, SaveToTable,
17
- InsertToTable, UpdateToTable, SubSelectStep, QueryStep,
18
- DeleteStep, DataStep, CreateTableStep
17
+ InsertToTable, UpdateToTable, SubSelectStep, QueryStep, JoinStep,
18
+ DeleteStep, DataStep, CreateTableStep, FetchDataframeStepPartition
19
19
  )
20
20
  from mindsdb.api.executor.planner.utils import (
21
21
  disambiguate_predictor_column_identifier,
22
- get_deepest_select,
23
22
  recursively_extract_column_values,
24
23
  query_traversal, filters_to_bin_op
25
24
  )
@@ -166,7 +165,11 @@ class QueryPlanner:
166
165
 
167
166
  query_traversal(query, _prepare_integration_select)
168
167
 
169
- def get_integration_select_step(self, select):
168
+ def get_integration_select_step(self, select: Select, params: dict = None) -> PlanStep:
169
+ """
170
+ Generate planner step to execute query over integration or over results of previous step (if it is CTE)
171
+ """
172
+
170
173
  if isinstance(select.from_table, NativeQuery):
171
174
  integration_name = select.from_table.integration.parts[-1]
172
175
  else:
@@ -188,12 +191,22 @@ class QueryPlanner:
188
191
  if fetch_df_select.using is not None:
189
192
  fetch_df_select.using = None
190
193
 
191
- return FetchDataframeStep(integration=integration_name, query=fetch_df_select)
194
+ if params:
195
+ fetch_params = params.copy()
196
+ # remove partition parameters
197
+ for key in ('batch_size', 'track_column'):
198
+ if key in params:
199
+ del params[key]
200
+ if 'track_column' in fetch_params and isinstance(fetch_params['track_column'], Identifier):
201
+ fetch_params['track_column'] = fetch_params['track_column'].parts[-1]
202
+ else:
203
+ fetch_params = None
204
+ return FetchDataframeStep(integration=integration_name, query=fetch_df_select, params=fetch_params)
192
205
 
193
206
  def plan_integration_select(self, select):
194
207
  """Plan for a select query that can be fully executed in an integration"""
195
208
 
196
- return self.plan.add_step(self.get_integration_select_step(select))
209
+ return self.plan.add_step(self.get_integration_select_step(select, params=select.using))
197
210
 
198
211
  def resolve_database_table(self, node: Identifier):
199
212
  # resolves integration name and table name
@@ -414,12 +427,6 @@ class QueryPlanner:
414
427
 
415
428
  return self.plan_mdb_nested_select(select)
416
429
 
417
- def plan_integration_nested_select(self, select, integration_name):
418
- fetch_df_select = copy.deepcopy(select)
419
- deepest_select = get_deepest_select(fetch_df_select)
420
- self.prepare_integration_select(integration_name, deepest_select)
421
- return self.plan.add_step(FetchDataframeStep(integration=integration_name, query=fetch_df_select))
422
-
423
430
  def plan_mdb_nested_select(self, select):
424
431
  # plan nested select
425
432
 
@@ -818,7 +825,72 @@ class QueryPlanner:
818
825
  else:
819
826
  raise PlanningException(f'Unsupported query type {type(query)}')
820
827
 
821
- return self.plan
828
+ plan = self.handle_partitioning(self.plan)
829
+
830
+ return plan
831
+
832
+ def handle_partitioning(self, plan: QueryPlan) -> QueryPlan:
833
+ """
834
+ If plan has fetching in partitions:
835
+ try to rebuild plan to send fetched chunk of data through the following steps, if it is possible
836
+ """
837
+
838
+ # handle fetchdataframe partitioning
839
+ steps_out = []
840
+
841
+ partition_step = None
842
+ for step in plan.steps:
843
+ if isinstance(step, FetchDataframeStep) and step.params is not None:
844
+ batch_size = step.params.get('batch_size')
845
+ if batch_size is not None:
846
+ # found batched fetch
847
+ partition_step = FetchDataframeStepPartition(
848
+ step_num=step.step_num,
849
+ integration=step.integration,
850
+ query=step.query,
851
+ raw_query=step.raw_query,
852
+ params=step.params
853
+ )
854
+ steps_out.append(partition_step)
855
+ # mark plan
856
+ plan.is_resumable = True
857
+ continue
858
+ else:
859
+ step.params = None
860
+
861
+ if partition_step is not None:
862
+ # check and add step into partition
863
+
864
+ can_be_partitioned = False
865
+ if isinstance(step, (JoinStep, ApplyPredictorStep, InsertToTable)):
866
+ can_be_partitioned = True
867
+ elif isinstance(step, QueryStep):
868
+ query = step.query
869
+ if (
870
+ query.group_by is None and query.order_by is None and query.distinct is False
871
+ and query.limit is None and query.offset is None
872
+ ):
873
+ no_identifiers = [
874
+ target
875
+ for target in step.query.targets
876
+ if not isinstance(target, (Star, Identifier))
877
+ ]
878
+ if len(no_identifiers) == 0:
879
+ can_be_partitioned = True
880
+
881
+ if not can_be_partitioned:
882
+ if len(partition_step.steps) == 0:
883
+ # Nothing can be partitioned, failback to old plan
884
+ plan.is_resumable = False
885
+ return plan
886
+ partition_step = None
887
+ else:
888
+ partition_step.steps.append(step)
889
+ continue
890
+
891
+ steps_out.append(step)
892
+ plan.steps = steps_out
893
+ return plan
822
894
 
823
895
  def prepare_steps(self, query):
824
896
  statement_planner = PreparedStatementPlanner(self)
@@ -104,11 +104,19 @@ class LimitOffsetStep(PlanStep):
104
104
 
105
105
  class FetchDataframeStep(PlanStep):
106
106
  """Fetches a dataframe from external integration"""
107
- def __init__(self, integration, query=None, raw_query=None, *args, **kwargs):
107
+ def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs):
108
108
  super().__init__(*args, **kwargs)
109
109
  self.integration = integration
110
110
  self.query = query
111
111
  self.raw_query = raw_query
112
+ self.params = params
113
+
114
+
115
+ class FetchDataframeStepPartition(FetchDataframeStep):
116
+ """Fetches a dataframe from external integration in partitions"""
117
+ def __init__(self, *args, **kwargs):
118
+ super().__init__(*args, **kwargs)
119
+ self.steps = []
112
120
 
113
121
 
114
122
  class ApplyPredictorStep(PlanStep):
@@ -249,11 +257,12 @@ class SubSelectStep(PlanStep):
249
257
 
250
258
 
251
259
  class QueryStep(PlanStep):
252
- def __init__(self, query, from_table=None, *args, **kwargs):
260
+ def __init__(self, query, from_table=None, *args, strict_where=True, **kwargs):
253
261
  """Performs query using injected dataframe"""
254
262
  super().__init__(*args, **kwargs)
255
263
  self.query = query
256
264
  self.from_table = from_table
265
+ self.strict_where = strict_where
257
266
 
258
267
 
259
268
  class DataStep(PlanStep):
@@ -50,13 +50,14 @@ def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
50
50
 
51
51
 
52
52
  class ResultSet:
53
- def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None):
54
- '''
55
-
56
- :param columns: list of Columns
57
- :param values: data of resultSet, have to be list of lists with length equal to column
58
- :param df: injected dataframe, have to have enumerated columns and length equal to columns
59
- '''
53
+ def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None, affected_rows: int = None):
54
+ """
55
+ Args:
56
+ columns: list of Columns
57
+ values (List[List]): data of resultSet, have to be list of lists with length equal to column
58
+ df (pd.DataFrame): injected dataframe, have to have enumerated columns and length equal to columns
59
+ affected_rows (int): number of affected rows
60
+ """
60
61
  if columns is None:
61
62
  columns = []
62
63
  self._columns = columns
@@ -67,6 +68,8 @@ class ResultSet:
67
68
  df = pd.DataFrame(values)
68
69
  self._df = df
69
70
 
71
+ self.affected_rows = affected_rows
72
+
70
73
  self.is_prediction = False
71
74
 
72
75
  def __repr__(self):
@@ -8,11 +8,11 @@
8
8
  * permission of MindsDB Inc
9
9
  *******************************************************
10
10
  """
11
- import re
12
11
  import inspect
13
12
  from textwrap import dedent
13
+ from typing import Union, Dict
14
14
 
15
- from mindsdb_sql_parser import parse_sql
15
+ from mindsdb_sql_parser import parse_sql, ASTNode
16
16
  from mindsdb.api.executor.planner.steps import (
17
17
  ApplyTimeseriesPredictorStep,
18
18
  ApplyPredictorRowStep,
@@ -23,7 +23,7 @@ from mindsdb.api.executor.planner.exceptions import PlanningException
23
23
  from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
24
24
  from mindsdb.api.executor.planner import query_planner
25
25
 
26
- from mindsdb.api.executor.utilities.sql import query_df, get_query_models
26
+ from mindsdb.api.executor.utilities.sql import get_query_models
27
27
  from mindsdb.interfaces.model.functions import get_model_record
28
28
  from mindsdb.api.executor.exceptions import (
29
29
  BadTableError,
@@ -33,19 +33,21 @@ from mindsdb.api.executor.exceptions import (
33
33
  import mindsdb.utilities.profiler as profiler
34
34
  from mindsdb.utilities.fs import create_process_mark, delete_process_mark
35
35
  from mindsdb.utilities.exception import EntityNotExistsError
36
+ from mindsdb.interfaces.query_context.context_controller import query_context_controller
37
+ from mindsdb.utilities.context import context as ctx
38
+
36
39
 
37
40
  from . import steps
38
41
  from .result_set import ResultSet, Column
39
42
  from . steps.base import BaseStepCall
40
43
 
41
- superset_subquery = re.compile(r'from[\s\n]*(\(.*\))[\s\n]*as[\s\n]*virtual_table', flags=re.IGNORECASE | re.MULTILINE | re.S)
42
-
43
44
 
44
45
  class SQLQuery:
45
46
 
46
47
  step_handlers = {}
47
48
 
48
- def __init__(self, sql, session, execute=True, database=None):
49
+ def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
50
+ database: str = None, query_id: int = None):
49
51
  self.session = session
50
52
 
51
53
  if database is not None:
@@ -59,23 +61,22 @@ class SQLQuery:
59
61
  }
60
62
 
61
63
  self.columns_list = None
62
- self.steps_data = {}
64
+ self.steps_data: Dict[int, ResultSet] = {}
63
65
 
64
- self.planner = None
66
+ self.planner: query_planner.QueryPlanner = None
65
67
  self.parameters = []
66
- self.fetched_data = None
68
+ self.fetched_data: ResultSet = None
67
69
 
68
70
  self.outer_query = None
71
+ self.run_query = None
72
+ self.query_id = query_id
73
+ if query_id is not None:
74
+ # resume query
75
+ run_query = query_context_controller.get_query(self.query_id)
76
+ run_query.clear_error()
77
+ sql = run_query.sql
69
78
 
70
79
  if isinstance(sql, str):
71
- # region workaround for subqueries in superset
72
- if 'as virtual_table' in sql.lower():
73
- subquery = re.findall(superset_subquery, sql)
74
- if isinstance(subquery, list) and len(subquery) == 1:
75
- subquery = subquery[0]
76
- self.outer_query = sql.replace(subquery, 'dataframe')
77
- sql = subquery.strip('()')
78
- # endregion
79
80
  self.query = parse_sql(sql)
80
81
  self.context['query_str'] = sql
81
82
  else:
@@ -89,7 +90,6 @@ class SQLQuery:
89
90
  self.create_planner()
90
91
 
91
92
  if execute:
92
- self.prepare_query(prepare=False)
93
93
  self.execute_query()
94
94
 
95
95
  @classmethod
@@ -190,63 +190,62 @@ class SQLQuery:
190
190
  default_namespace=database,
191
191
  )
192
192
 
193
- def fetch(self, view='result_set'):
194
- data = self.fetched_data
195
-
196
- if view == 'dataframe':
197
- result = data.to_df()
198
- else:
199
- result = data
200
-
201
- return {
202
- 'success': True,
203
- 'result': result
204
- }
193
+ def prepare_query(self):
194
+ """it is prepared statement call
195
+ """
196
+ try:
197
+ for step in self.planner.prepare_steps(self.query):
198
+ data = self.execute_step(step)
199
+ step.set_result(data)
200
+ self.steps_data[step.step_num] = data
201
+ except PlanningException as e:
202
+ raise LogicError(e)
205
203
 
206
- def prepare_query(self, prepare=True):
207
- if prepare:
208
- # it is prepared statement call
209
- try:
210
- for step in self.planner.prepare_steps(self.query):
211
- data = self.execute_step(step)
212
- step.set_result(data)
213
- self.steps_data[step.step_num] = data
214
- except PlanningException as e:
215
- raise LogicError(e)
216
-
217
- statement_info = self.planner.get_statement_info()
218
-
219
- self.columns_list = []
220
- for col in statement_info['columns']:
221
- self.columns_list.append(
222
- Column(
223
- database=col['ds'],
224
- table_name=col['table_name'],
225
- table_alias=col['table_alias'],
226
- name=col['name'],
227
- alias=col['alias'],
228
- type=col['type']
229
- )
230
- )
204
+ statement_info = self.planner.get_statement_info()
231
205
 
232
- self.parameters = [
206
+ self.columns_list = []
207
+ for col in statement_info['columns']:
208
+ self.columns_list.append(
233
209
  Column(
210
+ database=col['ds'],
211
+ table_name=col['table_name'],
212
+ table_alias=col['table_alias'],
234
213
  name=col['name'],
235
214
  alias=col['alias'],
236
215
  type=col['type']
237
216
  )
238
- for col in statement_info['parameters']
239
- ]
240
-
241
- def execute_query(self, params=None):
217
+ )
218
+
219
+ self.parameters = [
220
+ Column(
221
+ name=col['name'],
222
+ alias=col['alias'],
223
+ type=col['type']
224
+ )
225
+ for col in statement_info['parameters']
226
+ ]
227
+
228
+ def execute_query(self):
242
229
  if self.fetched_data is not None:
243
230
  # no need to execute
244
231
  return
245
232
 
233
+ try:
234
+ steps = list(self.planner.execute_steps())
235
+ except PlanningException as e:
236
+ raise LogicError(e)
237
+
238
+ if self.planner.plan.is_resumable:
239
+ # create query
240
+ if self.query_id is not None:
241
+ self.run_query = query_context_controller.get_query(self.query_id)
242
+ else:
243
+ self.run_query = query_context_controller.create_query(self.context['query_str'])
244
+ ctx.run_query_id = self.run_query.record.id
245
+
246
246
  step_result = None
247
247
  process_mark = None
248
248
  try:
249
- steps = list(self.planner.execute_steps(params))
250
249
  steps_classes = (x.__class__ for x in steps)
251
250
  predict_steps = (ApplyPredictorRowStep, ApplyPredictorStep, ApplyTimeseriesPredictorStep)
252
251
  if any(s in predict_steps for s in steps_classes):
@@ -255,10 +254,16 @@ class SQLQuery:
255
254
  with profiler.Context(f'step: {step.__class__.__name__}'):
256
255
  step_result = self.execute_step(step)
257
256
  self.steps_data[step.step_num] = step_result
258
- except PlanningException as e:
259
- raise LogicError(e)
260
257
  except Exception as e:
258
+ if self.run_query is not None:
259
+ # set error and place where it stopped
260
+ self.run_query.on_error(e, step.step_num, self.steps_data)
261
261
  raise e
262
+ else:
263
+ # mark running query as completed
264
+ if self.run_query is not None:
265
+ self.run_query.finish()
266
+ ctx.run_query_id = None
262
267
  finally:
263
268
  if process_mark is not None:
264
269
  delete_process_mark('predict', process_mark)
@@ -270,27 +275,7 @@ class SQLQuery:
270
275
  if len(self.steps_data) == 0:
271
276
  return
272
277
 
273
- try:
274
- if self.outer_query is not None:
275
- # workaround for subqueries in superset. remove it?
276
- # +++
277
- # ???
278
-
279
- result = step_result
280
- df = result.to_df()
281
-
282
- df2 = query_df(df, self.outer_query)
283
-
284
- result2 = ResultSet().from_df(df2, database='', table_name='')
285
-
286
- self.columns_list = result2.columns
287
- self.fetched_data = result2
288
-
289
- else:
290
- result = step_result
291
- self.fetched_data = result
292
- except Exception as e:
293
- raise UnknownError("error in preparing result query step") from e
278
+ self.fetched_data = step_result
294
279
 
295
280
  try:
296
281
  if hasattr(self, 'columns_list') is False:
@@ -1,6 +1,7 @@
1
1
  from .apply_predictor_step import ApplyPredictorStepCall, ApplyPredictorRowStepCall, ApplyTimeseriesPredictorStepCall
2
2
  from .delete_step import DeleteStepCall
3
3
  from .fetch_dataframe import FetchDataframeStepCall
4
+ from .fetch_dataframe_partition import FetchDataframePartitionCall
4
5
  from .insert_step import InsertToTableCall, SaveToTableCall, CreateTableCall
5
6
  from .join_step import JoinStepCall
6
7
  from .map_reduce_step import MapReduceStepCall
@@ -44,6 +44,5 @@ class DeleteStepCall(BaseStepCall):
44
44
 
45
45
  query_traversal(query.where, fill_params)
46
46
 
47
- dn.query(query=query, session=self.session)
48
-
49
- return ResultSet()
47
+ response = dn.query(query=query, session=self.session)
48
+ return ResultSet(affected_rows=response.affected_rows)
@@ -89,10 +89,11 @@ class FetchDataframeStepCall(BaseStepCall):
89
89
  table_alias = (self.context.get('database'), 'result', 'result')
90
90
 
91
91
  # fetch raw_query
92
- df, columns_info = dn.query(
92
+ response = dn.query(
93
93
  native_query=step.raw_query,
94
94
  session=self.session
95
95
  )
96
+ df = response.data_frame
96
97
  else:
97
98
  table_alias = get_table_alias(step.query.from_table, self.context.get('database'))
98
99
 
@@ -104,13 +105,14 @@ class FetchDataframeStepCall(BaseStepCall):
104
105
 
105
106
  query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session)
106
107
 
107
- df, columns_info = dn.query(
108
+ response = dn.query(
108
109
  query=query,
109
110
  session=self.session
110
111
  )
112
+ df = response.data_frame
111
113
 
112
114
  if context_callback:
113
- context_callback(df, columns_info)
115
+ context_callback(df, response.columns)
114
116
 
115
117
  result = ResultSet()
116
118