MindsDB 25.4.1.0__py3-none-any.whl → 25.4.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +91 -61
- mindsdb/api/executor/data_types/answer.py +9 -12
- mindsdb/api/executor/datahub/classes/response.py +11 -0
- mindsdb/api/executor/datahub/datanodes/datanode.py +4 -4
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +10 -11
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +22 -16
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +43 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +20 -20
- mindsdb/api/executor/planner/plan_join.py +2 -2
- mindsdb/api/executor/planner/query_plan.py +1 -0
- mindsdb/api/executor/planner/query_planner.py +86 -14
- mindsdb/api/executor/planner/steps.py +11 -2
- mindsdb/api/executor/sql_query/result_set.py +10 -7
- mindsdb/api/executor/sql_query/sql_query.py +69 -84
- mindsdb/api/executor/sql_query/steps/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/delete_step.py +2 -3
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +5 -3
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +288 -0
- mindsdb/api/executor/sql_query/steps/insert_step.py +2 -2
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +20 -8
- mindsdb/api/executor/sql_query/steps/update_step.py +4 -6
- mindsdb/api/http/namespaces/sql.py +4 -1
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/ok_packet.py +1 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +4 -27
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +1 -0
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +38 -37
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +23 -13
- mindsdb/integrations/handlers/langchain_embedding_handler/langchain_embedding_handler.py +17 -16
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +1 -0
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +1 -1
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +3 -2
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +4 -4
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +26 -16
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +36 -7
- mindsdb/integrations/handlers/redshift_handler/redshift_handler.py +1 -1
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +18 -11
- mindsdb/integrations/libs/llm/config.py +11 -1
- mindsdb/integrations/libs/llm/utils.py +12 -0
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -2
- mindsdb/integrations/libs/response.py +9 -4
- mindsdb/integrations/libs/vectordatabase_handler.py +17 -5
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +8 -98
- mindsdb/interfaces/agents/constants.py +12 -1
- mindsdb/interfaces/agents/langchain_agent.py +6 -0
- mindsdb/interfaces/database/log.py +8 -9
- mindsdb/interfaces/database/projects.py +1 -5
- mindsdb/interfaces/functions/controller.py +59 -17
- mindsdb/interfaces/functions/to_markdown.py +194 -0
- mindsdb/interfaces/jobs/jobs_controller.py +3 -3
- mindsdb/interfaces/knowledge_base/controller.py +223 -97
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +3 -14
- mindsdb/interfaces/query_context/context_controller.py +224 -1
- mindsdb/interfaces/storage/db.py +23 -0
- mindsdb/migrations/versions/2025-03-21_fda503400e43_queries.py +45 -0
- mindsdb/utilities/context_executor.py +1 -1
- mindsdb/utilities/partitioning.py +35 -20
- {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/METADATA +227 -224
- {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/RECORD +63 -59
- {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/WHEEL +0 -0
- {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.4.1.0.dist-info → mindsdb-25.4.2.1.dist-info}/top_level.txt +0 -0
|
@@ -14,6 +14,7 @@ from mindsdb_sql_parser.ast import (
|
|
|
14
14
|
from mindsdb.utilities.exception import EntityNotExistsError
|
|
15
15
|
from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
|
|
16
16
|
from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
|
|
17
|
+
from mindsdb.api.executor.datahub.classes.response import DataHubResponse
|
|
17
18
|
from mindsdb.utilities.partitioning import process_dataframe_in_partitions
|
|
18
19
|
|
|
19
20
|
|
|
@@ -45,10 +46,6 @@ class ProjectDataNode(DataNode):
|
|
|
45
46
|
result = [TablesRow.from_dict(row) for row in tables]
|
|
46
47
|
return result
|
|
47
48
|
|
|
48
|
-
def has_table(self, table_name):
|
|
49
|
-
tables = self.project.get_tables()
|
|
50
|
-
return table_name in tables
|
|
51
|
-
|
|
52
49
|
def get_table_columns(self, table_name, schema_name=None):
|
|
53
50
|
return [
|
|
54
51
|
{'name': name}
|
|
@@ -71,7 +68,7 @@ class ProjectDataNode(DataNode):
|
|
|
71
68
|
|
|
72
69
|
return ml_handler.predict(model_name, df, project_name=self.project.name, version=version, params=params)
|
|
73
70
|
|
|
74
|
-
def query(self, query=None, native_query=None, session=None):
|
|
71
|
+
def query(self, query=None, native_query=None, session=None) -> DataHubResponse:
|
|
75
72
|
if query is None and native_query is not None:
|
|
76
73
|
query = parse_sql(native_query)
|
|
77
74
|
|
|
@@ -81,7 +78,7 @@ class ProjectDataNode(DataNode):
|
|
|
81
78
|
if kb_table:
|
|
82
79
|
# this is the knowledge db
|
|
83
80
|
kb_table.update_query(query)
|
|
84
|
-
return
|
|
81
|
+
return DataHubResponse()
|
|
85
82
|
|
|
86
83
|
raise NotImplementedError(f"Can't update object: {query_table}")
|
|
87
84
|
|
|
@@ -91,7 +88,7 @@ class ProjectDataNode(DataNode):
|
|
|
91
88
|
if kb_table:
|
|
92
89
|
# this is the knowledge db
|
|
93
90
|
kb_table.delete_query(query)
|
|
94
|
-
return
|
|
91
|
+
return DataHubResponse()
|
|
95
92
|
|
|
96
93
|
raise NotImplementedError(f"Can't delete object: {query_table}")
|
|
97
94
|
|
|
@@ -111,8 +108,7 @@ class ProjectDataNode(DataNode):
|
|
|
111
108
|
new_query.where,
|
|
112
109
|
project_filter
|
|
113
110
|
])
|
|
114
|
-
|
|
115
|
-
return df, columns_info
|
|
111
|
+
return self.information_schema.query(new_query)
|
|
116
112
|
# endregion
|
|
117
113
|
|
|
118
114
|
# other table from project
|
|
@@ -121,15 +117,15 @@ class ProjectDataNode(DataNode):
|
|
|
121
117
|
# this is the view
|
|
122
118
|
df = self.project.query_view(query, session)
|
|
123
119
|
|
|
124
|
-
columns_info = [
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
}
|
|
129
|
-
for k, v in df.dtypes.items()
|
|
130
|
-
]
|
|
120
|
+
columns_info = [{
|
|
121
|
+
'name': k,
|
|
122
|
+
'type': v
|
|
123
|
+
} for k, v in df.dtypes.items()]
|
|
131
124
|
|
|
132
|
-
return
|
|
125
|
+
return DataHubResponse(
|
|
126
|
+
data_frame=df,
|
|
127
|
+
columns=columns_info
|
|
128
|
+
)
|
|
133
129
|
|
|
134
130
|
kb_table = session.kb_controller.get_table(query_table, self.project.id)
|
|
135
131
|
if kb_table:
|
|
@@ -143,13 +139,16 @@ class ProjectDataNode(DataNode):
|
|
|
143
139
|
for k, v in df.dtypes.items()
|
|
144
140
|
]
|
|
145
141
|
|
|
146
|
-
return
|
|
142
|
+
return DataHubResponse(
|
|
143
|
+
data_frame=df,
|
|
144
|
+
columns=columns_info
|
|
145
|
+
)
|
|
147
146
|
|
|
148
147
|
raise EntityNotExistsError(f"Can't select from {query_table} in project")
|
|
149
148
|
else:
|
|
150
149
|
raise NotImplementedError(f"Query not supported {query}")
|
|
151
150
|
|
|
152
|
-
def create_table(self, table_name: Identifier, result_set=None, is_replace=False, **kwargs):
|
|
151
|
+
def create_table(self, table_name: Identifier, result_set=None, is_replace=False, **kwargs) -> DataHubResponse:
|
|
153
152
|
# is_create - create table
|
|
154
153
|
# is_replace - drop table if exists
|
|
155
154
|
# is_create==False and is_replace==False: just insert
|
|
@@ -165,5 +164,6 @@ class ProjectDataNode(DataNode):
|
|
|
165
164
|
kb_table.clear()
|
|
166
165
|
|
|
167
166
|
df = result_set.to_df()
|
|
168
|
-
|
|
167
|
+
kb_table.insert(df)
|
|
168
|
+
return DataHubResponse()
|
|
169
169
|
raise NotImplementedError(f"Can't create table {table_name}")
|
|
@@ -119,7 +119,7 @@ class PlanJoinTablesQuery:
|
|
|
119
119
|
query2.from_table = None
|
|
120
120
|
query2.using = None
|
|
121
121
|
query2.cte = None
|
|
122
|
-
sup_select = QueryStep(query2, from_table=join_step.result)
|
|
122
|
+
sup_select = QueryStep(query2, from_table=join_step.result, strict_where=False)
|
|
123
123
|
self.planner.plan.add_step(sup_select)
|
|
124
124
|
return sup_select
|
|
125
125
|
return join_step
|
|
@@ -423,7 +423,7 @@ class PlanJoinTablesQuery:
|
|
|
423
423
|
else:
|
|
424
424
|
query2.where = cond
|
|
425
425
|
|
|
426
|
-
step = self.planner.get_integration_select_step(query2)
|
|
426
|
+
step = self.planner.get_integration_select_step(query2, params=query_in.using)
|
|
427
427
|
self.tables_fetch_step[item.index] = step
|
|
428
428
|
|
|
429
429
|
self.add_plan_step(step)
|
|
@@ -12,14 +12,13 @@ from mindsdb.api.executor.planner.exceptions import PlanningException
|
|
|
12
12
|
from mindsdb.api.executor.planner import utils
|
|
13
13
|
from mindsdb.api.executor.planner.query_plan import QueryPlan
|
|
14
14
|
from mindsdb.api.executor.planner.steps import (
|
|
15
|
-
FetchDataframeStep, ProjectStep, ApplyPredictorStep,
|
|
15
|
+
PlanStep, FetchDataframeStep, ProjectStep, ApplyPredictorStep,
|
|
16
16
|
ApplyPredictorRowStep, UnionStep, GetPredictorColumns, SaveToTable,
|
|
17
|
-
InsertToTable, UpdateToTable, SubSelectStep, QueryStep,
|
|
18
|
-
DeleteStep, DataStep, CreateTableStep
|
|
17
|
+
InsertToTable, UpdateToTable, SubSelectStep, QueryStep, JoinStep,
|
|
18
|
+
DeleteStep, DataStep, CreateTableStep, FetchDataframeStepPartition
|
|
19
19
|
)
|
|
20
20
|
from mindsdb.api.executor.planner.utils import (
|
|
21
21
|
disambiguate_predictor_column_identifier,
|
|
22
|
-
get_deepest_select,
|
|
23
22
|
recursively_extract_column_values,
|
|
24
23
|
query_traversal, filters_to_bin_op
|
|
25
24
|
)
|
|
@@ -166,7 +165,11 @@ class QueryPlanner:
|
|
|
166
165
|
|
|
167
166
|
query_traversal(query, _prepare_integration_select)
|
|
168
167
|
|
|
169
|
-
def get_integration_select_step(self, select):
|
|
168
|
+
def get_integration_select_step(self, select: Select, params: dict = None) -> PlanStep:
|
|
169
|
+
"""
|
|
170
|
+
Generate planner step to execute query over integration or over results of previous step (if it is CTE)
|
|
171
|
+
"""
|
|
172
|
+
|
|
170
173
|
if isinstance(select.from_table, NativeQuery):
|
|
171
174
|
integration_name = select.from_table.integration.parts[-1]
|
|
172
175
|
else:
|
|
@@ -188,12 +191,22 @@ class QueryPlanner:
|
|
|
188
191
|
if fetch_df_select.using is not None:
|
|
189
192
|
fetch_df_select.using = None
|
|
190
193
|
|
|
191
|
-
|
|
194
|
+
if params:
|
|
195
|
+
fetch_params = params.copy()
|
|
196
|
+
# remove partition parameters
|
|
197
|
+
for key in ('batch_size', 'track_column'):
|
|
198
|
+
if key in params:
|
|
199
|
+
del params[key]
|
|
200
|
+
if 'track_column' in fetch_params and isinstance(fetch_params['track_column'], Identifier):
|
|
201
|
+
fetch_params['track_column'] = fetch_params['track_column'].parts[-1]
|
|
202
|
+
else:
|
|
203
|
+
fetch_params = None
|
|
204
|
+
return FetchDataframeStep(integration=integration_name, query=fetch_df_select, params=fetch_params)
|
|
192
205
|
|
|
193
206
|
def plan_integration_select(self, select):
|
|
194
207
|
"""Plan for a select query that can be fully executed in an integration"""
|
|
195
208
|
|
|
196
|
-
return self.plan.add_step(self.get_integration_select_step(select))
|
|
209
|
+
return self.plan.add_step(self.get_integration_select_step(select, params=select.using))
|
|
197
210
|
|
|
198
211
|
def resolve_database_table(self, node: Identifier):
|
|
199
212
|
# resolves integration name and table name
|
|
@@ -414,12 +427,6 @@ class QueryPlanner:
|
|
|
414
427
|
|
|
415
428
|
return self.plan_mdb_nested_select(select)
|
|
416
429
|
|
|
417
|
-
def plan_integration_nested_select(self, select, integration_name):
|
|
418
|
-
fetch_df_select = copy.deepcopy(select)
|
|
419
|
-
deepest_select = get_deepest_select(fetch_df_select)
|
|
420
|
-
self.prepare_integration_select(integration_name, deepest_select)
|
|
421
|
-
return self.plan.add_step(FetchDataframeStep(integration=integration_name, query=fetch_df_select))
|
|
422
|
-
|
|
423
430
|
def plan_mdb_nested_select(self, select):
|
|
424
431
|
# plan nested select
|
|
425
432
|
|
|
@@ -818,7 +825,72 @@ class QueryPlanner:
|
|
|
818
825
|
else:
|
|
819
826
|
raise PlanningException(f'Unsupported query type {type(query)}')
|
|
820
827
|
|
|
821
|
-
|
|
828
|
+
plan = self.handle_partitioning(self.plan)
|
|
829
|
+
|
|
830
|
+
return plan
|
|
831
|
+
|
|
832
|
+
def handle_partitioning(self, plan: QueryPlan) -> QueryPlan:
|
|
833
|
+
"""
|
|
834
|
+
If plan has fetching in partitions:
|
|
835
|
+
try to rebuild plan to send fetched chunk of data through the following steps, if it is possible
|
|
836
|
+
"""
|
|
837
|
+
|
|
838
|
+
# handle fetchdataframe partitioning
|
|
839
|
+
steps_out = []
|
|
840
|
+
|
|
841
|
+
partition_step = None
|
|
842
|
+
for step in plan.steps:
|
|
843
|
+
if isinstance(step, FetchDataframeStep) and step.params is not None:
|
|
844
|
+
batch_size = step.params.get('batch_size')
|
|
845
|
+
if batch_size is not None:
|
|
846
|
+
# found batched fetch
|
|
847
|
+
partition_step = FetchDataframeStepPartition(
|
|
848
|
+
step_num=step.step_num,
|
|
849
|
+
integration=step.integration,
|
|
850
|
+
query=step.query,
|
|
851
|
+
raw_query=step.raw_query,
|
|
852
|
+
params=step.params
|
|
853
|
+
)
|
|
854
|
+
steps_out.append(partition_step)
|
|
855
|
+
# mark plan
|
|
856
|
+
plan.is_resumable = True
|
|
857
|
+
continue
|
|
858
|
+
else:
|
|
859
|
+
step.params = None
|
|
860
|
+
|
|
861
|
+
if partition_step is not None:
|
|
862
|
+
# check and add step into partition
|
|
863
|
+
|
|
864
|
+
can_be_partitioned = False
|
|
865
|
+
if isinstance(step, (JoinStep, ApplyPredictorStep, InsertToTable)):
|
|
866
|
+
can_be_partitioned = True
|
|
867
|
+
elif isinstance(step, QueryStep):
|
|
868
|
+
query = step.query
|
|
869
|
+
if (
|
|
870
|
+
query.group_by is None and query.order_by is None and query.distinct is False
|
|
871
|
+
and query.limit is None and query.offset is None
|
|
872
|
+
):
|
|
873
|
+
no_identifiers = [
|
|
874
|
+
target
|
|
875
|
+
for target in step.query.targets
|
|
876
|
+
if not isinstance(target, (Star, Identifier))
|
|
877
|
+
]
|
|
878
|
+
if len(no_identifiers) == 0:
|
|
879
|
+
can_be_partitioned = True
|
|
880
|
+
|
|
881
|
+
if not can_be_partitioned:
|
|
882
|
+
if len(partition_step.steps) == 0:
|
|
883
|
+
# Nothing can be partitioned, failback to old plan
|
|
884
|
+
plan.is_resumable = False
|
|
885
|
+
return plan
|
|
886
|
+
partition_step = None
|
|
887
|
+
else:
|
|
888
|
+
partition_step.steps.append(step)
|
|
889
|
+
continue
|
|
890
|
+
|
|
891
|
+
steps_out.append(step)
|
|
892
|
+
plan.steps = steps_out
|
|
893
|
+
return plan
|
|
822
894
|
|
|
823
895
|
def prepare_steps(self, query):
|
|
824
896
|
statement_planner = PreparedStatementPlanner(self)
|
|
@@ -104,11 +104,19 @@ class LimitOffsetStep(PlanStep):
|
|
|
104
104
|
|
|
105
105
|
class FetchDataframeStep(PlanStep):
|
|
106
106
|
"""Fetches a dataframe from external integration"""
|
|
107
|
-
def __init__(self, integration, query=None, raw_query=None, *args, **kwargs):
|
|
107
|
+
def __init__(self, integration, query=None, raw_query=None, params=None, *args, **kwargs):
|
|
108
108
|
super().__init__(*args, **kwargs)
|
|
109
109
|
self.integration = integration
|
|
110
110
|
self.query = query
|
|
111
111
|
self.raw_query = raw_query
|
|
112
|
+
self.params = params
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class FetchDataframeStepPartition(FetchDataframeStep):
|
|
116
|
+
"""Fetches a dataframe from external integration in partitions"""
|
|
117
|
+
def __init__(self, *args, **kwargs):
|
|
118
|
+
super().__init__(*args, **kwargs)
|
|
119
|
+
self.steps = []
|
|
112
120
|
|
|
113
121
|
|
|
114
122
|
class ApplyPredictorStep(PlanStep):
|
|
@@ -249,11 +257,12 @@ class SubSelectStep(PlanStep):
|
|
|
249
257
|
|
|
250
258
|
|
|
251
259
|
class QueryStep(PlanStep):
|
|
252
|
-
def __init__(self, query, from_table=None, *args, **kwargs):
|
|
260
|
+
def __init__(self, query, from_table=None, *args, strict_where=True, **kwargs):
|
|
253
261
|
"""Performs query using injected dataframe"""
|
|
254
262
|
super().__init__(*args, **kwargs)
|
|
255
263
|
self.query = query
|
|
256
264
|
self.from_table = from_table
|
|
265
|
+
self.strict_where = strict_where
|
|
257
266
|
|
|
258
267
|
|
|
259
268
|
class DataStep(PlanStep):
|
|
@@ -50,13 +50,14 @@ def rename_df_columns(df: pd.DataFrame, names: Optional[List] = None) -> None:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
class ResultSet:
|
|
53
|
-
def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None):
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
53
|
+
def __init__(self, columns=None, values: List[List] = None, df: pd.DataFrame = None, affected_rows: int = None):
|
|
54
|
+
"""
|
|
55
|
+
Args:
|
|
56
|
+
columns: list of Columns
|
|
57
|
+
values (List[List]): data of resultSet, have to be list of lists with length equal to column
|
|
58
|
+
df (pd.DataFrame): injected dataframe, have to have enumerated columns and length equal to columns
|
|
59
|
+
affected_rows (int): number of affected rows
|
|
60
|
+
"""
|
|
60
61
|
if columns is None:
|
|
61
62
|
columns = []
|
|
62
63
|
self._columns = columns
|
|
@@ -67,6 +68,8 @@ class ResultSet:
|
|
|
67
68
|
df = pd.DataFrame(values)
|
|
68
69
|
self._df = df
|
|
69
70
|
|
|
71
|
+
self.affected_rows = affected_rows
|
|
72
|
+
|
|
70
73
|
self.is_prediction = False
|
|
71
74
|
|
|
72
75
|
def __repr__(self):
|
|
@@ -8,11 +8,11 @@
|
|
|
8
8
|
* permission of MindsDB Inc
|
|
9
9
|
*******************************************************
|
|
10
10
|
"""
|
|
11
|
-
import re
|
|
12
11
|
import inspect
|
|
13
12
|
from textwrap import dedent
|
|
13
|
+
from typing import Union, Dict
|
|
14
14
|
|
|
15
|
-
from mindsdb_sql_parser import parse_sql
|
|
15
|
+
from mindsdb_sql_parser import parse_sql, ASTNode
|
|
16
16
|
from mindsdb.api.executor.planner.steps import (
|
|
17
17
|
ApplyTimeseriesPredictorStep,
|
|
18
18
|
ApplyPredictorRowStep,
|
|
@@ -23,7 +23,7 @@ from mindsdb.api.executor.planner.exceptions import PlanningException
|
|
|
23
23
|
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
24
24
|
from mindsdb.api.executor.planner import query_planner
|
|
25
25
|
|
|
26
|
-
from mindsdb.api.executor.utilities.sql import
|
|
26
|
+
from mindsdb.api.executor.utilities.sql import get_query_models
|
|
27
27
|
from mindsdb.interfaces.model.functions import get_model_record
|
|
28
28
|
from mindsdb.api.executor.exceptions import (
|
|
29
29
|
BadTableError,
|
|
@@ -33,19 +33,21 @@ from mindsdb.api.executor.exceptions import (
|
|
|
33
33
|
import mindsdb.utilities.profiler as profiler
|
|
34
34
|
from mindsdb.utilities.fs import create_process_mark, delete_process_mark
|
|
35
35
|
from mindsdb.utilities.exception import EntityNotExistsError
|
|
36
|
+
from mindsdb.interfaces.query_context.context_controller import query_context_controller
|
|
37
|
+
from mindsdb.utilities.context import context as ctx
|
|
38
|
+
|
|
36
39
|
|
|
37
40
|
from . import steps
|
|
38
41
|
from .result_set import ResultSet, Column
|
|
39
42
|
from . steps.base import BaseStepCall
|
|
40
43
|
|
|
41
|
-
superset_subquery = re.compile(r'from[\s\n]*(\(.*\))[\s\n]*as[\s\n]*virtual_table', flags=re.IGNORECASE | re.MULTILINE | re.S)
|
|
42
|
-
|
|
43
44
|
|
|
44
45
|
class SQLQuery:
|
|
45
46
|
|
|
46
47
|
step_handlers = {}
|
|
47
48
|
|
|
48
|
-
def __init__(self, sql, session, execute=True,
|
|
49
|
+
def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
|
|
50
|
+
database: str = None, query_id: int = None):
|
|
49
51
|
self.session = session
|
|
50
52
|
|
|
51
53
|
if database is not None:
|
|
@@ -59,23 +61,22 @@ class SQLQuery:
|
|
|
59
61
|
}
|
|
60
62
|
|
|
61
63
|
self.columns_list = None
|
|
62
|
-
self.steps_data = {}
|
|
64
|
+
self.steps_data: Dict[int, ResultSet] = {}
|
|
63
65
|
|
|
64
|
-
self.planner = None
|
|
66
|
+
self.planner: query_planner.QueryPlanner = None
|
|
65
67
|
self.parameters = []
|
|
66
|
-
self.fetched_data = None
|
|
68
|
+
self.fetched_data: ResultSet = None
|
|
67
69
|
|
|
68
70
|
self.outer_query = None
|
|
71
|
+
self.run_query = None
|
|
72
|
+
self.query_id = query_id
|
|
73
|
+
if query_id is not None:
|
|
74
|
+
# resume query
|
|
75
|
+
run_query = query_context_controller.get_query(self.query_id)
|
|
76
|
+
run_query.clear_error()
|
|
77
|
+
sql = run_query.sql
|
|
69
78
|
|
|
70
79
|
if isinstance(sql, str):
|
|
71
|
-
# region workaround for subqueries in superset
|
|
72
|
-
if 'as virtual_table' in sql.lower():
|
|
73
|
-
subquery = re.findall(superset_subquery, sql)
|
|
74
|
-
if isinstance(subquery, list) and len(subquery) == 1:
|
|
75
|
-
subquery = subquery[0]
|
|
76
|
-
self.outer_query = sql.replace(subquery, 'dataframe')
|
|
77
|
-
sql = subquery.strip('()')
|
|
78
|
-
# endregion
|
|
79
80
|
self.query = parse_sql(sql)
|
|
80
81
|
self.context['query_str'] = sql
|
|
81
82
|
else:
|
|
@@ -89,7 +90,6 @@ class SQLQuery:
|
|
|
89
90
|
self.create_planner()
|
|
90
91
|
|
|
91
92
|
if execute:
|
|
92
|
-
self.prepare_query(prepare=False)
|
|
93
93
|
self.execute_query()
|
|
94
94
|
|
|
95
95
|
@classmethod
|
|
@@ -190,63 +190,62 @@ class SQLQuery:
|
|
|
190
190
|
default_namespace=database,
|
|
191
191
|
)
|
|
192
192
|
|
|
193
|
-
def
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
'result': result
|
|
204
|
-
}
|
|
193
|
+
def prepare_query(self):
|
|
194
|
+
"""it is prepared statement call
|
|
195
|
+
"""
|
|
196
|
+
try:
|
|
197
|
+
for step in self.planner.prepare_steps(self.query):
|
|
198
|
+
data = self.execute_step(step)
|
|
199
|
+
step.set_result(data)
|
|
200
|
+
self.steps_data[step.step_num] = data
|
|
201
|
+
except PlanningException as e:
|
|
202
|
+
raise LogicError(e)
|
|
205
203
|
|
|
206
|
-
|
|
207
|
-
if prepare:
|
|
208
|
-
# it is prepared statement call
|
|
209
|
-
try:
|
|
210
|
-
for step in self.planner.prepare_steps(self.query):
|
|
211
|
-
data = self.execute_step(step)
|
|
212
|
-
step.set_result(data)
|
|
213
|
-
self.steps_data[step.step_num] = data
|
|
214
|
-
except PlanningException as e:
|
|
215
|
-
raise LogicError(e)
|
|
216
|
-
|
|
217
|
-
statement_info = self.planner.get_statement_info()
|
|
218
|
-
|
|
219
|
-
self.columns_list = []
|
|
220
|
-
for col in statement_info['columns']:
|
|
221
|
-
self.columns_list.append(
|
|
222
|
-
Column(
|
|
223
|
-
database=col['ds'],
|
|
224
|
-
table_name=col['table_name'],
|
|
225
|
-
table_alias=col['table_alias'],
|
|
226
|
-
name=col['name'],
|
|
227
|
-
alias=col['alias'],
|
|
228
|
-
type=col['type']
|
|
229
|
-
)
|
|
230
|
-
)
|
|
204
|
+
statement_info = self.planner.get_statement_info()
|
|
231
205
|
|
|
232
|
-
|
|
206
|
+
self.columns_list = []
|
|
207
|
+
for col in statement_info['columns']:
|
|
208
|
+
self.columns_list.append(
|
|
233
209
|
Column(
|
|
210
|
+
database=col['ds'],
|
|
211
|
+
table_name=col['table_name'],
|
|
212
|
+
table_alias=col['table_alias'],
|
|
234
213
|
name=col['name'],
|
|
235
214
|
alias=col['alias'],
|
|
236
215
|
type=col['type']
|
|
237
216
|
)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
self.parameters = [
|
|
220
|
+
Column(
|
|
221
|
+
name=col['name'],
|
|
222
|
+
alias=col['alias'],
|
|
223
|
+
type=col['type']
|
|
224
|
+
)
|
|
225
|
+
for col in statement_info['parameters']
|
|
226
|
+
]
|
|
227
|
+
|
|
228
|
+
def execute_query(self):
|
|
242
229
|
if self.fetched_data is not None:
|
|
243
230
|
# no need to execute
|
|
244
231
|
return
|
|
245
232
|
|
|
233
|
+
try:
|
|
234
|
+
steps = list(self.planner.execute_steps())
|
|
235
|
+
except PlanningException as e:
|
|
236
|
+
raise LogicError(e)
|
|
237
|
+
|
|
238
|
+
if self.planner.plan.is_resumable:
|
|
239
|
+
# create query
|
|
240
|
+
if self.query_id is not None:
|
|
241
|
+
self.run_query = query_context_controller.get_query(self.query_id)
|
|
242
|
+
else:
|
|
243
|
+
self.run_query = query_context_controller.create_query(self.context['query_str'])
|
|
244
|
+
ctx.run_query_id = self.run_query.record.id
|
|
245
|
+
|
|
246
246
|
step_result = None
|
|
247
247
|
process_mark = None
|
|
248
248
|
try:
|
|
249
|
-
steps = list(self.planner.execute_steps(params))
|
|
250
249
|
steps_classes = (x.__class__ for x in steps)
|
|
251
250
|
predict_steps = (ApplyPredictorRowStep, ApplyPredictorStep, ApplyTimeseriesPredictorStep)
|
|
252
251
|
if any(s in predict_steps for s in steps_classes):
|
|
@@ -255,10 +254,16 @@ class SQLQuery:
|
|
|
255
254
|
with profiler.Context(f'step: {step.__class__.__name__}'):
|
|
256
255
|
step_result = self.execute_step(step)
|
|
257
256
|
self.steps_data[step.step_num] = step_result
|
|
258
|
-
except PlanningException as e:
|
|
259
|
-
raise LogicError(e)
|
|
260
257
|
except Exception as e:
|
|
258
|
+
if self.run_query is not None:
|
|
259
|
+
# set error and place where it stopped
|
|
260
|
+
self.run_query.on_error(e, step.step_num, self.steps_data)
|
|
261
261
|
raise e
|
|
262
|
+
else:
|
|
263
|
+
# mark running query as completed
|
|
264
|
+
if self.run_query is not None:
|
|
265
|
+
self.run_query.finish()
|
|
266
|
+
ctx.run_query_id = None
|
|
262
267
|
finally:
|
|
263
268
|
if process_mark is not None:
|
|
264
269
|
delete_process_mark('predict', process_mark)
|
|
@@ -270,27 +275,7 @@ class SQLQuery:
|
|
|
270
275
|
if len(self.steps_data) == 0:
|
|
271
276
|
return
|
|
272
277
|
|
|
273
|
-
|
|
274
|
-
if self.outer_query is not None:
|
|
275
|
-
# workaround for subqueries in superset. remove it?
|
|
276
|
-
# +++
|
|
277
|
-
# ???
|
|
278
|
-
|
|
279
|
-
result = step_result
|
|
280
|
-
df = result.to_df()
|
|
281
|
-
|
|
282
|
-
df2 = query_df(df, self.outer_query)
|
|
283
|
-
|
|
284
|
-
result2 = ResultSet().from_df(df2, database='', table_name='')
|
|
285
|
-
|
|
286
|
-
self.columns_list = result2.columns
|
|
287
|
-
self.fetched_data = result2
|
|
288
|
-
|
|
289
|
-
else:
|
|
290
|
-
result = step_result
|
|
291
|
-
self.fetched_data = result
|
|
292
|
-
except Exception as e:
|
|
293
|
-
raise UnknownError("error in preparing result query step") from e
|
|
278
|
+
self.fetched_data = step_result
|
|
294
279
|
|
|
295
280
|
try:
|
|
296
281
|
if hasattr(self, 'columns_list') is False:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from .apply_predictor_step import ApplyPredictorStepCall, ApplyPredictorRowStepCall, ApplyTimeseriesPredictorStepCall
|
|
2
2
|
from .delete_step import DeleteStepCall
|
|
3
3
|
from .fetch_dataframe import FetchDataframeStepCall
|
|
4
|
+
from .fetch_dataframe_partition import FetchDataframePartitionCall
|
|
4
5
|
from .insert_step import InsertToTableCall, SaveToTableCall, CreateTableCall
|
|
5
6
|
from .join_step import JoinStepCall
|
|
6
7
|
from .map_reduce_step import MapReduceStepCall
|
|
@@ -44,6 +44,5 @@ class DeleteStepCall(BaseStepCall):
|
|
|
44
44
|
|
|
45
45
|
query_traversal(query.where, fill_params)
|
|
46
46
|
|
|
47
|
-
dn.query(query=query, session=self.session)
|
|
48
|
-
|
|
49
|
-
return ResultSet()
|
|
47
|
+
response = dn.query(query=query, session=self.session)
|
|
48
|
+
return ResultSet(affected_rows=response.affected_rows)
|
|
@@ -89,10 +89,11 @@ class FetchDataframeStepCall(BaseStepCall):
|
|
|
89
89
|
table_alias = (self.context.get('database'), 'result', 'result')
|
|
90
90
|
|
|
91
91
|
# fetch raw_query
|
|
92
|
-
|
|
92
|
+
response = dn.query(
|
|
93
93
|
native_query=step.raw_query,
|
|
94
94
|
session=self.session
|
|
95
95
|
)
|
|
96
|
+
df = response.data_frame
|
|
96
97
|
else:
|
|
97
98
|
table_alias = get_table_alias(step.query.from_table, self.context.get('database'))
|
|
98
99
|
|
|
@@ -104,13 +105,14 @@ class FetchDataframeStepCall(BaseStepCall):
|
|
|
104
105
|
|
|
105
106
|
query, context_callback = query_context_controller.handle_db_context_vars(query, dn, self.session)
|
|
106
107
|
|
|
107
|
-
|
|
108
|
+
response = dn.query(
|
|
108
109
|
query=query,
|
|
109
110
|
session=self.session
|
|
110
111
|
)
|
|
112
|
+
df = response.data_frame
|
|
111
113
|
|
|
112
114
|
if context_callback:
|
|
113
|
-
context_callback(df,
|
|
115
|
+
context_callback(df, response.columns)
|
|
114
116
|
|
|
115
117
|
result = ResultSet()
|
|
116
118
|
|