MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/a2a/run_a2a.py +1 -1
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +49 -1
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
|
@@ -4,43 +4,72 @@ import pandas as pd
|
|
|
4
4
|
|
|
5
5
|
from mindsdb_sql_parser import ast
|
|
6
6
|
from mindsdb_sql_parser.ast import (
|
|
7
|
-
Select,
|
|
8
|
-
|
|
7
|
+
Select,
|
|
8
|
+
Identifier,
|
|
9
|
+
Join,
|
|
10
|
+
Star,
|
|
11
|
+
BinaryOperation,
|
|
12
|
+
Constant,
|
|
13
|
+
Union,
|
|
14
|
+
CreateTable,
|
|
15
|
+
Function,
|
|
16
|
+
Insert,
|
|
17
|
+
Except,
|
|
18
|
+
Intersect,
|
|
19
|
+
Update,
|
|
20
|
+
NativeQuery,
|
|
21
|
+
Parameter,
|
|
22
|
+
Delete,
|
|
9
23
|
)
|
|
10
24
|
|
|
11
25
|
from mindsdb.api.executor.planner.exceptions import PlanningException
|
|
12
26
|
from mindsdb.api.executor.planner import utils
|
|
13
27
|
from mindsdb.api.executor.planner.query_plan import QueryPlan
|
|
14
28
|
from mindsdb.api.executor.planner.steps import (
|
|
15
|
-
PlanStep,
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
29
|
+
PlanStep,
|
|
30
|
+
FetchDataframeStep,
|
|
31
|
+
ProjectStep,
|
|
32
|
+
ApplyPredictorStep,
|
|
33
|
+
ApplyPredictorRowStep,
|
|
34
|
+
UnionStep,
|
|
35
|
+
GetPredictorColumns,
|
|
36
|
+
SaveToTable,
|
|
37
|
+
InsertToTable,
|
|
38
|
+
UpdateToTable,
|
|
39
|
+
SubSelectStep,
|
|
40
|
+
QueryStep,
|
|
41
|
+
JoinStep,
|
|
42
|
+
DeleteStep,
|
|
43
|
+
DataStep,
|
|
44
|
+
CreateTableStep,
|
|
45
|
+
FetchDataframeStepPartition,
|
|
19
46
|
)
|
|
20
47
|
from mindsdb.api.executor.planner.utils import (
|
|
21
48
|
disambiguate_predictor_column_identifier,
|
|
22
49
|
recursively_extract_column_values,
|
|
23
|
-
query_traversal,
|
|
50
|
+
query_traversal,
|
|
51
|
+
filters_to_bin_op,
|
|
24
52
|
)
|
|
25
53
|
from mindsdb.api.executor.planner.plan_join import PlanJoin
|
|
26
54
|
from mindsdb.api.executor.planner.query_prepare import PreparedStatementPlanner
|
|
27
55
|
from mindsdb.utilities.config import config
|
|
28
56
|
|
|
29
57
|
|
|
30
|
-
default_project = config.get(
|
|
58
|
+
default_project = config.get("default_project")
|
|
31
59
|
|
|
32
60
|
# This includes built-in MindsDB SQL functions and functions to be executed via DuckDB consistently.
|
|
33
|
-
MINDSDB_SQL_FUNCTIONS = {
|
|
61
|
+
MINDSDB_SQL_FUNCTIONS = {"llm", "to_markdown", "hash"}
|
|
34
62
|
|
|
35
63
|
|
|
36
64
|
class QueryPlanner:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
65
|
+
def __init__(
|
|
66
|
+
self,
|
|
67
|
+
query=None,
|
|
68
|
+
integrations: list = None,
|
|
69
|
+
predictor_namespace=None,
|
|
70
|
+
predictor_metadata: list = None,
|
|
71
|
+
default_namespace: str = None,
|
|
72
|
+
):
|
|
44
73
|
self.query = query
|
|
45
74
|
self.plan = QueryPlan()
|
|
46
75
|
|
|
@@ -49,14 +78,14 @@ class QueryPlanner:
|
|
|
49
78
|
if integrations is not None:
|
|
50
79
|
for integration in integrations:
|
|
51
80
|
if isinstance(integration, dict):
|
|
52
|
-
integration_name = integration[
|
|
81
|
+
integration_name = integration["name"].lower()
|
|
53
82
|
# it is project of system database
|
|
54
|
-
if integration[
|
|
83
|
+
if integration["type"] != "data":
|
|
55
84
|
_projects.add(integration_name)
|
|
56
85
|
continue
|
|
57
86
|
else:
|
|
58
87
|
integration_name = integration.lower()
|
|
59
|
-
integration = {
|
|
88
|
+
integration = {"name": integration}
|
|
60
89
|
self.integrations[integration_name] = integration
|
|
61
90
|
|
|
62
91
|
# allow to select from mindsdb namespace
|
|
@@ -73,24 +102,24 @@ class QueryPlanner:
|
|
|
73
102
|
if isinstance(predictor_metadata, list):
|
|
74
103
|
# convert to dict
|
|
75
104
|
for predictor in predictor_metadata:
|
|
76
|
-
if
|
|
77
|
-
integration_name = predictor[
|
|
105
|
+
if "integration_name" in predictor:
|
|
106
|
+
integration_name = predictor["integration_name"]
|
|
78
107
|
else:
|
|
79
108
|
integration_name = self.predictor_namespace
|
|
80
|
-
predictor[
|
|
81
|
-
idx = f
|
|
109
|
+
predictor["integration_name"] = integration_name
|
|
110
|
+
idx = f"{integration_name}.{predictor['name']}".lower()
|
|
82
111
|
self.predictor_info[idx] = predictor
|
|
83
112
|
_projects.add(integration_name.lower())
|
|
84
113
|
elif isinstance(predictor_metadata, dict):
|
|
85
114
|
# legacy behaviour
|
|
86
115
|
for name, predictor in predictor_metadata.items():
|
|
87
|
-
if
|
|
88
|
-
if
|
|
89
|
-
integration_name = predictor[
|
|
116
|
+
if "." not in name:
|
|
117
|
+
if "integration_name" in predictor:
|
|
118
|
+
integration_name = predictor["integration_name"]
|
|
90
119
|
else:
|
|
91
120
|
integration_name = self.predictor_namespace
|
|
92
|
-
predictor[
|
|
93
|
-
name = f
|
|
121
|
+
predictor["integration_name"] = integration_name
|
|
122
|
+
name = f"{integration_name}.{name}".lower()
|
|
94
123
|
_projects.add(integration_name.lower())
|
|
95
124
|
|
|
96
125
|
self.predictor_info[name] = predictor
|
|
@@ -129,11 +158,11 @@ class QueryPlanner:
|
|
|
129
158
|
if namespace is not None:
|
|
130
159
|
idx_ar.insert(0, namespace)
|
|
131
160
|
|
|
132
|
-
idx =
|
|
161
|
+
idx = ".".join(idx_ar).lower()
|
|
133
162
|
info = self.predictor_info.get(idx)
|
|
134
163
|
if info is not None:
|
|
135
|
-
info[
|
|
136
|
-
info[
|
|
164
|
+
info["version"] = version
|
|
165
|
+
info["name"] = name
|
|
137
166
|
return info
|
|
138
167
|
|
|
139
168
|
def prepare_integration_select(self, database, query):
|
|
@@ -149,7 +178,7 @@ class QueryPlanner:
|
|
|
149
178
|
if len(node.parts) > 1 and node.parts[0].lower() == database:
|
|
150
179
|
node.parts.pop(0)
|
|
151
180
|
|
|
152
|
-
if not hasattr(parent_query,
|
|
181
|
+
if not hasattr(parent_query, "from_table"):
|
|
153
182
|
return
|
|
154
183
|
|
|
155
184
|
table = parent_query.from_table
|
|
@@ -193,18 +222,23 @@ class QueryPlanner:
|
|
|
193
222
|
# remove predictor params
|
|
194
223
|
if fetch_df_select.using is not None:
|
|
195
224
|
fetch_df_select.using = None
|
|
225
|
+
fetch_params = self.get_fetch_params(params)
|
|
226
|
+
return FetchDataframeStep(integration=integration_name, query=fetch_df_select, params=fetch_params)
|
|
227
|
+
|
|
228
|
+
def get_fetch_params(self, params):
|
|
229
|
+
# extracts parameters for fetching
|
|
196
230
|
|
|
197
231
|
if params:
|
|
198
232
|
fetch_params = params.copy()
|
|
199
233
|
# remove partition parameters
|
|
200
|
-
for key in (
|
|
234
|
+
for key in ("batch_size", "track_column"):
|
|
201
235
|
if key in params:
|
|
202
236
|
del params[key]
|
|
203
|
-
if
|
|
204
|
-
fetch_params[
|
|
237
|
+
if "track_column" in fetch_params and isinstance(fetch_params["track_column"], Identifier):
|
|
238
|
+
fetch_params["track_column"] = fetch_params["track_column"].parts[-1]
|
|
205
239
|
else:
|
|
206
240
|
fetch_params = None
|
|
207
|
-
return
|
|
241
|
+
return fetch_params
|
|
208
242
|
|
|
209
243
|
def plan_integration_select(self, select):
|
|
210
244
|
"""Plan for a select query that can be fully executed in an integration"""
|
|
@@ -226,7 +260,7 @@ class QueryPlanner:
|
|
|
226
260
|
database = parts.pop(0).lower()
|
|
227
261
|
|
|
228
262
|
if database is None:
|
|
229
|
-
raise PlanningException(f
|
|
263
|
+
raise PlanningException(f"Integration not found for: {node}")
|
|
230
264
|
|
|
231
265
|
return database, Identifier(parts=parts, alias=alias)
|
|
232
266
|
|
|
@@ -263,21 +297,14 @@ class QueryPlanner:
|
|
|
263
297
|
|
|
264
298
|
# cte names are not mdb objects
|
|
265
299
|
if isinstance(query, Select) and query.cte:
|
|
266
|
-
cte_names = [
|
|
267
|
-
|
|
268
|
-
for cte in query.cte
|
|
269
|
-
]
|
|
270
|
-
mdb_entities = [
|
|
271
|
-
item
|
|
272
|
-
for item in mdb_entities
|
|
273
|
-
if '.'.join(item.parts) not in cte_names
|
|
274
|
-
]
|
|
300
|
+
cte_names = [cte.name.parts[-1] for cte in query.cte]
|
|
301
|
+
mdb_entities = [item for item in mdb_entities if ".".join(item.parts) not in cte_names]
|
|
275
302
|
|
|
276
303
|
return {
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
304
|
+
"mdb_entities": mdb_entities,
|
|
305
|
+
"integrations": integrations,
|
|
306
|
+
"predictors": predictors,
|
|
307
|
+
"user_functions": user_functions,
|
|
281
308
|
}
|
|
282
309
|
|
|
283
310
|
def get_nested_selects_plan_fnc(self, main_integration, force=False):
|
|
@@ -286,9 +313,9 @@ class QueryPlanner:
|
|
|
286
313
|
if isinstance(node, Select):
|
|
287
314
|
query_info2 = self.get_query_info(node)
|
|
288
315
|
if force or (
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
316
|
+
len(query_info2["integrations"]) > 1
|
|
317
|
+
or main_integration not in query_info2["integrations"]
|
|
318
|
+
or len(query_info2["mdb_entities"]) > 0
|
|
292
319
|
):
|
|
293
320
|
# need to execute in planner
|
|
294
321
|
|
|
@@ -320,7 +347,7 @@ class QueryPlanner:
|
|
|
320
347
|
|
|
321
348
|
# find subselects
|
|
322
349
|
main_integration, _ = self.resolve_database_table(query.from_table)
|
|
323
|
-
is_api_db = self.integrations.get(main_integration, {}).get(
|
|
350
|
+
is_api_db = self.integrations.get(main_integration, {}).get("class_type") == "api"
|
|
324
351
|
|
|
325
352
|
find_selects = self.get_nested_selects_plan_fnc(main_integration, force=is_api_db)
|
|
326
353
|
query.targets = query_traversal(query.targets, find_selects)
|
|
@@ -329,12 +356,12 @@ class QueryPlanner:
|
|
|
329
356
|
# get info of updated query
|
|
330
357
|
query_info = self.get_query_info(query)
|
|
331
358
|
|
|
332
|
-
if len(query_info[
|
|
359
|
+
if len(query_info["predictors"]) >= 1:
|
|
333
360
|
# select from predictor
|
|
334
361
|
return self.plan_select_from_predictor(query)
|
|
335
362
|
elif is_api_db:
|
|
336
363
|
return self.plan_api_db_select(query)
|
|
337
|
-
elif len(query_info[
|
|
364
|
+
elif len(query_info["user_functions"]) > 0:
|
|
338
365
|
return self.plan_integration_select_with_functions(query)
|
|
339
366
|
else:
|
|
340
367
|
# fallback to integration
|
|
@@ -366,7 +393,7 @@ class QueryPlanner:
|
|
|
366
393
|
# clear
|
|
367
394
|
skipped_conditions.append(node)
|
|
368
395
|
node.args = [Constant(0), Constant(0)]
|
|
369
|
-
node.op =
|
|
396
|
+
node.op = "="
|
|
370
397
|
|
|
371
398
|
query_traversal(query2.where, replace_functions)
|
|
372
399
|
|
|
@@ -411,7 +438,6 @@ class QueryPlanner:
|
|
|
411
438
|
return self.plan_sub_select(query, prev_step)
|
|
412
439
|
|
|
413
440
|
def plan_nested_select(self, select):
|
|
414
|
-
|
|
415
441
|
# query_info = self.get_query_info(select)
|
|
416
442
|
# # get all predictors
|
|
417
443
|
#
|
|
@@ -445,11 +471,11 @@ class QueryPlanner:
|
|
|
445
471
|
new_identifier = copy.deepcopy(identifier)
|
|
446
472
|
|
|
447
473
|
info = self.get_predictor(identifier)
|
|
448
|
-
namespace = info[
|
|
474
|
+
namespace = info["integration_name"]
|
|
449
475
|
|
|
450
|
-
parts = [namespace, info[
|
|
451
|
-
if info[
|
|
452
|
-
parts.append(info[
|
|
476
|
+
parts = [namespace, info["name"]]
|
|
477
|
+
if info["version"] is not None:
|
|
478
|
+
parts.append(info["version"])
|
|
453
479
|
new_identifier.parts = parts
|
|
454
480
|
|
|
455
481
|
return namespace, new_identifier
|
|
@@ -457,33 +483,31 @@ class QueryPlanner:
|
|
|
457
483
|
def plan_select_from_predictor(self, select):
|
|
458
484
|
predictor_namespace, predictor = self.get_predictor_namespace_and_name_from_identifier(select.from_table)
|
|
459
485
|
|
|
460
|
-
if select.where == BinaryOperation(
|
|
486
|
+
if select.where == BinaryOperation("=", args=[Constant(1), Constant(0)]):
|
|
461
487
|
# Hardcoded mysql way of getting predictor columns
|
|
462
488
|
predictor_identifier = utils.get_predictor_name_identifier(predictor)
|
|
463
489
|
predictor_step = self.plan.add_step(
|
|
464
|
-
GetPredictorColumns(
|
|
465
|
-
namespace=predictor_namespace,
|
|
466
|
-
predictor=predictor_identifier
|
|
467
|
-
)
|
|
490
|
+
GetPredictorColumns(namespace=predictor_namespace, predictor=predictor_identifier)
|
|
468
491
|
)
|
|
469
492
|
else:
|
|
470
493
|
new_query_targets = []
|
|
471
494
|
for target in select.targets:
|
|
472
495
|
if isinstance(target, Identifier):
|
|
473
|
-
new_query_targets.append(
|
|
474
|
-
disambiguate_predictor_column_identifier(target, predictor))
|
|
496
|
+
new_query_targets.append(disambiguate_predictor_column_identifier(target, predictor))
|
|
475
497
|
elif type(target) in (Star, Constant, Function):
|
|
476
498
|
new_query_targets.append(target)
|
|
477
499
|
else:
|
|
478
|
-
raise PlanningException(f
|
|
500
|
+
raise PlanningException(f"Unknown select target {type(target)}")
|
|
479
501
|
|
|
480
502
|
if select.group_by or select.having:
|
|
481
|
-
raise PlanningException(
|
|
503
|
+
raise PlanningException(
|
|
504
|
+
"Unsupported operation when querying predictor. Only WHERE is allowed and required."
|
|
505
|
+
)
|
|
482
506
|
|
|
483
507
|
row_dict = {}
|
|
484
508
|
where_clause = select.where
|
|
485
509
|
if not where_clause:
|
|
486
|
-
raise PlanningException(
|
|
510
|
+
raise PlanningException("WHERE clause required when selecting from predictor")
|
|
487
511
|
|
|
488
512
|
predictor_identifier = utils.get_predictor_name_identifier(predictor)
|
|
489
513
|
recursively_extract_column_values(where_clause, row_dict, predictor_identifier)
|
|
@@ -493,10 +517,7 @@ class QueryPlanner:
|
|
|
493
517
|
params = select.using
|
|
494
518
|
predictor_step = self.plan.add_step(
|
|
495
519
|
ApplyPredictorRowStep(
|
|
496
|
-
namespace=predictor_namespace,
|
|
497
|
-
predictor=predictor_identifier,
|
|
498
|
-
row_dict=row_dict,
|
|
499
|
-
params=params
|
|
520
|
+
namespace=predictor_namespace, predictor=predictor_identifier, row_dict=row_dict, params=params
|
|
500
521
|
)
|
|
501
522
|
)
|
|
502
523
|
project_step = self.plan_project(select, predictor_step.result)
|
|
@@ -527,7 +548,7 @@ class QueryPlanner:
|
|
|
527
548
|
|
|
528
549
|
binary_ops.append(op)
|
|
529
550
|
|
|
530
|
-
if op in [
|
|
551
|
+
if op in ["and", "or"]:
|
|
531
552
|
return
|
|
532
553
|
|
|
533
554
|
arg1, arg2 = node.args
|
|
@@ -537,12 +558,7 @@ class QueryPlanner:
|
|
|
537
558
|
if isinstance(arg1, Identifier) and isinstance(arg2, (Constant, Parameter)) and len(arg1.parts) > 1:
|
|
538
559
|
model = Identifier(parts=arg1.parts[:-1])
|
|
539
560
|
|
|
540
|
-
if (
|
|
541
|
-
self.is_predictor(model)
|
|
542
|
-
or (
|
|
543
|
-
len(model.parts) == 1 and model.parts[0] == predictor_alias
|
|
544
|
-
)
|
|
545
|
-
):
|
|
561
|
+
if self.is_predictor(model) or (len(model.parts) == 1 and model.parts[0] == predictor_alias):
|
|
546
562
|
model_filters.append(node)
|
|
547
563
|
return
|
|
548
564
|
table_filters.append(node)
|
|
@@ -555,7 +571,7 @@ class QueryPlanner:
|
|
|
555
571
|
# split conditions
|
|
556
572
|
query_traversal(int_select.where, split_filters)
|
|
557
573
|
|
|
558
|
-
if len(model_filters) > 0 and
|
|
574
|
+
if len(model_filters) > 0 and "or" not in binary_ops:
|
|
559
575
|
int_select.where = filters_to_bin_op(table_filters)
|
|
560
576
|
|
|
561
577
|
integration_select_step = self.plan_integration_select(int_select)
|
|
@@ -569,21 +585,23 @@ class QueryPlanner:
|
|
|
569
585
|
if model_filters:
|
|
570
586
|
row_dict = {}
|
|
571
587
|
for el in model_filters:
|
|
572
|
-
if isinstance(el.args[0], Identifier) and el.op ==
|
|
588
|
+
if isinstance(el.args[0], Identifier) and el.op == "=":
|
|
573
589
|
if isinstance(el.args[1], (Constant, Parameter)):
|
|
574
590
|
row_dict[el.args[0].parts[-1]] = el.args[1].value
|
|
575
591
|
|
|
576
|
-
last_step = self.plan.add_step(
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
592
|
+
last_step = self.plan.add_step(
|
|
593
|
+
ApplyPredictorStep(
|
|
594
|
+
namespace=predictor_namespace,
|
|
595
|
+
dataframe=integration_select_step.result,
|
|
596
|
+
predictor=predictor_identifier,
|
|
597
|
+
params=params,
|
|
598
|
+
row_dict=row_dict,
|
|
599
|
+
)
|
|
600
|
+
)
|
|
583
601
|
|
|
584
602
|
return {
|
|
585
|
-
|
|
586
|
-
|
|
603
|
+
"predictor": last_step,
|
|
604
|
+
"data": integration_select_step,
|
|
587
605
|
}
|
|
588
606
|
|
|
589
607
|
# def plan_group(self, query, last_step):
|
|
@@ -617,25 +635,31 @@ class QueryPlanner:
|
|
|
617
635
|
return last_step
|
|
618
636
|
|
|
619
637
|
for target in query.targets:
|
|
620
|
-
if
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
638
|
+
if (
|
|
639
|
+
isinstance(target, Identifier)
|
|
640
|
+
or isinstance(target, Star)
|
|
641
|
+
or isinstance(target, Function)
|
|
642
|
+
or isinstance(target, Constant)
|
|
643
|
+
or isinstance(target, BinaryOperation)
|
|
644
|
+
):
|
|
625
645
|
out_identifiers.append(target)
|
|
626
646
|
else:
|
|
627
647
|
new_identifier = Identifier(str(target.to_string(alias=False)), alias=target.alias)
|
|
628
648
|
out_identifiers.append(new_identifier)
|
|
629
|
-
return self.plan.add_step(
|
|
649
|
+
return self.plan.add_step(
|
|
650
|
+
ProjectStep(dataframe=dataframe, columns=out_identifiers, ignore_doubles=ignore_doubles)
|
|
651
|
+
)
|
|
630
652
|
|
|
631
653
|
def plan_create_table(self, query: CreateTable):
|
|
632
654
|
if query.from_select is None:
|
|
633
655
|
if query.columns is not None:
|
|
634
|
-
self.plan.add_step(
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
656
|
+
self.plan.add_step(
|
|
657
|
+
CreateTableStep(
|
|
658
|
+
table=query.name,
|
|
659
|
+
columns=query.columns,
|
|
660
|
+
is_replace=query.is_replace,
|
|
661
|
+
)
|
|
662
|
+
)
|
|
639
663
|
return
|
|
640
664
|
|
|
641
665
|
raise PlanningException(f'Not implemented "create table": {query.to_string()}')
|
|
@@ -645,11 +669,13 @@ class QueryPlanner:
|
|
|
645
669
|
last_step = self.plan_select(query.from_select, integration=integration_name)
|
|
646
670
|
|
|
647
671
|
# create table step
|
|
648
|
-
self.plan.add_step(
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
672
|
+
self.plan.add_step(
|
|
673
|
+
SaveToTable(
|
|
674
|
+
table=query.name,
|
|
675
|
+
dataframe=last_step.result,
|
|
676
|
+
is_replace=query.is_replace,
|
|
677
|
+
)
|
|
678
|
+
)
|
|
653
679
|
|
|
654
680
|
def plan_insert(self, query):
|
|
655
681
|
table = query.table
|
|
@@ -664,19 +690,23 @@ class QueryPlanner:
|
|
|
664
690
|
params = {}
|
|
665
691
|
if isinstance(select, Select) and select.using is not None:
|
|
666
692
|
for k, v in select.using.items():
|
|
667
|
-
if k.startswith(
|
|
693
|
+
if k.startswith("kb_"):
|
|
668
694
|
params[k] = v
|
|
669
695
|
|
|
670
|
-
self.plan.add_step(
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
696
|
+
self.plan.add_step(
|
|
697
|
+
InsertToTable(
|
|
698
|
+
table=table,
|
|
699
|
+
dataframe=last_step.result,
|
|
700
|
+
params=params,
|
|
701
|
+
)
|
|
702
|
+
)
|
|
675
703
|
else:
|
|
676
|
-
self.plan.add_step(
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
704
|
+
self.plan.add_step(
|
|
705
|
+
InsertToTable(
|
|
706
|
+
table=table,
|
|
707
|
+
query=query,
|
|
708
|
+
)
|
|
709
|
+
)
|
|
680
710
|
|
|
681
711
|
def plan_update(self, query):
|
|
682
712
|
last_step = None
|
|
@@ -690,31 +720,22 @@ class QueryPlanner:
|
|
|
690
720
|
update_command.from_select = None
|
|
691
721
|
|
|
692
722
|
table = query.table
|
|
693
|
-
self.plan.add_step(UpdateToTable(
|
|
694
|
-
table=table,
|
|
695
|
-
dataframe=last_step,
|
|
696
|
-
update_command=update_command
|
|
697
|
-
))
|
|
723
|
+
self.plan.add_step(UpdateToTable(table=table, dataframe=last_step, update_command=update_command))
|
|
698
724
|
|
|
699
725
|
def plan_delete(self, query: Delete):
|
|
700
|
-
|
|
701
726
|
# find subselects
|
|
702
727
|
main_integration, _ = self.resolve_database_table(query.table)
|
|
703
728
|
|
|
704
|
-
is_api_db = self.integrations.get(main_integration, {}).get(
|
|
729
|
+
is_api_db = self.integrations.get(main_integration, {}).get("class_type") == "api"
|
|
705
730
|
|
|
706
731
|
find_selects = self.get_nested_selects_plan_fnc(main_integration, force=is_api_db)
|
|
707
732
|
query_traversal(query.where, find_selects)
|
|
708
733
|
|
|
709
734
|
self.prepare_integration_select(main_integration, query.where)
|
|
710
735
|
|
|
711
|
-
return self.plan.add_step(DeleteStep(
|
|
712
|
-
table=query.table,
|
|
713
|
-
where=query.where
|
|
714
|
-
))
|
|
736
|
+
return self.plan.add_step(DeleteStep(table=query.table, where=query.where))
|
|
715
737
|
|
|
716
738
|
def plan_cte(self, query):
|
|
717
|
-
|
|
718
739
|
for cte in query.cte:
|
|
719
740
|
step = self.plan_select(cte.query)
|
|
720
741
|
name = cte.name.parts[-1]
|
|
@@ -727,17 +748,15 @@ class QueryPlanner:
|
|
|
727
748
|
|
|
728
749
|
# one integration and not mindsdb objects in query
|
|
729
750
|
if (
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
751
|
+
len(query_info["mdb_entities"]) == 0
|
|
752
|
+
and len(query_info["integrations"]) == 1
|
|
753
|
+
and "files" not in query_info["integrations"]
|
|
754
|
+
and "views" not in query_info["integrations"]
|
|
755
|
+
and len(query_info["user_functions"]) == 0
|
|
735
756
|
):
|
|
736
|
-
|
|
737
|
-
int_name = list(query_info['integrations'])[0]
|
|
757
|
+
int_name = list(query_info["integrations"])[0]
|
|
738
758
|
# if is sql database
|
|
739
|
-
if self.integrations.get(int_name, {}).get(
|
|
740
|
-
|
|
759
|
+
if self.integrations.get(int_name, {}).get("class_type") != "api":
|
|
741
760
|
# send to this integration
|
|
742
761
|
self.prepare_integration_select(int_name, query)
|
|
743
762
|
|
|
@@ -745,7 +764,6 @@ class QueryPlanner:
|
|
|
745
764
|
return last_step
|
|
746
765
|
|
|
747
766
|
def plan_select(self, query, integration=None):
|
|
748
|
-
|
|
749
767
|
if isinstance(query, (Union, Except, Intersect)):
|
|
750
768
|
return self.plan_union(query, integration=integration)
|
|
751
769
|
|
|
@@ -776,7 +794,7 @@ class QueryPlanner:
|
|
|
776
794
|
step = QueryStep(query, from_table=pd.DataFrame([None]))
|
|
777
795
|
return self.plan.add_step(step)
|
|
778
796
|
else:
|
|
779
|
-
raise PlanningException(f
|
|
797
|
+
raise PlanningException(f"Unsupported from_table {type(from_table)}")
|
|
780
798
|
|
|
781
799
|
def plan_sub_select(self, query, prev_step, add_absent_cols=False):
|
|
782
800
|
if (
|
|
@@ -807,13 +825,15 @@ class QueryPlanner:
|
|
|
807
825
|
def plan_union(self, query, integration=None):
|
|
808
826
|
step1 = self.plan_select(query.left, integration=integration)
|
|
809
827
|
step2 = self.plan_select(query.right, integration=integration)
|
|
810
|
-
operation =
|
|
828
|
+
operation = "union"
|
|
811
829
|
if isinstance(query, Except):
|
|
812
|
-
operation =
|
|
830
|
+
operation = "except"
|
|
813
831
|
elif isinstance(query, Intersect):
|
|
814
|
-
operation =
|
|
832
|
+
operation = "intersect"
|
|
815
833
|
|
|
816
|
-
return self.plan.add_step(
|
|
834
|
+
return self.plan.add_step(
|
|
835
|
+
UnionStep(left=step1.result, right=step2.result, unique=query.unique, operation=operation)
|
|
836
|
+
)
|
|
817
837
|
|
|
818
838
|
# method for compatibility
|
|
819
839
|
def from_query(self, query=None):
|
|
@@ -835,7 +855,7 @@ class QueryPlanner:
|
|
|
835
855
|
elif isinstance(query, Delete):
|
|
836
856
|
self.plan_delete(query)
|
|
837
857
|
else:
|
|
838
|
-
raise PlanningException(f
|
|
858
|
+
raise PlanningException(f"Unsupported query type {type(query)}")
|
|
839
859
|
|
|
840
860
|
plan = self.handle_partitioning(self.plan)
|
|
841
861
|
|
|
@@ -855,7 +875,7 @@ class QueryPlanner:
|
|
|
855
875
|
partition_step = None
|
|
856
876
|
for step in steps_in:
|
|
857
877
|
if isinstance(step, FetchDataframeStep) and step.params is not None:
|
|
858
|
-
batch_size = step.params.get(
|
|
878
|
+
batch_size = step.params.get("batch_size")
|
|
859
879
|
if batch_size is not None:
|
|
860
880
|
# found batched fetch
|
|
861
881
|
partition_step = FetchDataframeStepPartition(
|
|
@@ -863,7 +883,7 @@ class QueryPlanner:
|
|
|
863
883
|
integration=step.integration,
|
|
864
884
|
query=step.query,
|
|
865
885
|
raw_query=step.raw_query,
|
|
866
|
-
params=step.params
|
|
886
|
+
params=step.params,
|
|
867
887
|
)
|
|
868
888
|
steps_out.append(partition_step)
|
|
869
889
|
# mark plan
|
|
@@ -881,13 +901,14 @@ class QueryPlanner:
|
|
|
881
901
|
elif isinstance(step, QueryStep):
|
|
882
902
|
query = step.query
|
|
883
903
|
if (
|
|
884
|
-
query.group_by is None
|
|
885
|
-
and query.
|
|
904
|
+
query.group_by is None
|
|
905
|
+
and query.order_by is None
|
|
906
|
+
and query.distinct is False
|
|
907
|
+
and query.limit is None
|
|
908
|
+
and query.offset is None
|
|
886
909
|
):
|
|
887
910
|
no_identifiers = [
|
|
888
|
-
target
|
|
889
|
-
for target in step.query.targets
|
|
890
|
-
if not isinstance(target, (Star, Identifier))
|
|
911
|
+
target for target in step.query.targets if not isinstance(target, (Star, Identifier))
|
|
891
912
|
]
|
|
892
913
|
if len(no_identifiers) == 0:
|
|
893
914
|
can_be_partitioned = True
|