MindsDB 25.5.4.2__py3-none-any.whl → 25.6.2.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Note: this release of MindsDB has been flagged as potentially problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/a2a/agent.py +28 -25
- mindsdb/api/a2a/common/server/server.py +32 -26
- mindsdb/api/executor/command_executor.py +69 -14
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
- mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
- mindsdb/api/executor/planner/plan_join.py +67 -77
- mindsdb/api/executor/planner/query_planner.py +176 -155
- mindsdb/api/executor/planner/steps.py +37 -12
- mindsdb/api/executor/sql_query/result_set.py +45 -64
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
- mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
- mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
- mindsdb/api/executor/utilities/sql.py +42 -48
- mindsdb/api/http/namespaces/config.py +1 -1
- mindsdb/api/http/namespaces/file.py +14 -23
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
- mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
- mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
- mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
- mindsdb/integrations/libs/api_handler.py +261 -57
- mindsdb/integrations/libs/base.py +100 -29
- mindsdb/integrations/utilities/files/file_reader.py +99 -73
- mindsdb/integrations/utilities/handler_utils.py +23 -8
- mindsdb/integrations/utilities/sql_utils.py +35 -40
- mindsdb/interfaces/agents/agents_controller.py +196 -192
- mindsdb/interfaces/agents/constants.py +7 -1
- mindsdb/interfaces/agents/langchain_agent.py +42 -11
- mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
- mindsdb/interfaces/data_catalog/__init__.py +0 -0
- mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
- mindsdb/interfaces/database/database.py +81 -57
- mindsdb/interfaces/database/integrations.py +220 -234
- mindsdb/interfaces/database/log.py +72 -104
- mindsdb/interfaces/database/projects.py +156 -193
- mindsdb/interfaces/file/file_controller.py +21 -65
- mindsdb/interfaces/knowledge_base/controller.py +63 -10
- mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
- mindsdb/interfaces/skills/skills_controller.py +54 -36
- mindsdb/interfaces/skills/sql_agent.py +109 -86
- mindsdb/interfaces/storage/db.py +223 -79
- mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
- mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
- mindsdb/utilities/config.py +9 -2
- mindsdb/utilities/log.py +35 -26
- mindsdb/utilities/ml_task_queue/task.py +19 -22
- mindsdb/utilities/render/sqlalchemy_render.py +129 -181
- mindsdb/utilities/starters.py +40 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +253 -253
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +69 -61
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
```diff
--- a/mindsdb/api/executor/sql_query/steps/insert_step.py
+++ b/mindsdb/api/executor/sql_query/steps/insert_step.py
@@ -2,23 +2,15 @@ from mindsdb_sql_parser.ast import (
     Identifier,
 )
 
-from mindsdb.api.executor.planner.steps import (
-    SaveToTable,
-    InsertToTable,
-    CreateTableStep
-)
+from mindsdb.api.executor.planner.steps import SaveToTable, InsertToTable, CreateTableStep
 from mindsdb.api.executor.sql_query.result_set import ResultSet, Column
-from mindsdb.api.executor.exceptions import (
-    NotSupportedYet,
-    LogicError
-)
+from mindsdb.api.executor.exceptions import NotSupportedYet, LogicError
 from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
 
 from .base import BaseStepCall
 
 
 class InsertToTableCall(BaseStepCall):
-
     bind = InsertToTable
 
     def call(self, step):
@@ -35,16 +27,16 @@ class InsertToTableCall(BaseStepCall):
             integration_name = step.table.parts[0]
             table_name = Identifier(parts=step.table.parts[1:])
         else:
-            integration_name = self.context['database']
+            integration_name = self.context["database"]
             table_name = step.table
 
         dn = self.session.datahub.get(integration_name)
 
-        if hasattr(dn, 'create_table') is False:
+        if hasattr(dn, "create_table") is False:
             raise NotSupportedYet(f"Creating table in '{integration_name}' is not supported")
 
         if step.dataframe is not None:
-            data = self.steps_data[step.dataframe.
+            data = self.steps_data[step.dataframe.step_num]
         elif step.query is not None:
             data = ResultSet()
             if step.query.columns is None:
@@ -62,7 +54,7 @@ class InsertToTableCall(BaseStepCall):
             for row in step.query.values:
                 record = []
                 for v in row:
-                    if isinstance(v, Identifier) and v.parts[0] == 'None':
+                    if isinstance(v, Identifier) and v.parts[0] == "None":
                         # Allow explicitly inserting NULL values.
                         record.append(None)
                         continue
@@ -72,12 +64,12 @@ class InsertToTableCall(BaseStepCall):
 
             data.add_raw_values(records)
         else:
-            raise LogicError(f'Data not found for insert: {step}')
+            raise LogicError(f"Data not found for insert: {step}")
 
         # del 'service' columns
-        for col in data.find_columns('__mindsdb_row_id'):
+        for col in data.find_columns("__mindsdb_row_id"):
             data.del_column(col)
-        for col in data.find_columns('__mdb_forecast_offset'):
+        for col in data.find_columns("__mdb_forecast_offset"):
             data.del_column(col)
 
         # region del columns filtered at projection step
@@ -85,7 +77,7 @@ class InsertToTableCall(BaseStepCall):
         if columns_list is not None:
             filtered_column_names = [x.name for x in columns_list]
             for col in data.columns:
-                if col.name.startswith('predictor.'):
+                if col.name.startswith("predictor."):
                     continue
                 if col.name in filtered_column_names:
                     continue
@@ -101,39 +93,27 @@ class InsertToTableCall(BaseStepCall):
                 col_names.add(col.alias)
 
         response = dn.create_table(
-            table_name=table_name,
-            result_set=data,
-            is_replace=is_replace,
-            is_create=is_create,
-            params=step.params
+            table_name=table_name, result_set=data, is_replace=is_replace, is_create=is_create, params=step.params
         )
         return ResultSet(affected_rows=response.affected_rows)
 
 
 class SaveToTableCall(InsertToTableCall):
-
     bind = SaveToTable
 
 
 class CreateTableCall(BaseStepCall):
-
     bind = CreateTableStep
 
     def call(self, step):
-
         if len(step.table.parts) > 1:
             integration_name = step.table.parts[0]
             table_name = Identifier(parts=step.table.parts[1:])
         else:
-            integration_name = self.context['database']
+            integration_name = self.context["database"]
             table_name = step.table
 
         dn = self.session.datahub.get(integration_name)
 
-        dn.create_table(
-            table_name=table_name,
-            columns=step.columns,
-            is_replace=step.is_replace,
-            is_create=True
-        )
+        dn.create_table(table_name=table_name, columns=step.columns, is_replace=step.is_replace, is_create=True)
        return ResultSet()
```
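Most of the insert_step.py diff is quote and line-wrap normalization, but the VALUES branch it passes through encodes a real rule: a bare identifier named `None` in an INSERT is turned into SQL NULL. A minimal sketch of that rule, using a stand-in dataclass rather than the real mindsdb_sql_parser.ast.Identifier:

```python
# Stand-in for mindsdb_sql_parser.ast.Identifier, just enough to show the rule.
from dataclasses import dataclass, field


@dataclass
class Identifier:
    parts: list = field(default_factory=list)


def coerce_row(row):
    """Map a parsed VALUES row to raw values, treating the bare identifier
    `None` as an explicit SQL NULL (the check the diff reformats)."""
    record = []
    for v in row:
        if isinstance(v, Identifier) and v.parts[0] == "None":
            record.append(None)  # explicit NULL
            continue
        record.append(v)
    return record


assert coerce_row([1, Identifier(["None"]), "x"]) == [1, None, "x"]
```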
```diff
--- a/mindsdb/api/executor/sql_query/steps/subselect_step.py
+++ b/mindsdb/api/executor/sql_query/steps/subselect_step.py
@@ -2,29 +2,22 @@ from collections import defaultdict
 
 import pandas as pd
 
-from mindsdb_sql_parser.ast import (
-    Identifier, Select, Star, Constant, Parameter, Function, Variable, BinaryOperation
-)
+from mindsdb_sql_parser.ast import Identifier, Select, Star, Constant, Parameter, Function, Variable, BinaryOperation
 
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import SERVER_VARIABLES
-
 from mindsdb.api.executor.planner.step_result import Result
 from mindsdb.api.executor.planner.steps import SubSelectStep, QueryStep
-from mindsdb.integrations.utilities.query_traversal import query_traversal
-
 from mindsdb.api.executor.sql_query.result_set import ResultSet, Column
 from mindsdb.api.executor.utilities.sql import query_df
-
-from mindsdb.interfaces.query_context.context_controller import query_context_controller
-
 from mindsdb.api.executor.exceptions import KeyColumnDoesNotExist
+from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb.interfaces.query_context.context_controller import query_context_controller
 
 from .base import BaseStepCall
 from .fetch_dataframe import get_fill_param_fnc
 
 
 class SubSelectStepCall(BaseStepCall):
-
     bind = SubSelectStep
 
     def call(self, step):
@@ -32,12 +25,12 @@ class SubSelectStepCall(BaseStepCall):
 
         table_name = step.table_name
         if table_name is None:
-            table_name = 'df_table'
+            table_name = "df_table"
         else:
             table_name = table_name
 
         query = step.query
-        query.from_table = Identifier('df_table')
+        query.from_table = Identifier("df_table")
 
         if step.add_absent_cols and isinstance(query, Select):
             query_cols = set()
@@ -64,6 +57,7 @@ class SubSelectStepCall(BaseStepCall):
             if isinstance(node, Parameter) and isinstance(node.value, Result):
                 prev_result = self.steps_data[node.value.step_num]
                 return Constant(prev_result.get_column_values(col_idx=0)[0])
+
         query_traversal(query, inject_values)
 
         df = result.to_df()
@@ -76,7 +70,6 @@ class SubSelectStepCall(BaseStepCall):
 
 
 class QueryStepCall(BaseStepCall):
-
     bind = QueryStep
 
     def call(self, step: QueryStep):
@@ -103,6 +96,15 @@ class QueryStepCall(BaseStepCall):
                 if col.table_name != col.table_alias:
                     tbl_idx[col.table_alias].append(name)
 
+        lower_col_idx = {}
+        for key, value in col_idx.items():
+            if isinstance(key, int):
+                key = str(key)
+            if isinstance(key, str):
+                lower_col_idx[key.lower()] = value
+                continue
+            lower_col_idx[tuple(str(x).lower() for x in key)] = value
+
         # get aliases of first level
         aliases = []
         for col in query.targets:
@@ -120,7 +122,8 @@ class QueryStepCall(BaseStepCall):
             "user": self.session.username,
             "version": "8.0.17",
             "current_schema": "public",
-            "
+            "schema": "public",
+            "connection_id": self.context.get("connection_id"),
         }
         if function_name in functions_results:
             return Constant(functions_results[function_name], alias=Identifier(parts=[function_name]))
@@ -144,14 +147,11 @@ class QueryStepCall(BaseStepCall):
             else:
                 # replace with all columns from table
                 table_name = node.parts[-2]
-                return [
-                    Identifier(parts=[col])
-                    for col in tbl_idx.get(table_name, [])
-                ]
+                return [Identifier(parts=[col]) for col in tbl_idx.get(table_name, [])]
 
         if node.parts[-1].lower() == "session_user":
             return Constant(self.session.username, alias=node)
-        if node.parts[-1].lower() == '$$':
+        if node.parts[-1].lower() == "$$":
             # NOTE: sinve version 9.0 mysql client sends query 'select $$'.
             # Connection can be continued only if answer is parse error.
             raise ValueError(
@@ -159,23 +159,31 @@ class QueryStepCall(BaseStepCall):
                 "version for the right syntax to use near '$$' at line 1"
             )
 
-
-
-
-
-
-        else:
-            table_name = node.parts[-2]
-            key = (table_name, col_name)
+        match node.parts, node.is_quoted:
+            case [column_name], [column_quoted]:
+                if column_name in aliases:
+                    # key is defined as alias
+                    return
 
-
-
-
-
+                key = column_name if column_quoted else column_name.lower()
+
+                if key not in col_idx and key not in lower_col_idx:
+                    # it can be local alias of a query, like:
+                    # SELECT t1.a + t2.a col1, min(t1.a) c
+                    # FROM dummy_data.tbl1 as t1
+                    # JOIN pg.tbl2 as t2 on t1.c=t2.c
+                    # group by col1
+                    # order by c -- <--- "с" is alias
+                    return
+            case [*_, table_name, column_name], [*_, column_quoted]:
+                key = (table_name, column_name) if column_quoted else (table_name.lower(), column_name.lower())
+
+                search_idx = col_idx if column_quoted else lower_col_idx
 
-
+        if key not in search_idx:
+            raise KeyColumnDoesNotExist(f"Table not found for column: {key}")
 
-        new_name =
+        new_name = search_idx[key]
         return Identifier(parts=[new_name], alias=node.alias)
 
         # fill params
@@ -196,14 +204,14 @@ class QueryStepCall(BaseStepCall):
             if key not in col_idx:
                 # exclude
                 node.args = [Constant(0), Constant(0)]
-                node.op = '='
+                node.op = "="
 
         query_traversal(query.where, remove_not_used_conditions)
 
         query_traversal(query, check_fields)
         query.where = query_context_controller.remove_lasts(query.where)
 
-        query.from_table = Identifier('df_table')
+        query.from_table = Identifier("df_table")
         res = query_df(df, query, session=self.session)
 
         return ResultSet.from_df_cols(df=res, columns_dict=col_names, strict=False)
```
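The QueryStep part of this file is the one behavioral change in the hunk set: column lookups now follow MySQL-style identifier semantics, matching unquoted names case-insensitively against a lowercased shadow index and quoted names exactly. A self-contained sketch of that resolution (Python 3.10+ for `match`; the `parts`/`is_quoted` lists stand in for the fields of a mindsdb_sql_parser Identifier node):

```python
# Column index as built by the step: plain names and (table, column) tuples.
col_idx = {"ID": "t1.ID", ("T1", "Name"): "t1.Name"}

# Build a lowercase shadow index once, as the hunk does.
lower_col_idx = {}
for key, value in col_idx.items():
    if isinstance(key, str):
        lower_col_idx[key.lower()] = value
    else:
        lower_col_idx[tuple(str(x).lower() for x in key)] = value


def resolve(parts, is_quoted):
    # Quoted identifiers keep their case and hit the exact index;
    # unquoted ones are lowercased and hit the shadow index.
    match parts, is_quoted:
        case [column], [quoted]:
            key = column if quoted else column.lower()
        case [*_, table, column], [*_, quoted]:
            key = (table, column) if quoted else (table.lower(), column.lower())
    search_idx = col_idx if quoted else lower_col_idx
    return search_idx.get(key)


assert resolve(["id"], [False]) == "t1.ID"                 # unquoted: case-insensitive
assert resolve(["ID"], [True]) == "t1.ID"                  # quoted: exact match
assert resolve(["t1", "name"], [False, False]) == "t1.Name"
```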
```diff
--- a/mindsdb/api/executor/utilities/sql.py
+++ b/mindsdb/api/executor/utilities/sql.py
@@ -8,10 +8,7 @@ import numpy as np
 from mindsdb_sql_parser import parse_sql
 from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.utilities.query_traversal import query_traversal
-from mindsdb_sql_parser.ast import (
-    ASTNode, Select, Identifier,
-    Function, Constant
-)
+from mindsdb_sql_parser.ast import ASTNode, Select, Identifier, Function, Constant
 from mindsdb.utilities.functions import resolve_table_identifier, resolve_model_identifier
 
 from mindsdb.utilities import log
@@ -53,37 +50,38 @@ def get_query_models(query: ASTNode, default_database: str = None) -> List[tuple]:
 
 
 def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_functions=None):
-
-
-
+    """Duckdb need to infer column types if column.dtype == object. By default it take 1000 rows,
+    but that may be not sufficient for some cases. This func try to run query multiple times
+    increasing butch size for type infer
 
-
-
-
-
+    Args:
+        query_str (str): query to execute
+        dataframes (dict): dataframes
+        user_functions: functions controller which register new functions in connection
 
-
-
-
-
+    Returns:
+        pandas.DataFrame
+        pandas.columns
+    """
 
     for name, value in dataframes.items():
         locals()[name] = value
 
-    con = duckdb.connect(database=':memory:')
+    con = duckdb.connect(database=":memory:")
     if user_functions:
         user_functions.register(con)
 
+    exception = None
     for sample_size in [1000, 10000, 1000000]:
         try:
-            con.execute(f'set global pandas_analyze_sample={sample_size};')
+            con.execute(f"set global pandas_analyze_sample={sample_size};")
             result_df = con.execute(query_str).fetchdf()
-        except InvalidInputException:
-
+        except InvalidInputException as e:
+            exception = e
         else:
             break
     else:
-        raise
+        raise exception
     description = con.description
     con.close()
 
@@ -91,14 +89,14 @@ def query_df_with_type_infer_fallback(query_str: str, dataframes: dict, user_functions=None):
 
 
 def query_df(df, query, session=None):
-    """
+    """Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.
 
-
-
-
+    Args:
+        df (pandas.DataFrame): data
+        query (mindsdb_sql_parser.ast.Select | str): select query
 
-
-
+    Returns:
+        pandas.DataFrame
     """
 
     if isinstance(query, str):
@@ -106,14 +104,11 @@ def query_df(df, query, session=None):
     else:
         query_ast = copy.deepcopy(query)
 
-    if isinstance(query_ast, Select) is False
-
-        raise Exception(
-            "Only 'SELECT from TABLE' statements supported for internal query"
-        )
+    if isinstance(query_ast, Select) is False or isinstance(query_ast.from_table, Identifier) is False:
+        raise Exception("Only 'SELECT from TABLE' statements supported for internal query")
 
     table_name = query_ast.from_table.parts[0]
-    query_ast.from_table.parts = ['df']
+    query_ast.from_table.parts = ["df"]
 
     json_columns = set()
 
@@ -131,18 +126,18 @@ def query_df(df, query, session=None):
             return node
         if isinstance(node, Function):
             fnc_name = node.op.lower()
-            if fnc_name == 'database' and len(node.args) == 0:
+            if fnc_name == "database" and len(node.args) == 0:
                 if session is not None:
                     cur_db = session.database
                 else:
                     cur_db = None
                 return Constant(cur_db)
-            elif fnc_name == 'truncate':
+            elif fnc_name == "truncate":
                 # replace mysql 'truncate' function to duckdb 'round'
-                node.op = 'round'
+                node.op = "round"
                 if len(node.args) == 1:
                     node.args.append(0)
-            elif fnc_name == 'json_extract':
+            elif fnc_name == "json_extract":
                 json_columns.add(node.args[0].parts[-1])
             else:
                 if user_functions is not None:
@@ -160,28 +155,27 @@ def query_df(df, query, session=None):
         except Exception:
             pass
         return v
+
     for column in json_columns:
         df[column] = df[column].apply(_convert)
 
-    render = SqlalchemyRender('postgres')
+    render = SqlalchemyRender("postgres")
     try:
         query_str = render.get_string(query_ast, with_failback=False)
     except Exception as e:
-        logger.error(
-            f"Exception during query casting to 'postgres' dialect. Query: {str(query)}. Error: {e}"
-        )
+        logger.error(f"Exception during query casting to 'postgres' dialect. Query: {str(query)}. Error: {e}")
         query_str = render.get_string(query_ast, with_failback=True)
 
     # workaround to prevent duckdb.TypeMismatchException
     if len(df) > 0:
-        if table_name.lower() in ('models', 'predictors'):
-            if 'TRAINING_OPTIONS' in df.columns:
-                df = df.astype({'TRAINING_OPTIONS': 'string'})
-        if table_name.lower() == 'ml_engines':
-            if 'CONNECTION_DATA' in df.columns:
-                df = df.astype({'CONNECTION_DATA': 'string'})
-
-    result_df, description = query_df_with_type_infer_fallback(query_str, {'df': df}, user_functions=user_functions)
+        if table_name.lower() in ("models", "predictors"):
+            if "TRAINING_OPTIONS" in df.columns:
+                df = df.astype({"TRAINING_OPTIONS": "string"})
+        if table_name.lower() == "ml_engines":
+            if "CONNECTION_DATA" in df.columns:
+                df = df.astype({"CONNECTION_DATA": "string"})
+
+    result_df, description = query_df_with_type_infer_fallback(query_str, {"df": df}, user_functions=user_functions)
     result_df.replace({np.nan: None}, inplace=True)
     result_df.columns = [x[0] for x in description]
     return result_df
```
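The new docstring spells out why `query_df_with_type_infer_fallback` loops: DuckDB samples object-dtype pandas columns (1,000 rows by default, via the `pandas_analyze_sample` setting) to infer types, and mixed columns can need a larger sample. The hunk also repairs the failure path, keeping the caught `InvalidInputException` and re-raising it after the last attempt instead of a bare `raise`. A condensed sketch of the same retry pattern:

```python
import duckdb
import pandas as pd
from duckdb import InvalidInputException


def query_with_fallback(query_str: str, df: pd.DataFrame) -> pd.DataFrame:
    """Retry a DuckDB query over a pandas frame with growing type-infer samples."""
    con = duckdb.connect(database=":memory:")
    exception = None
    for sample_size in [1000, 10000, 1000000]:
        try:
            # Raise the number of rows DuckDB samples when inferring
            # types for object-dtype columns, then retry the query.
            con.execute(f"set global pandas_analyze_sample={sample_size};")
            result = con.execute(query_str).fetchdf()
        except InvalidInputException as e:
            exception = e  # keep the last failure so it can be re-raised
        else:
            return result
    raise exception


# Inside query_with_fallback, DuckDB's replacement scan resolves the table
# name `df` from the function's local variables, which is why the original
# code injects each registered frame via locals().
out = query_with_fallback("select count(*) c from df", pd.DataFrame({"a": [1, 2]}))
```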
```diff
--- a/mindsdb/api/http/namespaces/config.py
+++ b/mindsdb/api/http/namespaces/config.py
@@ -28,7 +28,7 @@ class GetConfig(Resource):
     def get(self):
         config = Config()
         resp = {"auth": {"http_auth_enabled": config["auth"]["http_auth_enabled"]}}
-        for key in ["default_llm", "default_embedding_model", "default_reranking_model"]:
+        for key in ["default_llm", "default_embedding_model", "default_reranking_model", "a2a"]:
             value = config.get(key)
             if value is not None:
                 resp[key] = value
```
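A one-line change with an API-visible effect: `a2a` joins the allow-list of top-level config keys that `GET /config` echoes back. A sketch of the resulting behavior, with a plain dict (and made-up values) standing in for MindsDB's `Config` accessor:

```python
# Plain dict standing in for mindsdb.utilities.config.Config.
config = {"default_llm": "gpt-4o", "a2a": {"enabled": True}}

resp = {"auth": {"http_auth_enabled": False}}
for key in ["default_llm", "default_embedding_model", "default_reranking_model", "a2a"]:
    value = config.get(key)
    if value is not None:  # unset keys are simply omitted from the response
        resp[key] = value

assert "a2a" in resp and "default_embedding_model" not in resp
```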
```diff
--- a/mindsdb/api/http/namespaces/file.py
+++ b/mindsdb/api/http/namespaces/file.py
@@ -18,6 +18,7 @@ from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
 from mindsdb.utilities.fs import safe_extract
+from mindsdb.integrations.utilities.files.file_reader import FileProcessingError
 
 logger = log.getLogger(__name__)
 MAX_FILE_SIZE = 1024 * 1024 * 100  # 100Mb
@@ -26,7 +27,7 @@ MAX_FILE_SIZE = 1024 * 1024 * 100  # 100Mb
 @ns_conf.route("/")
 class FilesList(Resource):
     @ns_conf.doc("get_files_list")
-    @api_endpoint_metrics('GET', '/files')
+    @api_endpoint_metrics("GET", "/files")
     def get(self):
         """List all files"""
         return ca.file_controller.get_files()
@@ -36,7 +37,7 @@ class FilesList(Resource):
 @ns_conf.param("name", "MindsDB's name for file")
 class File(Resource):
     @ns_conf.doc("put_file")
-    @api_endpoint_metrics('PUT', '/files/file')
+    @api_endpoint_metrics("PUT", "/files/file")
     def put(self, name: str):
         """add new file
         params in FormData:
@@ -105,15 +106,13 @@ class File(Resource):
         if data.get("source_type") == "url":
             url = data["source"]
             config = Config()
-            allowed_urls = config.get('file_upload_domains', [])
+            allowed_urls = config.get("file_upload_domains", [])
             if allowed_urls and not validate_urls(url, allowed_urls):
                 return http_error(400, "Invalid File URL source.", f"Allowed hosts are: {', '.join(allowed_urls)}.")
             data["file"] = clear_filename(data["name"])
             is_cloud = config.get("cloud", False)
             if is_cloud and is_private_url(url):
-                return http_error(
-                    400, f'URL is private: {url}'
-                )
+                return http_error(400, f"URL is private: {url}")
 
             if is_cloud is True and ctx.user_class != 1:
                 info = requests.head(url)
@@ -130,14 +129,10 @@ class File(Resource):
                         "Сan't determine remote file size",
                     )
                 if file_size > MAX_FILE_SIZE:
-                    return http_error(
-                        400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB"
-                    )
+                    return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
             with requests.get(url, stream=True) as r:
                 if r.status_code != 200:
-                    return http_error(
-                        400, "Error getting file", f"Got status code: {r.status_code}"
-                    )
+                    return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
                 file_path = os.path.join(temp_dir_path, data["file"])
                 with open(file_path, "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
@@ -158,30 +153,26 @@ class File(Resource):
             files = os.listdir(temp_dir_path)
             if len(files) != 1:
                 os.rmdir(temp_dir_path)
-                return http_error(
-                    400, "Wrong content.", "Archive must contain only one data file."
-                )
+                return http_error(400, "Wrong content.", "Archive must contain only one data file.")
             file_path = os.path.join(temp_dir_path, files[0])
             mindsdb_file_name = files[0]
             if not os.path.isfile(file_path):
                 os.rmdir(temp_dir_path)
-                return http_error(
-                    400, "Wrong content.", "Archive must contain data file in root."
-                )
+                return http_error(400, "Wrong content.", "Archive must contain data file in root.")
 
         try:
-            ca.file_controller.save_file(
-
-            )
+            ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name)
+        except FileProcessingError as e:
+            return http_error(400, "Error", str(e))
         except Exception as e:
-            return http_error(500, 'Error', str(e))
+            return http_error(500, "Error", str(e))
         finally:
             shutil.rmtree(temp_dir_path, ignore_errors=True)
 
         return "", 200
 
     @ns_conf.doc("delete_file")
-    @api_endpoint_metrics('DELETE', '/files/file')
+    @api_endpoint_metrics("DELETE", "/files/file")
     def delete(self, name: str):
         """delete file"""
 
```
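Besides formatting, file.py gains a distinct failure path: `FileProcessingError`, imported from `mindsdb.integrations.utilities.files.file_reader`, now maps to HTTP 400 while any other exception keeps returning 500. A sketch of that mapping, with hypothetical stand-ins for the controller's `save_file` and the `http_error` helper:

```python
class FileProcessingError(Exception):
    """Stand-in for the file reader's parse error."""


def http_error(status, title, detail):
    # Simplified stand-in for MindsDB's http_error helper.
    return {"title": title, "detail": detail}, status


def save_uploaded(save_file, name, path, original_name):
    try:
        save_file(name, path, file_name=original_name)
    except FileProcessingError as e:
        return http_error(400, "Error", str(e))  # bad input: the client can fix it
    except Exception as e:
        return http_error(500, "Error", str(e))  # unexpected server-side failure
    return "", 200


def rejecting_save(name, path, file_name=None):
    raise FileProcessingError("could not parse file")


_, status = save_uploaded(rejecting_save, "data.csv", "/tmp/data.csv", "data.csv")
assert status == 400
```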
```diff
--- a/mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py
+++ b/mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py
@@ -8,6 +8,7 @@
  * permission of MindsDB Inc
  *******************************************************
 """
+
 import struct
 
 from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import (
@@ -25,16 +26,16 @@ NULL_VALUE_INT = ord(NULL_VALUE)
 
 
 class Datum:
-    __slots__ = ['value', 'var_type', 'var_len']
+    __slots__ = ["value", "var_type", "var_len"]
 
     def __init__(self, var_type, value=None, var_len=None):
         # TODO other types: float, timestamp
         self.value = b""
 
         if var_len is None:
-            idx = var_type.find('<')
-            var_len = var_type[idx + 1: -1]
-            var_type = var_type[:
+            idx = var_type.find("<")
+            var_len = var_type[idx + 1 : -1]
+            var_type = var_type[:idx]
         self.var_type = var_type
         self.var_len = var_len
 
@@ -128,7 +129,7 @@ class Datum:
         return self.get_serializer()(self.value)
 
     def get_serializer(self):
-        if self.var_type
+        if self.var_type in ("string", "byte"):
             if self.var_len == "lenenc":
                 if isinstance(self.value, bytes):
                     return self.serialize_bytes
@@ -140,15 +141,13 @@ class Datum:
             if self.var_len == "packet":
                 return lambda v: v.get_packet_string()
             else:
-                return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[
-                    :int(self.var_len)
-                ]
+                return lambda v: struct.pack(self.var_len + "s", bytes(v, "utf-8"))[: int(self.var_len)]
 
         if self.var_type == "int":
             if self.var_len == "lenenc":
                 return self.serialize_int
             else:
-                return lambda v: struct.pack("Q", v)[:int(self.var_len)]
+                return lambda v: struct.pack("Q", v)[: int(self.var_len)]
 
     @classmethod
     def serialize_str_eof(cls, value):
@@ -157,9 +156,7 @@ class Datum:
         if length == 0:
             return b""
         else:
-            return struct.pack(
-                "{len}s".format(len=var_len), bytes(value, "utf-8")
-            )[:length]
+            return struct.pack("{len}s".format(len=var_len), bytes(value, "utf-8"))[:length]
 
     # def serialize_obj(self, value):
     #     return self.serialize_str(str(value))
@@ -170,7 +167,6 @@ class Datum:
 
     @classmethod
     def serialize_bytes(cls, value):
-
         val_len = len(value)
 
         if val_len == 0:
@@ -181,23 +177,11 @@ class Datum:
 
         byte_count = -(val_len.bit_length() // (-8))
         if byte_count <= 2:
-            return (
-                TWO_BYTE_ENC
-                + struct.pack("H", val_len)
-                + value
-            )
+            return TWO_BYTE_ENC + struct.pack("H", val_len) + value
         if byte_count <= 3:
-            return (
-                THREE_BYTE_ENC
-                + struct.pack("i", val_len)[:3]
-                + value
-            )
+            return THREE_BYTE_ENC + struct.pack("i", val_len)[:3] + value
         if byte_count <= 8:
-            return (
-                THREE_BYTE_ENC
-                + struct.pack("Q", val_len)
-                + value
-            )
+            return THREE_BYTE_ENC + struct.pack("Q", val_len) + value
 
 
 def test():
```