MindsDB 25.6.4.0__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB has been flagged; see the registry's advisory for details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/langchain_agent.py +7 -5
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
- mindsdb/interfaces/knowledge_base/controller.py +115 -89
- mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED
@@ -1,6 +1,6 @@
 __title__ = "MindsDB"
 __package_name__ = "mindsdb"
-__version__ = "25.6.4.0"
+__version__ = "25.7.1.0"
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = "MindsDB Inc"
mindsdb/api/executor/command_executor.py
CHANGED
@@ -36,6 +36,8 @@ from mindsdb_sql_parser.ast import (
     Tuple,
     Function,
     Variable,
+    Intersect,
+    Except,
 )

 # typed models
@@ -580,9 +582,6 @@ class ExecuteCommands:
             return ret
             query = SQLQuery(statement, session=self.session, database=database_name)
             return self.answer_select(query)
-        elif statement_type is Union:
-            query = SQLQuery(statement, session=self.session, database=database_name)
-            return self.answer_select(query)
         elif statement_type is Explain:
             return self.answer_show_columns(statement.target, database_name=database_name)
         elif statement_type is CreateTable:
@@ -627,6 +626,9 @@ class ExecuteCommands:
             return self.answer_create_kb_index(statement, database_name)
         elif statement_type is EvaluateKnowledgeBase:
             return self.answer_evaluate_kb(statement, database_name)
+        elif statement_type in (Union, Intersect, Except):
+            query = SQLQuery(statement, session=self.session, database=database_name)
+            return self.answer_select(query)
         else:
             logger.warning(f"Unknown SQL statement: {sql}")
             raise NotSupportedYet(f"Unknown SQL statement: {sql}")
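Editor's note: the two hunks above replace the dedicated Union branch with a single branch covering all three set operations. A minimal sketch of what now dispatches the same way, assuming the parser accepts INTERSECT/EXCEPT in this release; the table names are hypothetical:

    # Sketch only: set operations are now dispatched exactly like SELECT.
    from mindsdb_sql_parser import parse_sql
    from mindsdb_sql_parser.ast import Union, Intersect, Except

    statement = parse_sql("SELECT name FROM t_2024 INTERSECT SELECT name FROM t_2025")
    if type(statement) in (Union, Intersect, Except):
        # In the executor this becomes:
        #   query = SQLQuery(statement, session=self.session, database=database_name)
        #   return self.answer_select(query)
        print("routed through SQLQuery/answer_select")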
@@ -1554,9 +1556,9 @@ class ExecuteCommands:
         if is_full:
             targets.extend(
                 [
-                    Constant(
-                    Constant("
-                    Constant(
+                    Constant(None, alias=Identifier("Collation")),
+                    Constant("select", alias=Identifier("Privileges")),
+                    Constant(None, alias=Identifier("Comment")),
                 ]
             )
         new_statement = Select(
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py
CHANGED
@@ -177,7 +177,7 @@ class InformationSchemaDataNode(DataNode):
         if table_name not in self.tables:
             raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
         table_columns_names = self.tables[table_name].columns
-        df = pd.DataFrame(
+        df = pd.DataFrame(pd.Series(table_columns_names, name=INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME))
         for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
             if column_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME:
                 continue
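Editor's note: the rewritten constructor seeds the DataFrame with a single named column rather than building it empty. A standalone illustration of the same pandas pattern; the column values below are made up:

    # pd.DataFrame(pd.Series(..., name=...)) yields a one-column frame.
    import pandas as pd

    table_columns_names = ["HOST", "USER", "SELECT_PRIV"]
    df = pd.DataFrame(pd.Series(table_columns_names, name="COLUMN_NAME"))
    print(df.columns.tolist())  # ['COLUMN_NAME']
    print(len(df))              # 3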
mindsdb/api/executor/datahub/datanodes/integration_datanode.py
CHANGED
@@ -1,6 +1,5 @@
 import time
 import inspect
-from textwrap import dedent
 from dataclasses import astuple
 from typing import Iterable, List
@@ -21,6 +20,7 @@ from mindsdb.integrations.utilities.utils import get_class_name
 from mindsdb.metrics import metrics
 from mindsdb.utilities import log
 from mindsdb.utilities.profiler import profiler
+from mindsdb.utilities.exception import format_db_error_message
 from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type

 logger = log.getLogger(__name__)
@@ -244,18 +244,16 @@ class IntegrationDataNode(DataNode):
             failed_sql_query = native_query
             if query is not None:
                 failed_sql_query = query.to_string()
+
             raise Exception(
-                Error: {result.error_message}
-                Failed Query: {failed_sql_query}
-                """)
+                format_db_error_message(
+                    db_name=self.integration_handler.name,
+                    db_type=self.integration_handler.__class__.name,
+                    db_error_msg=result.error_message,
+                    failed_query=failed_sql_query,
+                )
             )
+
         if result.type == RESPONSE_TYPE.OK:
             return DataHubResponse(affected_rows=result.affected_rows)
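Editor's note: this diff shows only the call site of format_db_error_message; its body lands in mindsdb/utilities/exception.py (+47 -7 in the file list above) and is not visible here. A hypothetical sketch consistent with the keyword arguments at the call site, offered purely as an assumption:

    # Hypothetical sketch -- NOT the actual implementation, which this
    # diff does not show. The signature mirrors the call site above.
    def format_db_error_message(db_name: str, db_type: str, db_error_msg: str, failed_query: str) -> str:
        """Collapse integration-error details into one readable message."""
        return (
            f"Error from {db_type} database '{db_name}': {db_error_msg}\n"
            f"Failed query: {failed_query}"
        )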
mindsdb/api/executor/datahub/datanodes/system_tables.py
CHANGED
@@ -623,7 +623,7 @@ class MetaColumnStatisticsTable(Table):
         columns = record.meta_columns

         for column in columns:
-            column_statistics = column.meta_column_statistics[0]
+            column_statistics = column.meta_column_statistics[0] if column.meta_column_statistics else None

             item = {
                 "TABLE_SCHEMA": database_name,
mindsdb/api/executor/planner/query_prepare.py
CHANGED
@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils

 def to_string(identifier):
     # alternative to AST.to_string() but without quoting
-    return
+    return ".".join(identifier.parts)


 class Table:
@@ -32,7 +32,6 @@ class Column:
     def __init__(self, node=None, table=None, name=None, type=None):
         alias = None
         if node is not None:
-
             if isinstance(node, ast.Identifier):
                 # set name
                 name = node.parts[-1]  # ???
@@ -67,26 +66,25 @@ class Statement:
         self.offset = 0


-class PreparedStatementPlanner():
-
+class PreparedStatementPlanner:
     def __init__(self, planner):
         self.planner = planner

     def get_type_of_var(self, v):
         if isinstance(v, str):
-            return
+            return "str"
         elif isinstance(v, float):
-            return
+            return "float"
         elif isinstance(v, int):
-            return
+            return "integer"

-        return
+        return "str"

     def get_statement_info(self):
         stmt = self.planner.statement

         if stmt is None:
-            raise PlanningException(
+            raise PlanningException("Statement is not prepared")

         columns_result = []
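Editor's note: one detail worth flagging in get_type_of_var as reconstructed above: the float check precedes the int check, and bool values (a subclass of int) fall through to "integer". A standalone restatement using the same literals:

    # Restatement of the type mapping above. Note: isinstance(True, int)
    # is True in Python, so booleans map to "integer" here.
    def get_type_of_var(v):
        if isinstance(v, str):
            return "str"
        elif isinstance(v, float):
            return "float"
        elif isinstance(v, int):
            return "integer"
        return "str"

    assert get_type_of_var(3.5) == "float"
    assert get_type_of_var(7) == "integer"
    assert get_type_of_var(True) == "integer"
    assert get_type_of_var(None) == "str"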
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
             if column.table is not None:
                 table = column.table.name
                 ds = column.table.ds
-            columns_result.append(
+            columns_result.append(
+                dict(
+                    alias=column.alias,
+                    type=column.type,
+                    name=column.name,
+                    table_name=table,
+                    table_alias=table,
+                    ds=ds,
+                )
+            )

         parameters = []
         for param in stmt.params:
-            name =
-            parameters.append(
-            'parameters': parameters,
-            'columns': columns_result
-            }
+            name = "?"
+            parameters.append(
+                dict(
+                    alias=name,
+                    type="str",
+                    name=name,
+                )
+            )

-
+        return {"parameters": parameters, "columns": columns_result}

+    def get_table_of_column(self, t):
         tables_map = self.planner.statement.tables_map

         # get tables to check
         if len(t.parts) > 1:
             # try to find table
             table_parts = t.parts[:-1]
-            table_name =
+            table_name = ".".join(table_parts)
             if table_name in tables_map:
                 return tables_map[table_name]

             elif len(table_parts) > 1:
                 # maybe datasource is 1st part
                 table_parts = table_parts[1:]
-                table_name =
+                table_name = ".".join(table_parts)
                 if table_name in tables_map:
                     return tables_map[table_name]
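Editor's note: for orientation, this is the metadata shape get_statement_info now returns, assembled standalone; the keys come from the dict literals above, the values are hypothetical:

    # The dict shape produced by get_statement_info(); keys match the
    # code above, values are made up for illustration.
    statement_info = {
        "parameters": [
            {"alias": "?", "type": "str", "name": "?"},  # one entry per placeholder
        ],
        "columns": [
            {
                "alias": "name",
                "type": "str",
                "name": "name",
                "table_name": "t_2024",
                "table_alias": "t_2024",
                "ds": "example_ds",
            },
        ],
    }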
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
         # in reverse order
         for p in table.parts[::-1]:
             parts.insert(0, p)
-            keys.append(
+            keys.append(".".join(parts))

         # remember table
-        tbl = Table(
-            ds=ds,
-            node=table,
-            is_predictor=is_predictor
-        )
+        tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
         tbl.keys = keys

         return tbl
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
         stmt.tables_map = {}
         stmt.tables_lvl1 = []
         if query.from_table is not None:
-
             if isinstance(query.from_table, ast.Join):
                 # get all tables
                 join_tables = utils.convert_join_to_list(query.from_table)
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():

             if isinstance(query.from_table, ast.Select):
                 # nested select, get only last select
-                join_tables = [
-                    dict(
-                        table=utils.get_deepest_select(query.from_table).from_table
-                    )
-                ]
+                join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]

             for i, join_table in enumerate(join_tables):
-                table = join_table[
+                table = join_table["table"]
                 if isinstance(table, ast.Identifier):
                     tbl = self.table_from_identifier(table)

                     if tbl.is_predictor:
                         # Is the last table?
                         if i + 1 < len(join_tables):
-                            raise PlanningException(
+                            raise PlanningException("Predictor must be last table in query")

                     stmt.tables_lvl1.append(tbl)
                     for key in tbl.keys:
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
         # is there any predictors at other levels?
         lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
         if len(query_predictors) != len(lvl1_predictors):
-            raise PlanningException(
+            raise PlanningException("Predictor is not at first level")

         # === get targets ===
         columns = []
         get_all_tables = False
         for t in query.targets:
-
             column = Column(t)

             # column alias
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
             column.type = self.get_type_of_var(t.value)
         elif isinstance(t, ast.Function):
             # mysql function
-            if t.op ==
-                column.type =
+            if t.op == "connection_id":
+                column.type = "integer"
             else:
-                column.type =
+                column.type = "str"
         else:
             # TODO go down into lower level.
             # It can be function, operation, select.
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
             # TODO add several known types for function, i.e ABS-int

             # TODO TypeCast - as casted type
-            column.type =
+            column.type = "str"

         if alias is not None:
             column.alias = alias
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
         if step.result_data is not None:
             # save results

-            if len(step.result_data[
-                table_info = step.result_data[
-                columns_info = step.result_data[
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]

                 table.columns = []
                 table.ds = table_info[0]
                 for col in columns_info:
                     if isinstance(col, tuple):
                         # is predictor
-                        col = dict(name=col[0], type=
+                        col = dict(name=col[0], type="str")
                     table.columns.append(
                         Column(
-                            name=col[
-                            type=col[
+                            name=col["name"],
+                            type=col["type"],
                         )
                     )

                 # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}

         # === create columns list ===
         columns_result = []
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
         # add data from all tables
         for table in stmt.tables_lvl1:
             if table.columns is None:
-                raise PlanningException(f
+                raise PlanningException(f"Table is not found {table.name}")

             for col in table.columns:
                 # col = {name: 'col', type: 'str'}
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
                 column.type = table.columns_map[col_name].type
             else:
                 # continue
-                raise PlanningException(f
+                raise PlanningException(f"Column not found {col_name}")

         else:
             # table is not found, looking for in all tables
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():

         # forcing alias
         if column.alias is None:
-            column.alias = f
+            column.alias = f"column_{i}"

         # forcing type
         if column.type is None:
-            column.type =
+            column.type = "str"

         columns_result.append(column)
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
         if step.result_data is not None:
             # save results

-            if len(step.result_data[
-                table_info = step.result_data[
-                columns_info = step.result_data[
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]

                 table.columns = []
                 table.ds = table_info[0]
                 for col in columns_info:
                     if isinstance(col, tuple):
                         # is predictor
-                        col = dict(name=col[0], type=
+                        col = dict(name=col[0], type="str")
                     table.columns.append(
                         Column(
-                            name=col[
-                            type=col[
+                            name=col["name"],
+                            type=col["type"],
                         )
                     )

                 # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}

         # save results
         columns_result = []
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():

         if column.type is None:
             # forcing type
-            column.type =
+            column.type = "str"

         columns_result.append(column)
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
         stmt = self.planner.statement

         stmt.columns = [
-            Column(name=
-            Column(name=
+            Column(name="Variable_name", type="str"),
+            Column(name="Value", type="str"),
         ]
         return []

     def prepare_steps(self, query):
-
         stmt = Statement()
         self.planner.statement = stmt
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
         if isinstance(query, ast.Show):
             return self.prepare_show(query)
         else:
-
             # do nothing
             return []
             # raise NotImplementedError(query.__name__)
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
         query = self.planner.query

         if params is not None:
-
             if len(params) != len(stmt.params):
                 raise PlanningException("Count of execution parameters don't match prepared statement")
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
         stmt.params = None

         if (
-
+            isinstance(query, ast.Select)
+            or isinstance(query, ast.Union)
+            or isinstance(query, ast.CreateTable)
+            or isinstance(query, ast.Insert)
+            or isinstance(query, ast.Update)
+            or isinstance(query, ast.Delete)
+            or isinstance(query, ast.Intersect)
+            or isinstance(query, ast.Except)
         ):
             return self.plan_query(query)
         else:
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py
CHANGED
@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
     Parameter,
     BinaryOperation,
     Tuple,
+    Union,
+    Intersect,
 )

 from mindsdb.api.executor.planner.steps import FetchDataframeStep
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
             response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
             df = response.data_frame
         else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
+            if isinstance(step.query, (Union, Intersect)):
+                table_alias = ["", "", ""]
+            else:
+                table_alias = get_table_alias(step.query.from_table, self.context.get("database"))

             # TODO for information_schema we have 'database' = 'mindsdb'
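Editor's note: the reason for the ["", "", ""] stub appears to be that a set-operation node carries left/right branches rather than a single from_table, so there is no one table to alias. A minimal check of that assumption against the parser:

    # Assumption check (not from the diff): set-operation AST nodes lack
    # a from_table attribute, hence the empty-alias fallback above.
    from mindsdb_sql_parser import parse_sql

    select = parse_sql("SELECT a FROM t1")
    union = parse_sql("SELECT a FROM t1 UNION SELECT a FROM t2")

    print(getattr(select, "from_table", None) is not None)  # True
    print(getattr(union, "from_table", None) is not None)   # False, if the assumption holds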
mindsdb/api/executor/sql_query/steps/union_step.py
CHANGED
@@ -9,7 +9,6 @@ from .base import BaseStepCall


 class UnionStepCall(BaseStepCall):
-
     bind = UnionStep

     def call(self, step):
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
         # count of columns have to match
         if len(left_result.columns) != len(right_result.columns):
             raise WrongArgumentError(
-                f
+                f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
+            )

         # types have to match
         # TODO: return checking type later
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
         table_a, names = left_result.to_df_cols()
         table_b, _ = right_result.to_df_cols()

-
+        if step.operation.lower() == "intersect":
+            op = "INTERSECT"
+        else:
+            op = "UNION"
+
+        if step.unique is not True:
+            op += " ALL"
+
         query = f"""
             SELECT * FROM table_a
             {op}
             SELECT * FROM table_b
         """

-        resp_df, _description = query_df_with_type_infer_fallback(query, {
-            'table_a': table_a,
-            'table_b': table_b
-        })
+        resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
         resp_df.replace({np.nan: None}, inplace=True)

         return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
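Editor's note: pulling the new operator assembly out on its own shows the four operator strings the step can produce before handing the query to the dataframe engine; StepStub is a stand-in for the UnionStep fields used above:

    # Stand-in for the UnionStep fields (operation, unique) used above.
    class StepStub:
        def __init__(self, operation, unique):
            self.operation = operation
            self.unique = unique

    for step in (StepStub("union", True), StepStub("union", False),
                 StepStub("intersect", True), StepStub("intersect", False)):
        op = "INTERSECT" if step.operation.lower() == "intersect" else "UNION"
        if step.unique is not True:
            op += " ALL"
        print(step.operation, step.unique, "->", op)
    # union True -> UNION
    # union False -> UNION ALL
    # intersect True -> INTERSECT
    # intersect False -> INTERSECT ALL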
mindsdb/api/http/namespaces/file.py
CHANGED
@@ -3,6 +3,7 @@ import shutil
 import tarfile
 import tempfile
 import zipfile
+from urllib.parse import urlparse

 import multipart
 import requests
@@ -13,7 +14,7 @@ from flask_restx import Resource
 from mindsdb.api.http.namespaces.configs.files import ns_conf
 from mindsdb.api.http.utils import http_error
 from mindsdb.metrics.metrics import api_endpoint_metrics
-from mindsdb.utilities.config import
+from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
@@ -105,31 +106,55 @@ class File(Resource):

         if data.get("source_type") == "url":
             url = data["source"]
-
+            try:
+                url = urlparse(url)
+                if not (url.scheme and url.netloc):
+                    raise ValueError()
+                url = url.geturl()
+            except Exception:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    f"The URL is not valid: {data['source']}",
+                )
+
+            url_file_upload_enabled = config["url_file_upload"]["enabled"]
+            if url_file_upload_enabled is False:
+                return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")
+
+            allowed_origins = config["url_file_upload"]["allowed_origins"]
+            disallowed_origins = config["url_file_upload"]["disallowed_origins"]
+
+            if validate_urls(url, allowed_origins, disallowed_origins) is False:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    "URL is not allowed for security reasons. Allowed hosts are: "
+                    f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.",
+                )
+
             data["file"] = clear_filename(data["name"])
             is_cloud = config.get("cloud", False)
-            if is_cloud
+            if is_cloud:
+                if is_private_url(url):
+                    return http_error(400, f"URL is private: {url}")
+
+                if ctx.user_class != 1:
+                    info = requests.head(url, timeout=30)
+                    file_size = info.headers.get("Content-Length")
+                    try:
+                        file_size = int(file_size)
+                    except Exception:
+                        pass
+
+                    if file_size is None:
+                        return http_error(
+                            400,
+                            "Error getting file info",
+                            "Сan't determine remote file size",
+                        )
+                    if file_size > MAX_FILE_SIZE:
+                        return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
             with requests.get(url, stream=True) as r:
                 if r.status_code != 200:
                     return http_error(400, "Error getting file", f"Got status code: {r.status_code}")