MindsDB 25.6.4.0__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. See the package's registry page for more details.

Files changed (46):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  6. mindsdb/api/executor/planner/query_prepare.py +68 -87
  7. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  8. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  9. mindsdb/api/http/namespaces/file.py +49 -24
  10. mindsdb/api/mcp/start.py +45 -31
  11. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  12. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  13. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  14. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  15. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  16. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  17. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  18. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  19. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  20. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  21. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  22. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  23. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  24. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  25. mindsdb/interfaces/agents/agents_controller.py +29 -9
  26. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  27. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  28. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  29. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  30. mindsdb/interfaces/knowledge_base/controller.py +115 -89
  31. mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
  32. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  33. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  34. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  35. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  36. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  37. mindsdb/interfaces/skills/sql_agent.py +181 -130
  38. mindsdb/interfaces/storage/db.py +9 -7
  39. mindsdb/utilities/config.py +12 -1
  40. mindsdb/utilities/exception.py +47 -7
  41. mindsdb/utilities/security.py +54 -11
  42. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
  43. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
  44. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  45. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  46. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = "MindsDB"
2
2
  __package_name__ = "mindsdb"
3
- __version__ = "25.6.4.0"
3
+ __version__ = "25.7.1.0"
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = "MindsDB Inc"
@@ -36,6 +36,8 @@ from mindsdb_sql_parser.ast import (
36
36
  Tuple,
37
37
  Function,
38
38
  Variable,
39
+ Intersect,
40
+ Except,
39
41
  )
40
42
 
41
43
  # typed models
@@ -580,9 +582,6 @@ class ExecuteCommands:
580
582
  return ret
581
583
  query = SQLQuery(statement, session=self.session, database=database_name)
582
584
  return self.answer_select(query)
583
- elif statement_type is Union:
584
- query = SQLQuery(statement, session=self.session, database=database_name)
585
- return self.answer_select(query)
586
585
  elif statement_type is Explain:
587
586
  return self.answer_show_columns(statement.target, database_name=database_name)
588
587
  elif statement_type is CreateTable:
@@ -627,6 +626,9 @@ class ExecuteCommands:
627
626
  return self.answer_create_kb_index(statement, database_name)
628
627
  elif statement_type is EvaluateKnowledgeBase:
629
628
  return self.answer_evaluate_kb(statement, database_name)
629
+ elif statement_type in (Union, Intersect, Except):
630
+ query = SQLQuery(statement, session=self.session, database=database_name)
631
+ return self.answer_select(query)
630
632
  else:
631
633
  logger.warning(f"Unknown SQL statement: {sql}")
632
634
  raise NotSupportedYet(f"Unknown SQL statement: {sql}")
@@ -1554,9 +1556,9 @@ class ExecuteCommands:
1554
1556
  if is_full:
1555
1557
  targets.extend(
1556
1558
  [
1557
- Constant("COLLATION", alias=Identifier("Collation")),
1558
- Constant("PRIVILEGES", alias=Identifier("Privileges")),
1559
- Constant("COMMENT", alias=Identifier("Comment")),
1559
+ Constant(None, alias=Identifier("Collation")),
1560
+ Constant("select", alias=Identifier("Privileges")),
1561
+ Constant(None, alias=Identifier("Comment")),
1560
1562
  ]
1561
1563
  )
1562
1564
  new_statement = Select(
@@ -177,7 +177,7 @@ class InformationSchemaDataNode(DataNode):
177
177
  if table_name not in self.tables:
178
178
  raise exc.TableNotExistError(f"Table information_schema.{table_name} does not exists")
179
179
  table_columns_names = self.tables[table_name].columns
180
- df = pd.DataFrame([[table_columns_names]], columns=[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME])
180
+ df = pd.DataFrame(pd.Series(table_columns_names, name=INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME))
181
181
  for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
182
182
  if column_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME:
183
183
  continue
@@ -1,6 +1,5 @@
1
1
  import time
2
2
  import inspect
3
- from textwrap import dedent
4
3
  from dataclasses import astuple
5
4
  from typing import Iterable, List
6
5
 
@@ -21,6 +20,7 @@ from mindsdb.integrations.utilities.utils import get_class_name
21
20
  from mindsdb.metrics import metrics
22
21
  from mindsdb.utilities import log
23
22
  from mindsdb.utilities.profiler import profiler
23
+ from mindsdb.utilities.exception import format_db_error_message
24
24
  from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type
25
25
 
26
26
  logger = log.getLogger(__name__)
@@ -244,18 +244,16 @@ class IntegrationDataNode(DataNode):
244
244
  failed_sql_query = native_query
245
245
  if query is not None:
246
246
  failed_sql_query = query.to_string()
247
+
247
248
  raise Exception(
248
- dedent(f"""\
249
- Failed to execute external database query during query processing.
250
-
251
- Database Details:
252
- - Name: {self.integration_handler.name}
253
- - Type: {self.integration_handler.__class__.name}
254
-
255
- Error: {result.error_message}
256
- Failed Query: {failed_sql_query}
257
- """)
249
+ format_db_error_message(
250
+ db_name=self.integration_handler.name,
251
+ db_type=self.integration_handler.__class__.name,
252
+ db_error_msg=result.error_message,
253
+ failed_query=failed_sql_query,
254
+ )
258
255
  )
256
+
259
257
  if result.type == RESPONSE_TYPE.OK:
260
258
  return DataHubResponse(affected_rows=result.affected_rows)
261
259
 
@@ -623,7 +623,7 @@ class MetaColumnStatisticsTable(Table):
623
623
  columns = record.meta_columns
624
624
 
625
625
  for column in columns:
626
- column_statistics = column.meta_column_statistics[0]
626
+ column_statistics = column.meta_column_statistics[0] if column.meta_column_statistics else None
627
627
 
628
628
  item = {
629
629
  "TABLE_SCHEMA": database_name,
@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils
8
8
 
9
9
  def to_string(identifier):
10
10
  # alternative to AST.to_string() but without quoting
11
- return '.'.join(identifier.parts)
11
+ return ".".join(identifier.parts)
12
12
 
13
13
 
14
14
  class Table:
@@ -32,7 +32,6 @@ class Column:
32
32
  def __init__(self, node=None, table=None, name=None, type=None):
33
33
  alias = None
34
34
  if node is not None:
35
-
36
35
  if isinstance(node, ast.Identifier):
37
36
  # set name
38
37
  name = node.parts[-1] # ???
@@ -67,26 +66,25 @@ class Statement:
67
66
  self.offset = 0
68
67
 
69
68
 
70
- class PreparedStatementPlanner():
71
-
69
+ class PreparedStatementPlanner:
72
70
  def __init__(self, planner):
73
71
  self.planner = planner
74
72
 
75
73
  def get_type_of_var(self, v):
76
74
  if isinstance(v, str):
77
- return 'str'
75
+ return "str"
78
76
  elif isinstance(v, float):
79
- return 'float'
77
+ return "float"
80
78
  elif isinstance(v, int):
81
- return 'integer'
79
+ return "integer"
82
80
 
83
- return 'str'
81
+ return "str"
84
82
 
85
83
  def get_statement_info(self):
86
84
  stmt = self.planner.statement
87
85
 
88
86
  if stmt is None:
89
- raise PlanningException('Statement is not prepared')
87
+ raise PlanningException("Statement is not prepared")
90
88
 
91
89
  columns_result = []
92
90
 
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
95
93
  if column.table is not None:
96
94
  table = column.table.name
97
95
  ds = column.table.ds
98
- columns_result.append(dict(
99
- alias=column.alias,
100
- type=column.type,
101
- name=column.name,
102
- table_name=table,
103
- table_alias=table,
104
- ds=ds,
105
- ))
96
+ columns_result.append(
97
+ dict(
98
+ alias=column.alias,
99
+ type=column.type,
100
+ name=column.name,
101
+ table_name=table,
102
+ table_alias=table,
103
+ ds=ds,
104
+ )
105
+ )
106
106
 
107
107
  parameters = []
108
108
  for param in stmt.params:
109
- name = '?'
110
- parameters.append(dict(
111
- alias=name,
112
- type='str',
113
- name=name,
114
- ))
115
-
116
- return {
117
- 'parameters': parameters,
118
- 'columns': columns_result
119
- }
109
+ name = "?"
110
+ parameters.append(
111
+ dict(
112
+ alias=name,
113
+ type="str",
114
+ name=name,
115
+ )
116
+ )
120
117
 
121
- def get_table_of_column(self, t):
118
+ return {"parameters": parameters, "columns": columns_result}
122
119
 
120
+ def get_table_of_column(self, t):
123
121
  tables_map = self.planner.statement.tables_map
124
122
 
125
123
  # get tables to check
126
124
  if len(t.parts) > 1:
127
125
  # try to find table
128
126
  table_parts = t.parts[:-1]
129
- table_name = '.'.join(table_parts)
127
+ table_name = ".".join(table_parts)
130
128
  if table_name in tables_map:
131
129
  return tables_map[table_name]
132
130
 
133
131
  elif len(table_parts) > 1:
134
132
  # maybe datasource is 1st part
135
133
  table_parts = table_parts[1:]
136
- table_name = '.'.join(table_parts)
134
+ table_name = ".".join(table_parts)
137
135
  if table_name in tables_map:
138
136
  return tables_map[table_name]
139
137
 
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
158
156
  # in reverse order
159
157
  for p in table.parts[::-1]:
160
158
  parts.insert(0, p)
161
- keys.append('.'.join(parts))
159
+ keys.append(".".join(parts))
162
160
 
163
161
  # remember table
164
- tbl = Table(
165
- ds=ds,
166
- node=table,
167
- is_predictor=is_predictor
168
- )
162
+ tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
169
163
  tbl.keys = keys
170
164
 
171
165
  return tbl
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
189
183
  stmt.tables_map = {}
190
184
  stmt.tables_lvl1 = []
191
185
  if query.from_table is not None:
192
-
193
186
  if isinstance(query.from_table, ast.Join):
194
187
  # get all tables
195
188
  join_tables = utils.convert_join_to_list(query.from_table)
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():
198
191
 
199
192
  if isinstance(query.from_table, ast.Select):
200
193
  # nested select, get only last select
201
- join_tables = [
202
- dict(
203
- table=utils.get_deepest_select(query.from_table).from_table
204
- )
205
- ]
194
+ join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]
206
195
 
207
196
  for i, join_table in enumerate(join_tables):
208
- table = join_table['table']
197
+ table = join_table["table"]
209
198
  if isinstance(table, ast.Identifier):
210
199
  tbl = self.table_from_identifier(table)
211
200
 
212
201
  if tbl.is_predictor:
213
202
  # Is the last table?
214
203
  if i + 1 < len(join_tables):
215
- raise PlanningException('Predictor must be last table in query')
204
+ raise PlanningException("Predictor must be last table in query")
216
205
 
217
206
  stmt.tables_lvl1.append(tbl)
218
207
  for key in tbl.keys:
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
225
214
  # is there any predictors at other levels?
226
215
  lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
227
216
  if len(query_predictors) != len(lvl1_predictors):
228
- raise PlanningException('Predictor is not at first level')
217
+ raise PlanningException("Predictor is not at first level")
229
218
 
230
219
  # === get targets ===
231
220
  columns = []
232
221
  get_all_tables = False
233
222
  for t in query.targets:
234
-
235
223
  column = Column(t)
236
224
 
237
225
  # column alias
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
264
252
  column.type = self.get_type_of_var(t.value)
265
253
  elif isinstance(t, ast.Function):
266
254
  # mysql function
267
- if t.op == 'connection_id':
268
- column.type = 'integer'
255
+ if t.op == "connection_id":
256
+ column.type = "integer"
269
257
  else:
270
- column.type = 'str'
258
+ column.type = "str"
271
259
  else:
272
260
  # TODO go down into lower level.
273
261
  # It can be function, operation, select.
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
276
264
  # TODO add several known types for function, i.e ABS-int
277
265
 
278
266
  # TODO TypeCast - as casted type
279
- column.type = 'str'
267
+ column.type = "str"
280
268
 
281
269
  if alias is not None:
282
270
  column.alias = alias
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
299
287
  if step.result_data is not None:
300
288
  # save results
301
289
 
302
- if len(step.result_data['tables']) > 0:
303
- table_info = step.result_data['tables'][0]
304
- columns_info = step.result_data['columns'][table_info]
290
+ if len(step.result_data["tables"]) > 0:
291
+ table_info = step.result_data["tables"][0]
292
+ columns_info = step.result_data["columns"][table_info]
305
293
 
306
294
  table.columns = []
307
295
  table.ds = table_info[0]
308
296
  for col in columns_info:
309
297
  if isinstance(col, tuple):
310
298
  # is predictor
311
- col = dict(name=col[0], type='str')
299
+ col = dict(name=col[0], type="str")
312
300
  table.columns.append(
313
301
  Column(
314
- name=col['name'],
315
- type=col['type'],
302
+ name=col["name"],
303
+ type=col["type"],
316
304
  )
317
305
  )
318
306
 
319
307
  # map by names
320
- table.columns_map = {
321
- i.name.upper(): i
322
- for i in table.columns
323
- }
308
+ table.columns_map = {i.name.upper(): i for i in table.columns}
324
309
 
325
310
  # === create columns list ===
326
311
  columns_result = []
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
329
314
  # add data from all tables
330
315
  for table in stmt.tables_lvl1:
331
316
  if table.columns is None:
332
- raise PlanningException(f'Table is not found {table.name}')
317
+ raise PlanningException(f"Table is not found {table.name}")
333
318
 
334
319
  for col in table.columns:
335
320
  # col = {name: 'col', type: 'str'}
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
354
339
  column.type = table.columns_map[col_name].type
355
340
  else:
356
341
  # continue
357
- raise PlanningException(f'Column not found {col_name}')
342
+ raise PlanningException(f"Column not found {col_name}")
358
343
 
359
344
  else:
360
345
  # table is not found, looking for in all tables
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():
368
353
 
369
354
  # forcing alias
370
355
  if column.alias is None:
371
- column.alias = f'column_{i}'
356
+ column.alias = f"column_{i}"
372
357
 
373
358
  # forcing type
374
359
  if column.type is None:
375
- column.type = 'str'
360
+ column.type = "str"
376
361
 
377
362
  columns_result.append(column)
378
363
 
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
393
378
  if step.result_data is not None:
394
379
  # save results
395
380
 
396
- if len(step.result_data['tables']) > 0:
397
- table_info = step.result_data['tables'][0]
398
- columns_info = step.result_data['columns'][table_info]
381
+ if len(step.result_data["tables"]) > 0:
382
+ table_info = step.result_data["tables"][0]
383
+ columns_info = step.result_data["columns"][table_info]
399
384
 
400
385
  table.columns = []
401
386
  table.ds = table_info[0]
402
387
  for col in columns_info:
403
388
  if isinstance(col, tuple):
404
389
  # is predictor
405
- col = dict(name=col[0], type='str')
390
+ col = dict(name=col[0], type="str")
406
391
  table.columns.append(
407
392
  Column(
408
- name=col['name'],
409
- type=col['type'],
393
+ name=col["name"],
394
+ type=col["type"],
410
395
  )
411
396
  )
412
397
 
413
398
  # map by names
414
- table.columns_map = {
415
- i.name.upper(): i
416
- for i in table.columns
417
- }
399
+ table.columns_map = {i.name.upper(): i for i in table.columns}
418
400
 
419
401
  # save results
420
402
  columns_result = []
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():
430
412
 
431
413
  if column.type is None:
432
414
  # forcing type
433
- column.type = 'str'
415
+ column.type = "str"
434
416
 
435
417
  columns_result.append(column)
436
418
 
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
440
422
  stmt = self.planner.statement
441
423
 
442
424
  stmt.columns = [
443
- Column(name='Variable_name', type='str'),
444
- Column(name='Value', type='str'),
425
+ Column(name="Variable_name", type="str"),
426
+ Column(name="Value", type="str"),
445
427
  ]
446
428
  return []
447
429
 
448
430
  def prepare_steps(self, query):
449
-
450
431
  stmt = Statement()
451
432
  self.planner.statement = stmt
452
433
 
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
476
457
  if isinstance(query, ast.Show):
477
458
  return self.prepare_show(query)
478
459
  else:
479
-
480
460
  # do nothing
481
461
  return []
482
462
  # raise NotImplementedError(query.__name__)
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
496
476
  query = self.planner.query
497
477
 
498
478
  if params is not None:
499
-
500
479
  if len(params) != len(stmt.params):
501
480
  raise PlanningException("Count of execution parameters don't match prepared statement")
502
481
 
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
508
487
  stmt.params = None
509
488
 
510
489
  if (
511
- isinstance(query, ast.Select)
512
- or isinstance(query, ast.Union)
513
- or isinstance(query, ast.CreateTable)
514
- or isinstance(query, ast.Insert)
515
- or isinstance(query, ast.Update)
516
- or isinstance(query, ast.Delete)
490
+ isinstance(query, ast.Select)
491
+ or isinstance(query, ast.Union)
492
+ or isinstance(query, ast.CreateTable)
493
+ or isinstance(query, ast.Insert)
494
+ or isinstance(query, ast.Update)
495
+ or isinstance(query, ast.Delete)
496
+ or isinstance(query, ast.Intersect)
497
+ or isinstance(query, ast.Except)
517
498
  ):
518
499
  return self.plan_query(query)
519
500
  else:
@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
6
6
  Parameter,
7
7
  BinaryOperation,
8
8
  Tuple,
9
+ Union,
10
+ Intersect,
9
11
  )
10
12
 
11
13
  from mindsdb.api.executor.planner.steps import FetchDataframeStep
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
92
94
  response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
93
95
  df = response.data_frame
94
96
  else:
95
- table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
97
+ if isinstance(step.query, (Union, Intersect)):
98
+ table_alias = ["", "", ""]
99
+ else:
100
+ table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
96
101
 
97
102
  # TODO for information_schema we have 'database' = 'mindsdb'
98
103
 
@@ -9,7 +9,6 @@ from .base import BaseStepCall
9
9
 
10
10
 
11
11
  class UnionStepCall(BaseStepCall):
12
-
13
12
  bind = UnionStep
14
13
 
15
14
  def call(self, step):
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
19
18
  # count of columns have to match
20
19
  if len(left_result.columns) != len(right_result.columns):
21
20
  raise WrongArgumentError(
22
- f'UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} ')
21
+ f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
22
+ )
23
23
 
24
24
  # types have to match
25
25
  # TODO: return checking type later
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
33
33
  table_a, names = left_result.to_df_cols()
34
34
  table_b, _ = right_result.to_df_cols()
35
35
 
36
- op = 'UNION ALL'
37
- if step.unique:
38
- op = 'UNION'
36
+ if step.operation.lower() == "intersect":
37
+ op = "INTERSECT"
38
+ else:
39
+ op = "UNION"
40
+
41
+ if step.unique is not True:
42
+ op += " ALL"
43
+
39
44
  query = f"""
40
45
  SELECT * FROM table_a
41
46
  {op}
42
47
  SELECT * FROM table_b
43
48
  """
44
49
 
45
- resp_df, _description = query_df_with_type_infer_fallback(query, {
46
- 'table_a': table_a,
47
- 'table_b': table_b
48
- })
50
+ resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
49
51
  resp_df.replace({np.nan: None}, inplace=True)
50
52
 
51
53
  return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
@@ -3,6 +3,7 @@ import shutil
3
3
  import tarfile
4
4
  import tempfile
5
5
  import zipfile
6
+ from urllib.parse import urlparse
6
7
 
7
8
  import multipart
8
9
  import requests
@@ -13,7 +14,7 @@ from flask_restx import Resource
13
14
  from mindsdb.api.http.namespaces.configs.files import ns_conf
14
15
  from mindsdb.api.http.utils import http_error
15
16
  from mindsdb.metrics.metrics import api_endpoint_metrics
16
- from mindsdb.utilities.config import Config
17
+ from mindsdb.utilities.config import config
17
18
  from mindsdb.utilities.context import context as ctx
18
19
  from mindsdb.utilities import log
19
20
  from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
@@ -105,31 +106,55 @@ class File(Resource):
105
106
 
106
107
  if data.get("source_type") == "url":
107
108
  url = data["source"]
108
- config = Config()
109
- allowed_urls = config.get("file_upload_domains", [])
110
- if allowed_urls and not validate_urls(url, allowed_urls):
111
- return http_error(400, "Invalid File URL source.", f"Allowed hosts are: {', '.join(allowed_urls)}.")
109
+ try:
110
+ url = urlparse(url)
111
+ if not (url.scheme and url.netloc):
112
+ raise ValueError()
113
+ url = url.geturl()
114
+ except Exception:
115
+ return http_error(
116
+ 400,
117
+ "Invalid URL",
118
+ f"The URL is not valid: {data['source']}",
119
+ )
120
+
121
+ url_file_upload_enabled = config["url_file_upload"]["enabled"]
122
+ if url_file_upload_enabled is False:
123
+ return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")
124
+
125
+ allowed_origins = config["url_file_upload"]["allowed_origins"]
126
+ disallowed_origins = config["url_file_upload"]["disallowed_origins"]
127
+
128
+ if validate_urls(url, allowed_origins, disallowed_origins) is False:
129
+ return http_error(
130
+ 400,
131
+ "Invalid URL",
132
+ "URL is not allowed for security reasons. Allowed hosts are: "
133
+ f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.",
134
+ )
135
+
112
136
  data["file"] = clear_filename(data["name"])
113
137
  is_cloud = config.get("cloud", False)
114
- if is_cloud and is_private_url(url):
115
- return http_error(400, f"URL is private: {url}")
116
-
117
- if is_cloud is True and ctx.user_class != 1:
118
- info = requests.head(url)
119
- file_size = info.headers.get("Content-Length")
120
- try:
121
- file_size = int(file_size)
122
- except Exception:
123
- pass
124
-
125
- if file_size is None:
126
- return http_error(
127
- 400,
128
- "Error getting file info",
129
- "Сan't determine remote file size",
130
- )
131
- if file_size > MAX_FILE_SIZE:
132
- return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
138
+ if is_cloud:
139
+ if is_private_url(url):
140
+ return http_error(400, f"URL is private: {url}")
141
+
142
+ if ctx.user_class != 1:
143
+ info = requests.head(url, timeout=30)
144
+ file_size = info.headers.get("Content-Length")
145
+ try:
146
+ file_size = int(file_size)
147
+ except Exception:
148
+ pass
149
+
150
+ if file_size is None:
151
+ return http_error(
152
+ 400,
153
+ "Error getting file info",
154
+ "Сan't determine remote file size",
155
+ )
156
+ if file_size > MAX_FILE_SIZE:
157
+ return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
133
158
  with requests.get(url, stream=True) as r:
134
159
  if r.status_code != 200:
135
160
  return http_error(400, "Error getting file", f"Got status code: {r.status_code}")