MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
- mindsdb/api/executor/planner/plan_join.py +1 -1
- mindsdb/api/executor/planner/query_planner.py +7 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/api_handler.py +6 -7
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/constants.py +44 -0
- mindsdb/interfaces/agents/langchain_agent.py +15 -6
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
- mindsdb/interfaces/knowledge_base/controller.py +121 -102
- mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
- mindsdb/interfaces/skills/skill_tool.py +91 -88
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/planner/query_prepare.py
CHANGED

@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils
 
 
 def to_string(identifier):
     # alternative to AST.to_string() but without quoting
-    return '.'.join(identifier.parts)
+    return ".".join(identifier.parts)
 
 
 class Table:
@@ -32,7 +32,6 @@ class Column:
     def __init__(self, node=None, table=None, name=None, type=None):
        alias = None
        if node is not None:
-
            if isinstance(node, ast.Identifier):
                # set name
                name = node.parts[-1]  # ???
@@ -67,26 +66,25 @@ class Statement:
        self.offset = 0
 
 
-class PreparedStatementPlanner():
-
+class PreparedStatementPlanner:
    def __init__(self, planner):
        self.planner = planner
 
    def get_type_of_var(self, v):
        if isinstance(v, str):
-            return 'str'
+            return "str"
        elif isinstance(v, float):
-            return 'float'
+            return "float"
        elif isinstance(v, int):
-            return 'integer'
+            return "integer"
 
-        return 'str'
+        return "str"
 
    def get_statement_info(self):
        stmt = self.planner.statement
 
        if stmt is None:
-            raise PlanningException('Statement is not prepared')
+            raise PlanningException("Statement is not prepared")
 
        columns_result = []
 
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
            if column.table is not None:
                table = column.table.name
                ds = column.table.ds
-            columns_result.append({
-                'alias': column.alias,
-                'type': column.type,
-                'name': column.name,
-                'table_name': table,
-                'table_alias': table,
-                'ds': ds
-            })
+            columns_result.append(
+                dict(
+                    alias=column.alias,
+                    type=column.type,
+                    name=column.name,
+                    table_name=table,
+                    table_alias=table,
+                    ds=ds,
+                )
+            )
 
        parameters = []
        for param in stmt.params:
-            name = '?'
-            parameters.append({
-                'alias': name,
-                'type': 'str',
-                'name': name,
-            })
+            name = "?"
+            parameters.append(
+                dict(
+                    alias=name,
+                    type="str",
+                    name=name,
+                )
+            )
 
-        return {
-            'parameters': parameters,
-            'columns': columns_result
-        }
+        return {"parameters": parameters, "columns": columns_result}
 
    def get_table_of_column(self, t):
        tables_map = self.planner.statement.tables_map
 
        # get tables to check
        if len(t.parts) > 1:
            # try to find table
            table_parts = t.parts[:-1]
-            table_name = '.'.join(table_parts)
+            table_name = ".".join(table_parts)
            if table_name in tables_map:
                return tables_map[table_name]
 
            elif len(table_parts) > 1:
                # maybe datasource is 1st part
                table_parts = table_parts[1:]
-                table_name = '.'.join(table_parts)
+                table_name = ".".join(table_parts)
                if table_name in tables_map:
                    return tables_map[table_name]
 
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
        # in reverse order
        for p in table.parts[::-1]:
            parts.insert(0, p)
-            keys.append('.'.join(parts))
+            keys.append(".".join(parts))
 
        # remember table
-        tbl = Table(
-            ds=ds,
-            node=table,
-            is_predictor=is_predictor
-        )
+        tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
        tbl.keys = keys
 
        return tbl
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
        stmt.tables_map = {}
        stmt.tables_lvl1 = []
        if query.from_table is not None:
-
            if isinstance(query.from_table, ast.Join):
                # get all tables
                join_tables = utils.convert_join_to_list(query.from_table)
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():
 
            if isinstance(query.from_table, ast.Select):
                # nested select, get only last select
-                join_tables = [
-                    dict(
-                        table=utils.get_deepest_select(query.from_table).from_table
-                    )
-                ]
+                join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]
 
            for i, join_table in enumerate(join_tables):
-                table = join_table['table']
+                table = join_table["table"]
                if isinstance(table, ast.Identifier):
                    tbl = self.table_from_identifier(table)
 
                    if tbl.is_predictor:
                        # Is the last table?
                        if i + 1 < len(join_tables):
-                            raise PlanningException('Predictor must be last table in query')
+                            raise PlanningException("Predictor must be last table in query")
 
                    stmt.tables_lvl1.append(tbl)
                    for key in tbl.keys:
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
        # is there any predictors at other levels?
        lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
        if len(query_predictors) != len(lvl1_predictors):
-            raise PlanningException('Predictor is not at first level')
+            raise PlanningException("Predictor is not at first level")
 
        # === get targets ===
        columns = []
        get_all_tables = False
        for t in query.targets:
-
            column = Column(t)
 
            # column alias
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
                column.type = self.get_type_of_var(t.value)
            elif isinstance(t, ast.Function):
                # mysql function
-                if t.op == 'connection_id':
-                    column.type = 'integer'
+                if t.op == "connection_id":
+                    column.type = "integer"
                else:
-                    column.type = 'str'
+                    column.type = "str"
            else:
                # TODO go down into lower level.
                # It can be function, operation, select.
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
                # TODO add several known types for function, i.e ABS-int
 
                # TODO TypeCast - as casted type
-                column.type = 'str'
+                column.type = "str"
 
            if alias is not None:
                column.alias = alias
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
        if step.result_data is not None:
            # save results
 
-            if len(step.result_data['tables']) > 0:
-                table_info = step.result_data['tables'][0]
-                columns_info = step.result_data['columns'][table_info]
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]
 
                table.columns = []
                table.ds = table_info[0]
                for col in columns_info:
                    if isinstance(col, tuple):
                        # is predictor
-                        col = dict(name=col[0], type='str')
+                        col = dict(name=col[0], type="str")
                    table.columns.append(
                        Column(
-                            name=col['name'],
-                            type=col['type'],
+                            name=col["name"],
+                            type=col["type"],
                        )
                    )
 
                # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}
 
        # === create columns list ===
        columns_result = []
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
        # add data from all tables
        for table in stmt.tables_lvl1:
            if table.columns is None:
-                raise PlanningException(f'Table is not found {table.name}')
+                raise PlanningException(f"Table is not found {table.name}")
 
            for col in table.columns:
                # col = {name: 'col', type: 'str'}
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
                    column.type = table.columns_map[col_name].type
                else:
                    # continue
-                    raise PlanningException(f'Column not found {col_name}')
+                    raise PlanningException(f"Column not found {col_name}")
 
            else:
                # table is not found, looking for in all tables
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():
 
            # forcing alias
            if column.alias is None:
-                column.alias = f'column_{i}'
+                column.alias = f"column_{i}"
 
            # forcing type
            if column.type is None:
-                column.type = 'str'
+                column.type = "str"
 
            columns_result.append(column)
 
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
        if step.result_data is not None:
            # save results
 
-            if len(step.result_data['tables']) > 0:
-                table_info = step.result_data['tables'][0]
-                columns_info = step.result_data['columns'][table_info]
+            if len(step.result_data["tables"]) > 0:
+                table_info = step.result_data["tables"][0]
+                columns_info = step.result_data["columns"][table_info]
 
                table.columns = []
                table.ds = table_info[0]
                for col in columns_info:
                    if isinstance(col, tuple):
                        # is predictor
-                        col = dict(name=col[0], type='str')
+                        col = dict(name=col[0], type="str")
                    table.columns.append(
                        Column(
-                            name=col['name'],
-                            type=col['type'],
+                            name=col["name"],
+                            type=col["type"],
                        )
                    )
 
                # map by names
-                table.columns_map = {
-                    i.name.upper(): i
-                    for i in table.columns
-                }
+                table.columns_map = {i.name.upper(): i for i in table.columns}
 
        # save results
        columns_result = []
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():
 
            if column.type is None:
                # forcing type
-                column.type = 'str'
+                column.type = "str"
 
            columns_result.append(column)
 
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
        stmt = self.planner.statement
 
        stmt.columns = [
-            Column(name='Variable_name', type='str'),
-            Column(name='Value', type='str'),
+            Column(name="Variable_name", type="str"),
+            Column(name="Value", type="str"),
        ]
        return []
 
    def prepare_steps(self, query):
-
        stmt = Statement()
        self.planner.statement = stmt
 
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
        if isinstance(query, ast.Show):
            return self.prepare_show(query)
        else:
-
            # do nothing
            return []
            # raise NotImplementedError(query.__name__)
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
        query = self.planner.query
 
        if params is not None:
-
            if len(params) != len(stmt.params):
                raise PlanningException("Count of execution parameters don't match prepared statement")
 
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
        stmt.params = None
 
        if (
-            (6 deleted lines: the same isinstance checks, without ast.Intersect and ast.Except; original formatting not recovered)
+            isinstance(query, ast.Select)
+            or isinstance(query, ast.Union)
+            or isinstance(query, ast.CreateTable)
+            or isinstance(query, ast.Insert)
+            or isinstance(query, ast.Update)
+            or isinstance(query, ast.Delete)
+            or isinstance(query, ast.Intersect)
+            or isinstance(query, ast.Except)
        ):
            return self.plan_query(query)
        else:
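Most of this file's changes are quote normalization and line joining, but the behavior of get_type_of_var is worth restating. The sketch below is a standalone re-statement of that mapping for illustration only (the real method lives on PreparedStatementPlanner and takes self; the free function here is hypothetical):

def get_type_of_var(v):
    # str is checked first, then float, then int; anything else falls back to "str".
    # Note that bool is a subclass of int in Python, so True/False map to "integer".
    if isinstance(v, str):
        return "str"
    elif isinstance(v, float):
        return "float"
    elif isinstance(v, int):
        return "integer"
    return "str"

assert get_type_of_var("a") == "str"
assert get_type_of_var(1.5) == "float"
assert get_type_of_var(True) == "integer"
assert get_type_of_var(None) == "str"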
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py
CHANGED

@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
     Parameter,
     BinaryOperation,
     Tuple,
+    Union,
+    Intersect,
 )
 
 from mindsdb.api.executor.planner.steps import FetchDataframeStep
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
            response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
            df = response.data_frame
        else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
+            if isinstance(step.query, (Union, Intersect)):
+                table_alias = ["", "", ""]
+            else:
+                table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
 
            # TODO for information_schema we have 'database' = 'mindsdb'
 
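The new branch special-cases set operations: a Union or Intersect node has no single from_table, so the step substitutes a blank three-part alias instead of calling get_table_alias. A minimal stand-in showing just that dispatch (Select, Union, and Intersect here are dummy classes, not the mindsdb_sql_parser.ast ones, and resolve_table_alias is a hypothetical helper):

class Select:
    def __init__(self, from_table):
        self.from_table = from_table

class Union:
    pass

class Intersect:
    pass

def resolve_table_alias(query, database):
    # Set operations get an empty alias triple; a plain select would go
    # through get_table_alias(query.from_table, database) in the real step.
    if isinstance(query, (Union, Intersect)):
        return ["", "", ""]
    return [database, query.from_table, query.from_table]

print(resolve_table_alias(Union(), "mindsdb"))       # ['', '', '']
print(resolve_table_alias(Select("t1"), "mindsdb"))  # ['mindsdb', 't1', 't1']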
mindsdb/api/executor/sql_query/steps/union_step.py
CHANGED

@@ -9,7 +9,6 @@ from .base import BaseStepCall
 
 
 class UnionStepCall(BaseStepCall):
-
    bind = UnionStep
 
    def call(self, step):
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
        # count of columns have to match
        if len(left_result.columns) != len(right_result.columns):
            raise WrongArgumentError(
-                f'UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)}')
+                f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
+            )
 
        # types have to match
        # TODO: return checking type later
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
        table_a, names = left_result.to_df_cols()
        table_b, _ = right_result.to_df_cols()
 
-        op = 'UNION'
-        if step.unique is not True:
-            op += ' ALL'
+        if step.operation.lower() == "intersect":
+            op = "INTERSECT"
+        else:
+            op = "UNION"
+
+        if step.unique is not True:
+            op += " ALL"
+
        query = f"""
            SELECT * FROM table_a
            {op}
            SELECT * FROM table_b
        """
 
-        resp_df, _description = query_df_with_type_infer_fallback(query, {
-            'table_a': table_a,
-            'table_b': table_b
-        })
+        resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
        resp_df.replace({np.nan: None}, inplace=True)
 
        return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
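The rewritten call builds the set operator from step attributes and runs both result sets through an in-memory SQL engine. The sketch below reproduces the operator selection with duckdb over pandas frames; query_df_with_type_infer_fallback is MindsDB's own helper, so querying duckdb directly here is an assumption made purely for illustration:

import duckdb
import pandas as pd

table_a = pd.DataFrame({"x": [1, 2, 2, 3]})
table_b = pd.DataFrame({"x": [2, 3, 4]})

def combine(operation: str, unique: bool) -> pd.DataFrame:
    # Same selection logic as UnionStepCall.call: INTERSECT vs UNION,
    # with " ALL" appended when duplicate rows should be kept.
    op = "INTERSECT" if operation.lower() == "intersect" else "UNION"
    if unique is not True:
        op += " ALL"
    # duckdb resolves table_a / table_b from the surrounding Python scope.
    return duckdb.query(f"SELECT * FROM table_a {op} SELECT * FROM table_b").to_df()

print(combine("union", unique=True))      # distinct rows from both frames
print(combine("intersect", unique=True))  # rows that appear in both frames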
mindsdb/api/http/namespaces/file.py
CHANGED

@@ -3,6 +3,7 @@ import shutil
 import tarfile
 import tempfile
 import zipfile
+from urllib.parse import urlparse
 
 import multipart
 import requests
@@ -13,7 +14,7 @@ from flask_restx import Resource
 from mindsdb.api.http.namespaces.configs.files import ns_conf
 from mindsdb.api.http.utils import http_error
 from mindsdb.metrics.metrics import api_endpoint_metrics
-from mindsdb.utilities.config import (imported name not recovered)
+from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
@@ -105,31 +106,55 @@ class File(Resource):
 
        if data.get("source_type") == "url":
            url = data["source"]
-            (4 deleted lines not recovered)
+            try:
+                url = urlparse(url)
+                if not (url.scheme and url.netloc):
+                    raise ValueError()
+                url = url.geturl()
+            except Exception:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    f"The URL is not valid: {data['source']}",
+                )
+
+            url_file_upload_enabled = config["url_file_upload"]["enabled"]
+            if url_file_upload_enabled is False:
+                return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")
+
+            allowed_origins = config["url_file_upload"]["allowed_origins"]
+            disallowed_origins = config["url_file_upload"]["disallowed_origins"]
+
+            if validate_urls(url, allowed_origins, disallowed_origins) is False:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    "URL is not allowed for security reasons. Allowed hosts are: "
+                    f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.",
+                )
+
            data["file"] = clear_filename(data["name"])
            is_cloud = config.get("cloud", False)
-            if is_cloud
-            (18 more deleted lines not recovered)
+            if is_cloud:
+                if is_private_url(url):
+                    return http_error(400, f"URL is private: {url}")
+
+                if ctx.user_class != 1:
+                    info = requests.head(url, timeout=30)
+                    file_size = info.headers.get("Content-Length")
+                    try:
+                        file_size = int(file_size)
+                    except Exception:
+                        pass
+
+                    if file_size is None:
+                        return http_error(
+                            400,
+                            "Error getting file info",
+                            "Сan't determine remote file size",
+                        )
+                    if file_size > MAX_FILE_SIZE:
+                        return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
            with requests.get(url, stream=True) as r:
                if r.status_code != 200:
                    return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
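The upload endpoint now rejects a URL before fetching it: the URL must parse with a scheme and host, URL uploads must be enabled in config, and the origin must pass the allow/deny lists. A condensed sketch of that gating follows; the exact matching rules of validate_urls are not visible in this diff, so the host-based comparison below is an assumption, and is_acceptable_url is a hypothetical helper:

from urllib.parse import urlparse

def is_acceptable_url(url: str, allowed_origins: list, disallowed_origins: list) -> bool:
    parsed = urlparse(url)
    if not (parsed.scheme and parsed.netloc):
        return False  # mirrors the try/urlparse/ValueError check above
    host = parsed.hostname or ""
    if disallowed_origins and host in disallowed_origins:
        return False
    if allowed_origins and host not in allowed_origins:
        return False
    return True

assert is_acceptable_url("https://example.com/data.csv", [], [])
assert not is_acceptable_url("not-a-url", [], [])
assert not is_acceptable_url("https://evil.test/x", [], ["evil.test"])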
mindsdb/api/mcp/start.py
CHANGED
@@ -1,7 +1,8 @@
 import os
+from typing import Any
+from textwrap import dedent
 from contextlib import asynccontextmanager
 from collections.abc import AsyncIterator
-from typing import Optional, Dict, Any
 from dataclasses import dataclass
 
 import uvicorn
@@ -41,16 +42,32 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
 mcp = FastMCP(
     "MindsDB",
     lifespan=app_lifespan,
-    dependencies=["mindsdb"]  # Add any additional dependencies
+    dependencies=["mindsdb"],  # Add any additional dependencies
 )
 # MCP Queries
 LISTING_QUERY = "SHOW DATABASES"
 
 
-@mcp.tool()
-def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
-    """Execute a SQL query against MindsDB
+query_tool_description = dedent("""\
+    Executes a SQL query against MindsDB.
+
+    A database must be specified either in the `context` parameter or directly in the query string (e.g., `SELECT * FROM my_database.my_table`). Queries like `SELECT * FROM my_table` will fail without a `context`.
+
+    Args:
+        query (str): The SQL query to execute.
+        context (dict, optional): The default database context. For example, `{"db": "my_postgres"}`.
+
+    Returns:
+        A dictionary describing the result.
+        - For a successful query with no data to return (e.g., an `UPDATE` statement), the response is `{"type": "ok"}`.
+        - If the query returns tabular data, the response is a dictionary containing `data` (a list of rows) and `column_names` (a list of column names). For example: `{"type": "table", "data": [[1, "a"], [2, "b"]], "column_names": ["column_a", "column_b"]}`.
+        - In case of an error, a response is `{"type": "error", "error_message": "the error message"}`.
+""")
+
+
+@mcp.tool(name="query", description=query_tool_description)
+def query(query: str, context: dict | None = None) -> dict[str, Any]:
+    """Execute a SQL query against MindsDB
 
     Args:
         query: The SQL query to execute
@@ -63,7 +80,7 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
     if context is None:
         context = {}
 
-    logger.debug(f'Incoming MCP query: {query}')
+    logger.debug(f"Incoming MCP query: {query}")
 
     mysql_proxy = FakeMysqlProxy()
     mysql_proxy.set_context(context)
@@ -78,34 +95,30 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
            return {
                "type": SQL_RESPONSE_TYPE.TABLE,
                "data": result.result_set.to_lists(json_types=True),
-                "column_names": [
-                    column.alias or column.name
-                    for column in result.result_set.columns
-                ],
+                "column_names": [column.alias or column.name for column in result.result_set.columns],
            }
        else:
-            return {
-                "type": SQL_RESPONSE_TYPE.ERROR,
-                "error_code": 0,
-                "error_message": "Unknown response type"
-            }
+            return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": "Unknown response type"}
 
    except Exception as e:
        logger.error(f"Error processing query: {str(e)}")
-        return {
-            "type": SQL_RESPONSE_TYPE.ERROR,
-            "error_code": 0,
-            "error_message": str(e)
-        }
+        return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": str(e)}
 
 
-@mcp.tool()
-def list_databases() -> Dict[str, Any]:
+list_databases_tool_description = (
+    "Returns a list of all database connections currently available in MindsDB. "
+    + "The tool takes no parameters and responds with a list of database names, "
+    + 'for example: ["my_postgres", "my_mysql", "test_db"].'
+)
+
+
+@mcp.tool(name="list_databases", description=list_databases_tool_description)
+def list_databases() -> list[str]:
    """
-    List all databases in MindsDB
+    List all databases in MindsDB
 
    Returns:
-        Dict[str, Any]: list of databases
+        list[str]: list of databases
    """
 
    mysql_proxy = FakeMysqlProxy()
@@ -124,6 +137,7 @@ def list_databases() -> Dict[str, Any]:
 
        elif result.type == SQL_RESPONSE_TYPE.TABLE:
            data = result.result_set.to_lists(json_types=True)
+            data = [val[0] for val in data]
            return data
 
    except Exception as e:
@@ -135,12 +149,12 @@ def list_databases() -> Dict[str, Any]:
 
 
 class CustomAuthMiddleware(BaseHTTPMiddleware):
-    """Custom middleware to handle authentication basing on header 'Authorization'
-    """
+    """Custom middleware to handle authentication basing on header 'Authorization'"""
+
    async def dispatch(self, request: Request, call_next):
-        mcp_access_token = os.environ.get('MINDSDB_MCP_ACCESS_TOKEN')
+        mcp_access_token = os.environ.get("MINDSDB_MCP_ACCESS_TOKEN")
        if mcp_access_token is not None:
-            auth_token = request.headers.get('Authorization', '').partition('Bearer ')[-1]
+            auth_token = request.headers.get("Authorization", "").partition("Bearer ")[-1]
            if mcp_access_token != auth_token:
                return Response(status_code=401, content="Unauthorized", media_type="text/plain")
 
@@ -171,8 +185,8 @@ def start(*args, **kwargs):
        port (int): Port to listen on
    """
    config = Config()
-    port = int(config['api'].get('mcp', {}).get('port', 47337))
-    host = config['api'].get('mcp', {}).get('host', '127.0.0.1')
+    port = int(config["api"].get("mcp", {}).get("port", 47337))
+    host = config["api"].get("mcp", {}).get("host", "127.0.0.1")
 
    logger.info(f"Starting MCP server on {host}:{port}")
    mcp.settings.host = host