MindsDB 25.6.3.1__py3-none-any.whl → 25.7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (55)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +72 -44
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +14 -1
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +1 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +314 -1
  7. mindsdb/api/executor/planner/plan_join.py +1 -1
  8. mindsdb/api/executor/planner/query_planner.py +7 -1
  9. mindsdb/api/executor/planner/query_prepare.py +68 -87
  10. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  11. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  12. mindsdb/api/http/namespaces/file.py +49 -24
  13. mindsdb/api/mcp/start.py +45 -31
  14. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  15. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  16. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  17. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  18. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  19. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  20. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  21. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  22. mindsdb/integrations/handlers/ludwig_handler/requirements.txt +1 -1
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  25. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +2 -0
  26. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  27. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  28. mindsdb/integrations/libs/api_handler.py +6 -7
  29. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  30. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  31. mindsdb/interfaces/agents/agents_controller.py +29 -9
  32. mindsdb/interfaces/agents/constants.py +44 -0
  33. mindsdb/interfaces/agents/langchain_agent.py +15 -6
  34. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  35. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  36. mindsdb/interfaces/data_catalog/data_catalog_reader.py +22 -3
  37. mindsdb/interfaces/knowledge_base/controller.py +121 -102
  38. mindsdb/interfaces/knowledge_base/evaluate.py +19 -7
  39. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  40. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  41. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  42. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  43. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +26 -22
  44. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +40 -28
  45. mindsdb/interfaces/skills/skill_tool.py +91 -88
  46. mindsdb/interfaces/skills/sql_agent.py +181 -130
  47. mindsdb/interfaces/storage/db.py +9 -7
  48. mindsdb/utilities/config.py +12 -1
  49. mindsdb/utilities/exception.py +47 -7
  50. mindsdb/utilities/security.py +54 -11
  51. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +239 -251
  52. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +55 -54
  53. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  54. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  55. {mindsdb-25.6.3.1.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/api/executor/planner/query_prepare.py CHANGED
@@ -8,7 +8,7 @@ from mindsdb.api.executor.planner import utils
 
 def to_string(identifier):
     # alternative to AST.to_string() but without quoting
-    return '.'.join(identifier.parts)
+    return ".".join(identifier.parts)
 
 
 class Table:
@@ -32,7 +32,6 @@ class Column:
     def __init__(self, node=None, table=None, name=None, type=None):
         alias = None
         if node is not None:
-
             if isinstance(node, ast.Identifier):
                 # set name
                 name = node.parts[-1]  # ???
@@ -67,26 +66,25 @@ class Statement:
         self.offset = 0
 
 
-class PreparedStatementPlanner():
-
+class PreparedStatementPlanner:
     def __init__(self, planner):
         self.planner = planner
 
     def get_type_of_var(self, v):
         if isinstance(v, str):
-            return 'str'
+            return "str"
         elif isinstance(v, float):
-            return 'float'
+            return "float"
         elif isinstance(v, int):
-            return 'integer'
+            return "integer"
 
-        return 'str'
+        return "str"
 
     def get_statement_info(self):
         stmt = self.planner.statement
 
         if stmt is None:
-            raise PlanningException('Statement is not prepared')
+            raise PlanningException("Statement is not prepared")
 
         columns_result = []
 
@@ -95,45 +93,45 @@ class PreparedStatementPlanner():
             if column.table is not None:
                 table = column.table.name
                 ds = column.table.ds
-            columns_result.append(dict(
-                alias=column.alias,
-                type=column.type,
-                name=column.name,
-                table_name=table,
-                table_alias=table,
-                ds=ds,
-            ))
+            columns_result.append(
+                dict(
+                    alias=column.alias,
+                    type=column.type,
+                    name=column.name,
+                    table_name=table,
+                    table_alias=table,
+                    ds=ds,
+                )
+            )
 
         parameters = []
         for param in stmt.params:
-            name = '?'
-            parameters.append(dict(
-                alias=name,
-                type='str',
-                name=name,
-            ))
-
-        return {
-            'parameters': parameters,
-            'columns': columns_result
-        }
+            name = "?"
+            parameters.append(
+                dict(
+                    alias=name,
+                    type="str",
+                    name=name,
+                )
+            )
 
-    def get_table_of_column(self, t):
+        return {"parameters": parameters, "columns": columns_result}
 
+    def get_table_of_column(self, t):
         tables_map = self.planner.statement.tables_map
 
         # get tables to check
         if len(t.parts) > 1:
             # try to find table
             table_parts = t.parts[:-1]
-            table_name = '.'.join(table_parts)
+            table_name = ".".join(table_parts)
             if table_name in tables_map:
                 return tables_map[table_name]
 
             elif len(table_parts) > 1:
                 # maybe datasource is 1st part
                 table_parts = table_parts[1:]
-                table_name = '.'.join(table_parts)
+                table_name = ".".join(table_parts)
                 if table_name in tables_map:
                     return tables_map[table_name]
 
@@ -158,14 +156,10 @@ class PreparedStatementPlanner():
         # in reverse order
         for p in table.parts[::-1]:
             parts.insert(0, p)
-            keys.append('.'.join(parts))
+            keys.append(".".join(parts))
 
         # remember table
-        tbl = Table(
-            ds=ds,
-            node=table,
-            is_predictor=is_predictor
-        )
+        tbl = Table(ds=ds, node=table, is_predictor=is_predictor)
         tbl.keys = keys
 
         return tbl
@@ -189,7 +183,6 @@ class PreparedStatementPlanner():
         stmt.tables_map = {}
         stmt.tables_lvl1 = []
         if query.from_table is not None:
-
             if isinstance(query.from_table, ast.Join):
                 # get all tables
                 join_tables = utils.convert_join_to_list(query.from_table)
@@ -198,21 +191,17 @@ class PreparedStatementPlanner():
 
             if isinstance(query.from_table, ast.Select):
                 # nested select, get only last select
-                join_tables = [
-                    dict(
-                        table=utils.get_deepest_select(query.from_table).from_table
-                    )
-                ]
+                join_tables = [dict(table=utils.get_deepest_select(query.from_table).from_table)]
 
             for i, join_table in enumerate(join_tables):
-                table = join_table['table']
+                table = join_table["table"]
                 if isinstance(table, ast.Identifier):
                     tbl = self.table_from_identifier(table)
 
                     if tbl.is_predictor:
                         # Is the last table?
                         if i + 1 < len(join_tables):
-                            raise PlanningException('Predictor must be last table in query')
+                            raise PlanningException("Predictor must be last table in query")
 
                     stmt.tables_lvl1.append(tbl)
                     for key in tbl.keys:
@@ -225,13 +214,12 @@ class PreparedStatementPlanner():
         # is there any predictors at other levels?
         lvl1_predictors = [i for i in stmt.tables_lvl1 if i.is_predictor]
         if len(query_predictors) != len(lvl1_predictors):
-            raise PlanningException('Predictor is not at first level')
+            raise PlanningException("Predictor is not at first level")
 
         # === get targets ===
         columns = []
         get_all_tables = False
         for t in query.targets:
-
             column = Column(t)
 
             # column alias
@@ -264,10 +252,10 @@ class PreparedStatementPlanner():
                 column.type = self.get_type_of_var(t.value)
             elif isinstance(t, ast.Function):
                 # mysql function
-                if t.op == 'connection_id':
-                    column.type = 'integer'
+                if t.op == "connection_id":
+                    column.type = "integer"
                 else:
-                    column.type = 'str'
+                    column.type = "str"
             else:
                 # TODO go down into lower level.
                 # It can be function, operation, select.
@@ -276,7 +264,7 @@ class PreparedStatementPlanner():
                 # TODO add several known types for function, i.e ABS-int
 
                 # TODO TypeCast - as casted type
-                column.type = 'str'
+                column.type = "str"
 
             if alias is not None:
                 column.alias = alias
@@ -299,28 +287,25 @@ class PreparedStatementPlanner():
             if step.result_data is not None:
                 # save results
 
-                if len(step.result_data['tables']) > 0:
-                    table_info = step.result_data['tables'][0]
-                    columns_info = step.result_data['columns'][table_info]
+                if len(step.result_data["tables"]) > 0:
+                    table_info = step.result_data["tables"][0]
+                    columns_info = step.result_data["columns"][table_info]
 
                     table.columns = []
                    table.ds = table_info[0]
                    for col in columns_info:
                        if isinstance(col, tuple):
                            # is predictor
-                            col = dict(name=col[0], type='str')
+                            col = dict(name=col[0], type="str")
                        table.columns.append(
                            Column(
-                                name=col['name'],
-                                type=col['type'],
+                                name=col["name"],
+                                type=col["type"],
                            )
                        )
 
                    # map by names
-                    table.columns_map = {
-                        i.name.upper(): i
-                        for i in table.columns
-                    }
+                    table.columns_map = {i.name.upper(): i for i in table.columns}
 
         # === create columns list ===
         columns_result = []
@@ -329,7 +314,7 @@ class PreparedStatementPlanner():
         # add data from all tables
         for table in stmt.tables_lvl1:
             if table.columns is None:
-                raise PlanningException(f'Table is not found {table.name}')
+                raise PlanningException(f"Table is not found {table.name}")
 
             for col in table.columns:
                 # col = {name: 'col', type: 'str'}
@@ -354,7 +339,7 @@ class PreparedStatementPlanner():
                     column.type = table.columns_map[col_name].type
                 else:
                     # continue
-                    raise PlanningException(f'Column not found {col_name}')
+                    raise PlanningException(f"Column not found {col_name}")
 
             else:
                 # table is not found, looking for in all tables
@@ -368,11 +353,11 @@ class PreparedStatementPlanner():
 
             # forcing alias
             if column.alias is None:
-                column.alias = f'column_{i}'
+                column.alias = f"column_{i}"
 
             # forcing type
             if column.type is None:
-                column.type = 'str'
+                column.type = "str"
 
             columns_result.append(column)
 
@@ -393,28 +378,25 @@ class PreparedStatementPlanner():
             if step.result_data is not None:
                 # save results
 
-                if len(step.result_data['tables']) > 0:
-                    table_info = step.result_data['tables'][0]
-                    columns_info = step.result_data['columns'][table_info]
+                if len(step.result_data["tables"]) > 0:
+                    table_info = step.result_data["tables"][0]
+                    columns_info = step.result_data["columns"][table_info]
 
                    table.columns = []
                    table.ds = table_info[0]
                    for col in columns_info:
                        if isinstance(col, tuple):
                            # is predictor
-                            col = dict(name=col[0], type='str')
+                            col = dict(name=col[0], type="str")
                        table.columns.append(
                            Column(
-                                name=col['name'],
-                                type=col['type'],
+                                name=col["name"],
+                                type=col["type"],
                            )
                        )
 
                    # map by names
-                    table.columns_map = {
-                        i.name.upper(): i
-                        for i in table.columns
-                    }
+                    table.columns_map = {i.name.upper(): i for i in table.columns}
 
         # save results
         columns_result = []
@@ -430,7 +412,7 @@ class PreparedStatementPlanner():
 
             if column.type is None:
                 # forcing type
-                column.type = 'str'
+                column.type = "str"
 
             columns_result.append(column)
 
@@ -440,13 +422,12 @@ class PreparedStatementPlanner():
         stmt = self.planner.statement
 
         stmt.columns = [
-            Column(name='Variable_name', type='str'),
-            Column(name='Value', type='str'),
+            Column(name="Variable_name", type="str"),
+            Column(name="Value", type="str"),
         ]
         return []
 
     def prepare_steps(self, query):
-
         stmt = Statement()
         self.planner.statement = stmt
 
@@ -476,7 +457,6 @@ class PreparedStatementPlanner():
         if isinstance(query, ast.Show):
             return self.prepare_show(query)
         else:
-
             # do nothing
             return []
             # raise NotImplementedError(query.__name__)
@@ -496,7 +476,6 @@ class PreparedStatementPlanner():
         query = self.planner.query
 
         if params is not None:
-
            if len(params) != len(stmt.params):
                raise PlanningException("Count of execution parameters don't match prepared statement")
 
@@ -508,12 +487,14 @@ class PreparedStatementPlanner():
         stmt.params = None
 
         if (
-            isinstance(query, ast.Select)
-            or isinstance(query, ast.Union)
-            or isinstance(query, ast.CreateTable)
-            or isinstance(query, ast.Insert)
-            or isinstance(query, ast.Update)
-            or isinstance(query, ast.Delete)
+            isinstance(query, ast.Select)
+            or isinstance(query, ast.Union)
+            or isinstance(query, ast.CreateTable)
+            or isinstance(query, ast.Insert)
+            or isinstance(query, ast.Update)
+            or isinstance(query, ast.Delete)
+            or isinstance(query, ast.Intersect)
+            or isinstance(query, ast.Except)
         ):
             return self.plan_query(query)
         else:
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py CHANGED
@@ -6,6 +6,8 @@ from mindsdb_sql_parser.ast import (
     Parameter,
     BinaryOperation,
     Tuple,
+    Union,
+    Intersect,
 )
 
 from mindsdb.api.executor.planner.steps import FetchDataframeStep
@@ -92,7 +94,10 @@ class FetchDataframeStepCall(BaseStepCall):
             response: DataHubResponse = dn.query(native_query=step.raw_query, session=self.session)
             df = response.data_frame
         else:
-            table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
+            if isinstance(step.query, (Union, Intersect)):
+                table_alias = ["", "", ""]
+            else:
+                table_alias = get_table_alias(step.query.from_table, self.context.get("database"))
 
             # TODO for information_schema we have 'database' = 'mindsdb'
 
mindsdb/api/executor/sql_query/steps/union_step.py CHANGED
@@ -9,7 +9,6 @@ from .base import BaseStepCall
 
 
 class UnionStepCall(BaseStepCall):
-
     bind = UnionStep
 
     def call(self, step):
@@ -19,7 +18,8 @@ class UnionStepCall(BaseStepCall):
         # count of columns have to match
         if len(left_result.columns) != len(right_result.columns):
             raise WrongArgumentError(
-                f'UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} ')
+                f"UNION columns count mismatch: {len(left_result.columns)} != {len(right_result.columns)} "
+            )
 
         # types have to match
         # TODO: return checking type later
@@ -33,19 +33,21 @@ class UnionStepCall(BaseStepCall):
         table_a, names = left_result.to_df_cols()
         table_b, _ = right_result.to_df_cols()
 
-        op = 'UNION ALL'
-        if step.unique:
-            op = 'UNION'
+        if step.operation.lower() == "intersect":
+            op = "INTERSECT"
+        else:
+            op = "UNION"
+
+        if step.unique is not True:
+            op += " ALL"
+
         query = f"""
             SELECT * FROM table_a
            {op}
            SELECT * FROM table_b
        """
 
-        resp_df, _description = query_df_with_type_infer_fallback(query, {
-            'table_a': table_a,
-            'table_b': table_b
-        })
+        resp_df, _description = query_df_with_type_infer_fallback(query, {"table_a": table_a, "table_b": table_b})
         resp_df.replace({np.nan: None}, inplace=True)
 
         return ResultSet.from_df_cols(df=resp_df, columns_dict=names)
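
Note on the union_step.py change above: UnionStepCall now derives the set operator from step.operation and step.unique instead of hard-coding UNION/UNION ALL, which is what lets INTERSECT (and INTERSECT ALL) reuse the same two-table code path. Below is a minimal standalone sketch of that operator selection, assuming DuckDB and pandas are installed; the real code runs the query through MindsDB's query_df_with_type_infer_fallback helper rather than calling DuckDB directly.

import duckdb
import pandas as pd

def combine(table_a: pd.DataFrame, table_b: pd.DataFrame, operation: str, unique: bool) -> pd.DataFrame:
    # Mirror the operator selection from UnionStepCall.call in the diff above.
    op = "INTERSECT" if operation.lower() == "intersect" else "UNION"
    if unique is not True:
        op += " ALL"  # UNION ALL / INTERSECT ALL keep duplicate rows
    # DuckDB resolves table_a / table_b from the local DataFrames via replacement scans.
    return duckdb.query(f"SELECT * FROM table_a {op} SELECT * FROM table_b").df()

a = pd.DataFrame({"x": [1, 2, 2]})
b = pd.DataFrame({"x": [2, 3]})
print(combine(a, b, "intersect", unique=True))  # only x == 2 appears in both inputs
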
mindsdb/api/http/namespaces/file.py CHANGED
@@ -3,6 +3,7 @@ import shutil
 import tarfile
 import tempfile
 import zipfile
+from urllib.parse import urlparse
 
 import multipart
 import requests
@@ -13,7 +14,7 @@ from flask_restx import Resource
 from mindsdb.api.http.namespaces.configs.files import ns_conf
 from mindsdb.api.http.utils import http_error
 from mindsdb.metrics.metrics import api_endpoint_metrics
-from mindsdb.utilities.config import Config
+from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities import log
 from mindsdb.utilities.security import is_private_url, clear_filename, validate_urls
@@ -105,31 +106,55 @@ class File(Resource):
 
         if data.get("source_type") == "url":
             url = data["source"]
-            config = Config()
-            allowed_urls = config.get("file_upload_domains", [])
-            if allowed_urls and not validate_urls(url, allowed_urls):
-                return http_error(400, "Invalid File URL source.", f"Allowed hosts are: {', '.join(allowed_urls)}.")
+            try:
+                url = urlparse(url)
+                if not (url.scheme and url.netloc):
+                    raise ValueError()
+                url = url.geturl()
+            except Exception:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    f"The URL is not valid: {data['source']}",
+                )
+
+            url_file_upload_enabled = config["url_file_upload"]["enabled"]
+            if url_file_upload_enabled is False:
+                return http_error(400, "URL file upload is disabled.", "URL file upload is disabled.")
+
+            allowed_origins = config["url_file_upload"]["allowed_origins"]
+            disallowed_origins = config["url_file_upload"]["disallowed_origins"]
+
+            if validate_urls(url, allowed_origins, disallowed_origins) is False:
+                return http_error(
+                    400,
+                    "Invalid URL",
+                    "URL is not allowed for security reasons. Allowed hosts are: "
+                    f"{', '.join(allowed_origins) if allowed_origins else 'not specified'}.",
+                )
+
             data["file"] = clear_filename(data["name"])
             is_cloud = config.get("cloud", False)
-            if is_cloud and is_private_url(url):
-                return http_error(400, f"URL is private: {url}")
-
-            if is_cloud is True and ctx.user_class != 1:
-                info = requests.head(url)
-                file_size = info.headers.get("Content-Length")
-                try:
-                    file_size = int(file_size)
-                except Exception:
-                    pass
-
-                if file_size is None:
-                    return http_error(
-                        400,
-                        "Error getting file info",
-                        "Сan't determine remote file size",
-                    )
-                if file_size > MAX_FILE_SIZE:
-                    return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
+            if is_cloud:
+                if is_private_url(url):
+                    return http_error(400, f"URL is private: {url}")
+
+                if ctx.user_class != 1:
+                    info = requests.head(url, timeout=30)
+                    file_size = info.headers.get("Content-Length")
+                    try:
+                        file_size = int(file_size)
+                    except Exception:
+                        pass
+
+                    if file_size is None:
+                        return http_error(
+                            400,
+                            "Error getting file info",
+                            "Сan't determine remote file size",
+                        )
+                    if file_size > MAX_FILE_SIZE:
+                        return http_error(400, "File is too big", f"Upload limit for file is {MAX_FILE_SIZE >> 20} MB")
             with requests.get(url, stream=True) as r:
                 if r.status_code != 200:
                     return http_error(400, "Error getting file", f"Got status code: {r.status_code}")
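
Note on the file.py change above: URL uploads are now gated by a url_file_upload config section (enabled, allowed_origins, disallowed_origins), the URL is syntax-checked with urlparse, and it must pass validate_urls before anything is downloaded. The sketch below is a hypothetical stand-in for an allow/deny origin check of that shape; it is not the actual mindsdb.utilities.security.validate_urls implementation, which may also handle redirects, IP literals, and private addresses.

from urllib.parse import urlparse

def origin_allowed(url: str, allowed_origins: list[str], disallowed_origins: list[str]) -> bool:
    # Compare only the hostname part of the URL against the two lists.
    host = (urlparse(url).hostname or "").lower()
    if host in (h.lower() for h in disallowed_origins):
        return False
    if allowed_origins:  # an empty allow-list means "no restriction"
        return host in (h.lower() for h in allowed_origins)
    return True

print(origin_allowed("https://example.com/data.csv", ["example.com"], []))  # True
print(origin_allowed("https://evil.test/data.csv", ["example.com"], []))    # False
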
mindsdb/api/mcp/start.py CHANGED
@@ -1,7 +1,8 @@
 import os
+from typing import Any
+from textwrap import dedent
 from contextlib import asynccontextmanager
 from collections.abc import AsyncIterator
-from typing import Optional, Dict, Any
 from dataclasses import dataclass
 
 import uvicorn
@@ -41,16 +42,32 @@ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
 mcp = FastMCP(
     "MindsDB",
     lifespan=app_lifespan,
-    dependencies=["mindsdb"]  # Add any additional dependencies
+    dependencies=["mindsdb"],  # Add any additional dependencies
 )
 # MCP Queries
 LISTING_QUERY = "SHOW DATABASES"
 
 
-@mcp.tool()
-def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
-    """
-    Execute a SQL query against MindsDB
+query_tool_description = dedent("""\
+    Executes a SQL query against MindsDB.
+
+    A database must be specified either in the `context` parameter or directly in the query string (e.g., `SELECT * FROM my_database.my_table`). Queries like `SELECT * FROM my_table` will fail without a `context`.
+
+    Args:
+        query (str): The SQL query to execute.
+        context (dict, optional): The default database context. For example, `{"db": "my_postgres"}`.
+
+    Returns:
+        A dictionary describing the result.
+        - For a successful query with no data to return (e.g., an `UPDATE` statement), the response is `{"type": "ok"}`.
+        - If the query returns tabular data, the response is a dictionary containing `data` (a list of rows) and `column_names` (a list of column names). For example: `{"type": "table", "data": [[1, "a"], [2, "b"]], "column_names": ["column_a", "column_b"]}`.
+        - In case of an error, a response is `{"type": "error", "error_message": "the error message"}`.
+    """)
+
+
+@mcp.tool(name="query", description=query_tool_description)
+def query(query: str, context: dict | None = None) -> dict[str, Any]:
+    """Execute a SQL query against MindsDB
 
     Args:
         query: The SQL query to execute
@@ -63,7 +80,7 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
     if context is None:
         context = {}
 
-    logger.debug(f'Incoming MCP query: {query}')
+    logger.debug(f"Incoming MCP query: {query}")
 
     mysql_proxy = FakeMysqlProxy()
     mysql_proxy.set_context(context)
@@ -78,34 +95,30 @@ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
             return {
                 "type": SQL_RESPONSE_TYPE.TABLE,
                 "data": result.result_set.to_lists(json_types=True),
-                "column_names": [
-                    column.alias or column.name
-                    for column in result.result_set.columns
-                ],
+                "column_names": [column.alias or column.name for column in result.result_set.columns],
             }
         else:
-            return {
-                "type": SQL_RESPONSE_TYPE.ERROR,
-                "error_code": 0,
-                "error_message": "Unknown response type"
-            }
+            return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": "Unknown response type"}
 
     except Exception as e:
         logger.error(f"Error processing query: {str(e)}")
-        return {
-            "type": SQL_RESPONSE_TYPE.ERROR,
-            "error_code": 0,
-            "error_message": str(e)
-        }
+        return {"type": SQL_RESPONSE_TYPE.ERROR, "error_code": 0, "error_message": str(e)}
 
 
-@mcp.tool()
-def list_databases() -> Dict[str, Any]:
+list_databases_tool_description = (
+    "Returns a list of all database connections currently available in MindsDB. "
+    + "The tool takes no parameters and responds with a list of database names, "
+    + 'for example: ["my_postgres", "my_mysql", "test_db"].'
+)
+
+
+@mcp.tool(name="list_databases", description=list_databases_tool_description)
+def list_databases() -> list[str]:
     """
-    List all databases in MindsDB along with their tables
+    List all databases in MindsDB
 
     Returns:
-        Dict containing the list of databases and their tables
+        list[str]: list of databases
     """
 
     mysql_proxy = FakeMysqlProxy()
@@ -124,6 +137,7 @@ def list_databases() -> Dict[str, Any]:
 
         elif result.type == SQL_RESPONSE_TYPE.TABLE:
             data = result.result_set.to_lists(json_types=True)
+            data = [val[0] for val in data]
             return data
 
     except Exception as e:
@@ -135,12 +149,12 @@ def list_databases() -> Dict[str, Any]:
 
 
 class CustomAuthMiddleware(BaseHTTPMiddleware):
-    """Custom middleware to handle authentication basing on header 'Authorization'
-    """
+    """Custom middleware to handle authentication basing on header 'Authorization'"""
+
     async def dispatch(self, request: Request, call_next):
-        mcp_access_token = os.environ.get('MINDSDB_MCP_ACCESS_TOKEN')
+        mcp_access_token = os.environ.get("MINDSDB_MCP_ACCESS_TOKEN")
         if mcp_access_token is not None:
-            auth_token = request.headers.get('Authorization', '').partition('Bearer ')[-1]
+            auth_token = request.headers.get("Authorization", "").partition("Bearer ")[-1]
             if mcp_access_token != auth_token:
                 return Response(status_code=401, content="Unauthorized", media_type="text/plain")
 
@@ -171,8 +185,8 @@ def start(*args, **kwargs):
         port (int): Port to listen on
     """
     config = Config()
-    port = int(config['api'].get('mcp', {}).get('port', 47337))
-    host = config['api'].get('mcp', {}).get('host', '127.0.0.1')
+    port = int(config["api"].get("mcp", {}).get("port", 47337))
+    host = config["api"].get("mcp", {}).get("host", "127.0.0.1")
 
     logger.info(f"Starting MCP server on {host}:{port}")
     mcp.settings.host = host
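
Note on the start.py change above: the MCP query and list_databases tools now carry explicit descriptions that document their wire contract. The literals below restate the request and response shapes from those descriptions; the database, table, and column names are invented for illustration.

# Request parameters for the "query" tool (context is optional).
request = {
    "query": "SELECT * FROM my_postgres.my_table LIMIT 2",
    "context": {"db": "my_postgres"},
}

# Possible responses, as documented in query_tool_description.
ok_response = {"type": "ok"}  # statement that returns no rows, e.g. UPDATE
table_response = {
    "type": "table",
    "data": [[1, "a"], [2, "b"]],
    "column_names": ["column_a", "column_b"],
}
error_response = {"type": "error", "error_code": 0, "error_message": "the error message"}

# The "list_databases" tool takes no parameters and returns a plain list of names.
databases = ["my_postgres", "my_mysql", "test_db"]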