MindsDB 25.3.4.2__py3-none-any.whl → 25.4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = 'MindsDB'
  __package_name__ = 'mindsdb'
- __version__ = '25.3.4.2'
+ __version__ = '25.4.1.0'
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = 'MindsDB Inc'
mindsdb/__main__.py CHANGED
@@ -24,7 +24,8 @@ from mindsdb.__about__ import __version__ as mindsdb_version
  from mindsdb.utilities.config import config
  from mindsdb.utilities.exception import EntityNotExistsError
  from mindsdb.utilities.starters import (
-     start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue, start_scheduler, start_tasks
+     start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue, start_scheduler, start_tasks,
+     start_mcp
  )
  from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
  from mindsdb.utilities.functions import get_versions_where_predictors_become_obsolete
@@ -57,6 +58,7 @@ class TrunkProcessEnum(Enum):
      JOBS = 'jobs'
      TASKS = 'tasks'
      ML_TASK_QUEUE = 'ml_task_queue'
+     MCP = 'mcp'

      @classmethod
      def _missing_(cls, value):
@@ -221,9 +223,9 @@ if __name__ == '__main__':
      ctx.set_default()

      # ---- CHECK SYSTEM ----
-     if not (sys.version_info[0] >= 3 and sys.version_info[1] >= 9):
+     if not (sys.version_info[0] >= 3 and sys.version_info[1] >= 10):
          print("""
-     MindsDB requires Python >= 3.9 to run
+     MindsDB requires Python >= 3.10 to run

      Once you have supported Python version installed you can start mindsdb as follows:

@@ -385,6 +387,7 @@ if __name__ == '__main__':

      http_api_config = config['api']['http']
      mysql_api_config = config['api']['mysql']
+     mcp_api_config = config['api']['mcp']
      trunc_processes_struct = {
          TrunkProcessEnum.HTTP: TrunkProcessData(
              name=TrunkProcessEnum.HTTP.value,
@@ -434,11 +437,25 @@ if __name__ == '__main__':
              name=TrunkProcessEnum.ML_TASK_QUEUE.value,
              entrypoint=start_ml_task_queue,
              args=(config.cmd_args.verbose,)
+         ),
+         TrunkProcessEnum.MCP: TrunkProcessData(
+             name=TrunkProcessEnum.MCP.value,
+             entrypoint=start_mcp,
+             port=mcp_api_config.get('port', 47337),
+             args=(config.cmd_args.verbose,),
+             restart_on_failure=mcp_api_config.get('restart_on_failure', False),
+             max_restart_count=mcp_api_config.get('max_restart_count', TrunkProcessData.max_restart_count),
+             max_restart_interval_seconds=mcp_api_config.get(
+                 'max_restart_interval_seconds', TrunkProcessData.max_restart_interval_seconds
+             )
          )
      }

      for api_enum in api_arr:
-         trunc_processes_struct[api_enum].need_to_run = True
+         if api_enum in trunc_processes_struct:
+             trunc_processes_struct[api_enum].need_to_run = True
+         else:
+             logger.error(f"ERROR: {api_enum} API is not a valid api in config")

      if config['jobs']['disable'] is False:
          trunc_processes_struct[TrunkProcessEnum.JOBS].need_to_run = True
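
With TrunkProcessEnum.MCP registered in trunc_processes_struct, the MCP server starts and restarts like any other API process, and unknown API names in the config are now logged instead of raising a KeyError. A launch sketch, assuming the existing --api flag accepts the new name alongside the others:

    python -m mindsdb --api=http,mysql,mcp
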
mindsdb/api/mcp/start.py ADDED
@@ -0,0 +1,152 @@
+ from contextlib import asynccontextmanager
+ from collections.abc import AsyncIterator
+ from typing import Optional, Dict, Any
+ from dataclasses import dataclass
+
+ from mcp.server.fastmcp import FastMCP
+ from mindsdb.api.mysql.mysql_proxy.classes.fake_mysql_proxy import FakeMysqlProxy
+ from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE as SQL_RESPONSE_TYPE
+ from mindsdb.utilities import log
+ from mindsdb.utilities.config import Config
+ from mindsdb.interfaces.storage import db
+
+ logger = log.getLogger(__name__)
+
+
+ @dataclass
+ class AppContext:
+     db: Any
+
+
+ @asynccontextmanager
+ async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
+     """Manage application lifecycle with type-safe context"""
+     # Initialize on startup
+     db.init()
+     try:
+         yield AppContext(db=db)
+     finally:
+         # TODO: We need better way to handle this in storage/db.py
+         pass
+
+
+ # Configure server with lifespan
+ mcp = FastMCP(
+     "MindsDB",
+     lifespan=app_lifespan,
+     dependencies=["mindsdb"]  # Add any additional dependencies
+ )
+ # MCP Queries
+ LISTING_QUERY = "SHOW DATABASES"
+
+
+ @mcp.tool()
+ def query(query: str, context: Optional[Dict] = None) -> Dict[str, Any]:
+     """
+     Execute a SQL query against MindsDB
+
+     Args:
+         query: The SQL query to execute
+         context: Optional context parameters for the query
+
+     Returns:
+         Dict containing the query results or error information
+     """
+
+     if context is None:
+         context = {}
+
+     logger.debug(f'Incoming MCP query: {query}')
+
+     mysql_proxy = FakeMysqlProxy()
+     mysql_proxy.set_context(context)
+
+     try:
+         result = mysql_proxy.process_query(query)
+
+         if result.type == SQL_RESPONSE_TYPE.OK:
+             return {"type": SQL_RESPONSE_TYPE.OK}
+
+         if result.type == SQL_RESPONSE_TYPE.TABLE:
+             return {
+                 "type": SQL_RESPONSE_TYPE.TABLE,
+                 "data": result.data.to_lists(json_types=True),
+                 "column_names": [
+                     x["alias"] or x["name"] if "alias" in x else x["name"]
+                     for x in result.columns
+                 ],
+             }
+         else:
+             return {
+                 "type": SQL_RESPONSE_TYPE.ERROR,
+                 "error_code": 0,
+                 "error_message": "Unknown response type"
+             }
+
+     except Exception as e:
+         logger.error(f"Error processing query: {str(e)}")
+         return {
+             "type": SQL_RESPONSE_TYPE.ERROR,
+             "error_code": 0,
+             "error_message": str(e)
+         }
+
+
+ @mcp.tool()
+ def list_databases() -> Dict[str, Any]:
+     """
+     List all databases in MindsDB along with their tables
+
+     Returns:
+         Dict containing the list of databases and their tables
+     """
+
+     mysql_proxy = FakeMysqlProxy()
+
+     try:
+         result = mysql_proxy.process_query(LISTING_QUERY)
+         if result.type == SQL_RESPONSE_TYPE.ERROR:
+             return {
+                 "type": "error",
+                 "error_code": result.error_code,
+                 "error_message": result.error_message,
+             }
+         elif result.type == SQL_RESPONSE_TYPE.OK:
+             return {"type": "ok"}
+         elif result.type == SQL_RESPONSE_TYPE.TABLE:
+             data = result.data.to_lists(json_types=True)
+             return data
+     except Exception as e:
+         return {
+             "type": "error",
+             "error_code": 0,
+             "error_message": str(e),
+         }
+
+
+ def start(*args, **kwargs):
+     """Start the MCP server
+
+     Args:
+         host (str): Host to bind to
+         port (int): Port to listen on
+     """
+     config = Config()
+     port = int(config['api'].get('mcp', {}).get('port', 47337))
+     host = config['api'].get('mcp', {}).get('host', '127.0.0.1')
+
+     logger.info(f"Starting MCP server on {host}:{port}")
+     mcp.settings.host = host
+     mcp.settings.port = port
+
+     try:
+         mcp.run(transport="sse")  # Use SSE transport instead of stdio
+     except Exception as e:
+         logger.error(f"Error starting MCP server: {str(e)}")
+         raise
+
+
+ if __name__ == "__main__":
+     start()
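
For context, a minimal client-side sketch of talking to this server with the mcp Python SDK; the /sse endpoint path is an assumption based on FastMCP's SSE defaults, and the host/port follow the config defaults added in this release:

    import asyncio
    from mcp import ClientSession
    from mcp.client.sse import sse_client

    async def main():
        # assumed endpoint: FastMCP serves SSE at /sse by default
        async with sse_client("http://127.0.0.1:47337/sse") as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                # call the `query` tool defined above
                result = await session.call_tool("query", {"query": "SHOW DATABASES"})
                print(result)

    asyncio.run(main())
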
@@ -89,7 +89,7 @@ class VectorStoreHandler(BaseHandler):
          else:
              return value

-     def _extract_conditions(self, where_statement) -> Optional[List[FilterCondition]]:
+     def extract_conditions(self, where_statement) -> Optional[List[FilterCondition]]:
          conditions = []
          # parse conditions
          if where_statement is not None:
@@ -110,13 +110,7 @@ class VectorStoreHandler(BaseHandler):
                  right_hand = node.args[1].value
              elif isinstance(node.args[1], Tuple):
                  # Constant could be actually a list i.e. [1.2, 3.2]
-                 right_hand = [
-                     ast.literal_eval(item.value)
-                     if isinstance(item, Constant)
-                     and not isinstance(item.value, list)
-                     else item.value
-                     for item in node.args[1].items
-                 ]
+                 right_hand = [item.value for item in node.args[1].items]
              else:
                  raise Exception(f"Unsupported right hand side: {node.args[1]}")
              conditions.append(
@@ -125,18 +119,21 @@ class VectorStoreHandler(BaseHandler):

          query_traversal(where_statement, _extract_comparison_conditions)

-         # try to treat conditions that are not in TableField as metadata conditions
-         for condition in conditions:
-             if not self._is_condition_allowed(condition):
-                 condition.column = (
-                     TableField.METADATA.value + "." + condition.column
-                 )
-
          else:
              conditions = None

          return conditions

+     def _convert_metadata_filters(self, conditions):
+         if conditions is None:
+             return
+         # try to treat conditions that are not in TableField as metadata conditions
+         for condition in conditions:
+             if not self._is_condition_allowed(condition):
+                 condition.column = (
+                     TableField.METADATA.value + "." + condition.column
+                 )
+
      def _is_columns_allowed(self, columns: List[str]) -> bool:
          """
          Check if columns are allowed.
@@ -325,14 +322,16 @@ class VectorStoreHandler(BaseHandler):
          if not df_insert.empty:
              self.insert(table_name, df_insert)

-     def dispatch_delete(self, query: Delete):
+     def dispatch_delete(self, query: Delete, conditions: List[FilterCondition] = None):
          """
          Dispatch delete query to the appropriate method.
          """
          # parse key arguments
          table_name = query.table.parts[-1]
-         where_statement = query.where
-         conditions = self._extract_conditions(where_statement)
+         if conditions is None:
+             where_statement = query.where
+             conditions = self.extract_conditions(where_statement)
+             self._convert_metadata_filters(conditions)

          # dispatch delete
          return self.delete(table_name, conditions=conditions)
@@ -356,9 +355,10 @@ class VectorStoreHandler(BaseHandler):
          )

          # check if columns are allowed
-         where_statement = query.where
          if conditions is None:
-             conditions = self._extract_conditions(where_statement)
+             where_statement = query.where
+             conditions = self.extract_conditions(where_statement)
+             self._convert_metadata_filters(conditions)

          # get offset and limit
          offset = query.offset.value if query.offset is not None else None
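
The extraction/dispatch split lets callers such as KnowledgeBaseTable (below) rewrite conditions before they reach the vector store. A hypothetical caller-side sketch, where handler is an existing VectorStoreHandler and query a parsed Delete statement:

    # conditions may be None when the query has no WHERE clause
    conditions = handler.extract_conditions(query.where)

    # callers can rewrite condition.column here (this is what KnowledgeBaseTable does)
    handler._convert_metadata_filters(conditions)  # unknown columns -> "metadata.<col>"

    # passing conditions skips re-extraction inside dispatch
    handler.dispatch_delete(query, conditions)
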
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py CHANGED
@@ -127,17 +127,21 @@ class LLMReranker(BaseDocumentCompressor):
                  ranked_results.append((batch[idx][1], score))

              # Check if we should stop early
-             high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
-             can_stop_early = (
-                 self.early_stop  # Early stopping is enabled
-                 and self.num_docs_to_keep  # We have a target number of docs
-                 and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
-                 and score >= self.early_stop_threshold  # Current doc is good enough
-             )
-
-             if can_stop_early:
-                 log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
-                 return ranked_results
+             try:
+                 high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                 can_stop_early = (
+                     self.early_stop  # Early stopping is enabled
+                     and self.num_docs_to_keep  # We have a target number of docs
+                     and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                     and score >= self.early_stop_threshold  # Current doc is good enough
+                 )
+
+                 if can_stop_early:
+                     log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                     return ranked_results
+             except Exception as e:
+                 # Don't let early stopping errors stop the whole process
+                 log.warning(f"Error in early stopping check: {str(e)}")

          except Exception as e:
              log.error(f"Batch processing error: {str(e)}")
@@ -222,3 +226,109 @@ class LLMReranker(BaseDocumentCompressor):
              "temperature": self.temperature,
              "remove_irrelevant": self.remove_irrelevant,
          }
+
+     def get_scores(self, query: str, documents: list[str], disable_events: bool = True):
+         """
+         Get relevance scores for documents given a query.
+         Args:
+             query: The query text
+             documents: List of document texts to score
+             disable_events: Whether to disable event dispatching (default True)
+         Returns:
+             List of relevance scores
+         """
+         query_document_pairs = [(query, doc) for doc in documents]
+         # Create event loop and run async code
+         import asyncio
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             # If no running loop exists, create a new one
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+         # If disable_events is True, we need to modify the _rank function to not use dispatch_custom_event
+         if disable_events:
+             # Create a wrapper function that doesn't dispatch events
+             async def _rank_without_events(query_document_pairs):
+                 ranked_results = []
+                 # Process in larger batches for better throughput
+                 batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
+                 for i in range(0, len(query_document_pairs), batch_size):
+                     batch = query_document_pairs[i:i + batch_size]
+                     try:
+                         # Define a no-events version of search_relevancy inside this closure
+                         async def search_relevancy_no_events(query, document):
+                             await self._init_client()
+                             async with self._semaphore:
+                                 for attempt in range(self.max_retries):
+                                     try:
+                                         response = await self.client.chat.completions.create(
+                                             model=self.model,
+                                             messages=[
+                                                 {"role": "system", "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'."},
+                                                 {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}
+                                             ],
+                                             temperature=self.temperature,
+                                             n=1,
+                                             logprobs=True,
+                                             max_tokens=1
+                                         )
+                                         # Extract response and confidence score
+                                         answer = response.choices[0].message.content
+                                         logprob = response.choices[0].logprobs.content[0].logprob
+                                         # No event dispatch here
+                                         return {"document": document, "answer": answer, "logprob": logprob}
+                                     except Exception as e:
+                                         if attempt == self.max_retries - 1:
+                                             log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
+                                             raise
+                                         # Exponential backoff with jitter
+                                         retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
+                                         await asyncio.sleep(retry_delay)
+                         # Use our no-events version for this batch
+                         results = await asyncio.gather(
+                             *[search_relevancy_no_events(query=query, document=document) for (query, document) in batch],
+                             return_exceptions=True
+                         )
+                         for idx, result in enumerate(results):
+                             if isinstance(result, Exception):
+                                 log.error(f"Error processing document {i + idx}: {str(result)}")
+                                 ranked_results.append((batch[idx][1], 0.0))
+                                 continue
+                             answer = result["answer"]
+                             logprob = result["logprob"]
+                             prob = math.exp(logprob)
+                             # Convert answer to score using the model's confidence
+                             if answer.lower().strip() == "yes":
+                                 score = prob  # If yes, use the model's confidence
+                             elif answer.lower().strip() == "no":
+                                 score = 1 - prob  # If no, invert the confidence
+                             else:
+                                 score = 0.5 * prob  # For unclear answers, reduce confidence
+                             ranked_results.append((batch[idx][1], score))
+                         # Check if we should stop early
+                         try:
+                             high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                             can_stop_early = (
+                                 self.early_stop  # Early stopping is enabled
+                                 and self.num_docs_to_keep  # We have a target number of docs
+                                 and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                                 and score >= self.early_stop_threshold  # Current doc is good enough
+                             )
+                             if can_stop_early:
+                                 log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                                 return ranked_results
+                         except Exception as e:
+                             # Don't let early stopping errors stop the whole process
+                             log.warning(f"Error in early stopping check: {str(e)}")
+                     except Exception as e:
+                         log.error(f"Batch processing error: {str(e)}")
+                         continue
+                 return ranked_results
+             # Use our no-events version
+             documents_and_scores = loop.run_until_complete(_rank_without_events(query_document_pairs))
+         else:
+             # Use the original _rank method
+             documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs))
+         scores = [score for _, score in documents_and_scores]
+         return scores
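
get_scores gives synchronous access to the reranker from non-async code such as the knowledge-base query path below. A minimal usage sketch; the model name is an assumption, while filtering_threshold is a field of LLMReranker used in the diff above:

    reranker = LLMReranker(model="gpt-4o")
    docs = ["MindsDB connects AI models to data.", "Bananas are yellow."]
    scores = reranker.get_scores("What does MindsDB do?", docs)

    # keep only documents scoring above the reranker's own threshold
    kept = [d for d, s in zip(docs, scores) if s > reranker.filtering_threshold]
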
@@ -296,6 +296,19 @@ class Project:
          ]
          return data

+     def get_knowledge_bases(self):
+         from mindsdb.api.executor.controllers.session_controller import SessionController
+         session = SessionController()
+
+         return {
+             kb['name']: {
+                 'type': 'knowledge_base',
+                 'id': kb['id'],
+                 'deletable': True
+             }
+             for kb in session.kb_controller.list(self.name)
+         }
+
      def get_views(self):
          records = (
              db.session.query(db.View).filter_by(
@@ -353,6 +366,8 @@ class Project:
          for agent in agents:
              data[agent['name']] = agent['metadata']

+         data.update(self.get_knowledge_bases())
+
          return data

      def get_columns(self, table_name: str):
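
get_knowledge_bases() returns the same per-object metadata shape used for models, views and agents, so knowledge bases slot into the combined project listing unchanged. Illustrative values only:

    {
        "my_kb": {"type": "knowledge_base", "id": 3, "deletable": True}
    }
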
@@ -4,6 +4,7 @@ from typing import Dict, List, Optional

  import pandas as pd
  import hashlib
+ import numpy as np

  from mindsdb_sql_parser.ast import (
      BinaryOperation,
@@ -37,9 +38,16 @@ from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError

  from mindsdb.api.executor.command_executor import ExecuteCommands
  from mindsdb.utilities import log
+ from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker

  logger = log.getLogger(__name__)

+ KB_TO_VECTORDB_COLUMNS = {
+     'id': 'original_row_id',
+     'chunk_id': 'id',
+     'chunk_content': 'content'
+ }
+

  class KnowledgeBaseTable:
      """
@@ -103,7 +111,9 @@ class KnowledgeBaseTable:
          db_handler = self.get_vector_db()
          logger.debug(f"Using vector db handler: {type(db_handler)}")

-         df = db_handler.dispatch_select(query)
+         conditions = db_handler.extract_conditions(query.where)
+         self.addapt_conditions_columns(conditions)
+         df = db_handler.dispatch_select(query, conditions)

          if df is not None:
@@ -115,8 +125,72 @@ class KnowledgeBaseTable:
          else:
              logger.warning("Query returned no data")

+         rerank_model = self._kb.params.get("rerank_model")
+         if rerank_model and df is not None and not df.empty:
+             try:
+                 logger.info(f"Using reranker model: {rerank_model}")
+                 reranker = LLMReranker(model=rerank_model)
+                 # convert response from a dataframe to a list of strings
+                 content_column = df[TableField.CONTENT.value]
+                 # convert to list
+                 documents = content_column.tolist()
+                 # Extract query text from WHERE clause if it exists
+                 query_text = ""
+                 if query.where:
+                     def extract_content(node, **kwargs):
+                         nonlocal query_text
+                         is_binary_op = isinstance(node, BinaryOperation)
+                         is_identifier = isinstance(node.args[0], Identifier)
+                         is_content = node.args[0].parts[-1].lower() == 'content'
+                         is_constant = isinstance(node.args[1], Constant)
+                         if is_binary_op and is_identifier and is_content and is_constant:
+                             query_text = node.args[1].value
+                     query_traversal(query.where, extract_content)
+                 logger.debug(f"Extracted query text: {query_text}")
+                 # Get scores from reranker
+                 scores = reranker.get_scores(query_text, documents)
+                 # Add scores as a new column for filtering
+                 scores_array = np.array(scores)
+                 # Add temporary column for sorting
+                 df['_relevance_score'] = scores
+                 # Filter by score threshold using numpy array for element-wise comparison
+                 df = df[scores_array > reranker.filtering_threshold]
+                 # Sort by relevance (higher score = more relevant)
+                 df = df.sort_values(by='_relevance_score', ascending=False)
+                 # Remove temporary column
+                 # df = df.drop(columns=['_relevance_score'])
+                 # Apply original limit if it exists
+                 if query.limit and len(df) > query.limit.value:
+                     df = df.iloc[:query.limit.value]
+                 logger.debug(f"Applied reranking with model {rerank_model}")
+             except Exception as e:
+                 logger.error(f"Error during reranking: {str(e)}")
+
+         df = self.addapt_result_columns(df)
          return df

+     def addapt_conditions_columns(self, conditions):
+         if conditions is None:
+             return
+         for condition in conditions:
+             if condition.column in KB_TO_VECTORDB_COLUMNS:
+                 condition.column = KB_TO_VECTORDB_COLUMNS[condition.column]
+
+     def addapt_result_columns(self, df):
+         col_update = {}
+         for kb_col, vec_col in KB_TO_VECTORDB_COLUMNS.items():
+             if vec_col in df.columns:
+                 col_update[vec_col] = kb_col
+
+         df = df.rename(columns=col_update)
+
+         columns = list(df.columns)
+         # update id, get from metadata
+         df[TableField.ID.value] = df[TableField.METADATA.value].apply(lambda m: m.get('original_row_id'))
+
+         # id on first place
+         return df[[TableField.ID.value] + columns]
+
      def insert_files(self, file_names: List[str]):
          """Process and insert files"""
          if not self.document_loader:
@@ -217,7 +291,9 @@ class KnowledgeBaseTable:

          # send to vectordb
          db_handler = self.get_vector_db()
-         db_handler.dispatch_delete(query)
+         conditions = db_handler.extract_conditions(query.where)
+         self.addapt_conditions_columns(conditions)
+         db_handler.dispatch_delete(query, conditions)

      def hybrid_search(
          self,
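
The net effect of addapt_result_columns is a column rename plus an id rebuilt from metadata. A standalone re-enactment with pandas; the row values are invented for illustration:

    import pandas as pd

    # vector-store result: 'id' holds the chunk id, the row id lives in metadata
    df = pd.DataFrame({
        "id": ["chunk-1"],
        "content": ["first chunk of row-1"],
        "metadata": [{"original_row_id": "row-1"}],
    })

    # KB_TO_VECTORDB_COLUMNS inverted: vector-store names -> KB names
    df = df.rename(columns={"id": "chunk_id", "content": "chunk_content"})
    df["id"] = df["metadata"].apply(lambda m: m.get("original_row_id"))

    # id first, as in addapt_result_columns
    print(df[["id", "chunk_id", "chunk_content", "metadata"]])
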
mindsdb/utilities/config.py CHANGED
@@ -201,6 +201,14 @@ class Config:
                  "host": api_host,
                  "port": "55432",
                  "database": "mindsdb"
+             },
+             "mcp": {
+                 "host": api_host,
+                 "port": "47337",
+                 "enabled": True,
+                 "restart_on_failure": True,
+                 "max_restart_count": 1,
+                 "max_restart_interval_seconds": 60
              }
          },
          "cache": {
mindsdb/utilities/starters.py CHANGED
@@ -31,3 +31,10 @@ def start_ml_task_queue(*args, **kwargs):
  def start_scheduler(*args, **kwargs):
      from mindsdb.interfaces.jobs.scheduler import start
      start(*args, **kwargs)
+
+
+ def start_mcp(*args, **kwargs):
+     """Start the MCP server"""
+     from mindsdb.api.mcp.start import start
+
+     start(*args, **kwargs)