MindsDB 25.3.4.1__py3-none-any.whl → 25.4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (31)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +21 -4
  3. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +5 -2
  4. mindsdb/api/executor/datahub/datanodes/system_tables.py +131 -138
  5. mindsdb/api/mcp/__init__.py +0 -0
  6. mindsdb/api/mcp/start.py +152 -0
  7. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +74 -0
  8. mindsdb/integrations/handlers/confluence_handler/confluence_api_client.py +14 -2
  9. mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py +278 -55
  10. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +52 -21
  11. mindsdb/integrations/handlers/ms_teams_handler/ms_teams_tables.py +6 -29
  12. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +37 -1
  13. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +28 -1
  14. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +53 -5
  15. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +37 -1
  16. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +42 -1
  17. mindsdb/integrations/libs/vectordatabase_handler.py +20 -20
  18. mindsdb/integrations/utilities/handlers/auth_utilities/__init__.py +1 -1
  19. mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/__init__.py +1 -1
  20. mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/ms_graph_api_auth_utilities.py +97 -18
  21. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +121 -11
  22. mindsdb/interfaces/database/projects.py +15 -0
  23. mindsdb/interfaces/knowledge_base/controller.py +78 -2
  24. mindsdb/utilities/config.py +8 -0
  25. mindsdb/utilities/render/sqlalchemy_render.py +30 -6
  26. mindsdb/utilities/starters.py +7 -0
  27. {mindsdb-25.3.4.1.dist-info → mindsdb-25.4.1.0.dist-info}/METADATA +233 -234
  28. {mindsdb-25.3.4.1.dist-info → mindsdb-25.4.1.0.dist-info}/RECORD +31 -29
  29. {mindsdb-25.3.4.1.dist-info → mindsdb-25.4.1.0.dist-info}/WHEEL +0 -0
  30. {mindsdb-25.3.4.1.dist-info → mindsdb-25.4.1.0.dist-info}/licenses/LICENSE +0 -0
  31. {mindsdb-25.3.4.1.dist-info → mindsdb-25.4.1.0.dist-info}/top_level.txt +0 -0
@@ -127,17 +127,21 @@ class LLMReranker(BaseDocumentCompressor):
127
127
  ranked_results.append((batch[idx][1], score))
128
128
 
129
129
  # Check if we should stop early
130
- high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
131
- can_stop_early = (
132
- self.early_stop # Early stopping is enabled
133
- and self.num_docs_to_keep # We have a target number of docs
134
- and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
135
- and score >= self.early_stop_threshold # Current doc is good enough
136
- )
137
-
138
- if can_stop_early:
139
- log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
140
- return ranked_results
130
+ try:
131
+ high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
132
+ can_stop_early = (
133
+ self.early_stop # Early stopping is enabled
134
+ and self.num_docs_to_keep # We have a target number of docs
135
+ and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
136
+ and score >= self.early_stop_threshold # Current doc is good enough
137
+ )
138
+
139
+ if can_stop_early:
140
+ log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
141
+ return ranked_results
142
+ except Exception as e:
143
+ # Don't let early stopping errors stop the whole process
144
+ log.warning(f"Error in early stopping check: {str(e)}")
141
145
 
142
146
  except Exception as e:
143
147
  log.error(f"Batch processing error: {str(e)}")
@@ -222,3 +226,109 @@ class LLMReranker(BaseDocumentCompressor):
222
226
  "temperature": self.temperature,
223
227
  "remove_irrelevant": self.remove_irrelevant,
224
228
  }
229
+
230
+ def get_scores(self, query: str, documents: list[str], disable_events: bool = True):
231
+ """
232
+ Get relevance scores for documents given a query.
233
+ Args:
234
+ query: The query text
235
+ documents: List of document texts to score
236
+ disable_events: Whether to disable event dispatching (default True)
237
+ Returns:
238
+ List of relevance scores
239
+ """
240
+ query_document_pairs = [(query, doc) for doc in documents]
241
+ # Create event loop and run async code
242
+ import asyncio
243
+ try:
244
+ loop = asyncio.get_running_loop()
245
+ except RuntimeError:
246
+ # If no running loop exists, create a new one
247
+ loop = asyncio.new_event_loop()
248
+ asyncio.set_event_loop(loop)
249
+ # If disable_events is True, we need to modify the _rank function to not use dispatch_custom_event
250
+ if disable_events:
251
+ # Create a wrapper function that doesn't dispatch events
252
+ async def _rank_without_events(query_document_pairs):
253
+ ranked_results = []
254
+ # Process in larger batches for better throughput
255
+ batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
256
+ for i in range(0, len(query_document_pairs), batch_size):
257
+ batch = query_document_pairs[i:i + batch_size]
258
+ try:
259
+ # Define a no-events version of search_relevancy inside this closure
260
+ async def search_relevancy_no_events(query, document):
261
+ await self._init_client()
262
+ async with self._semaphore:
263
+ for attempt in range(self.max_retries):
264
+ try:
265
+ response = await self.client.chat.completions.create(
266
+ model=self.model,
267
+ messages=[
268
+ {"role": "system", "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'."},
269
+ {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}
270
+ ],
271
+ temperature=self.temperature,
272
+ n=1,
273
+ logprobs=True,
274
+ max_tokens=1
275
+ )
276
+ # Extract response and confidence score
277
+ answer = response.choices[0].message.content
278
+ logprob = response.choices[0].logprobs.content[0].logprob
279
+ # No event dispatch here
280
+ return {"document": document, "answer": answer, "logprob": logprob}
281
+ except Exception as e:
282
+ if attempt == self.max_retries - 1:
283
+ log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
284
+ raise
285
+ # Exponential backoff with jitter
286
+ retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
287
+ await asyncio.sleep(retry_delay)
288
+ # Use our no-events version for this batch
289
+ results = await asyncio.gather(
290
+ *[search_relevancy_no_events(query=query, document=document) for (query, document) in batch],
291
+ return_exceptions=True
292
+ )
293
+ for idx, result in enumerate(results):
294
+ if isinstance(result, Exception):
295
+ log.error(f"Error processing document {i+idx}: {str(result)}")
296
+ ranked_results.append((batch[idx][1], 0.0))
297
+ continue
298
+ answer = result["answer"]
299
+ logprob = result["logprob"]
300
+ prob = math.exp(logprob)
301
+ # Convert answer to score using the model's confidence
302
+ if answer.lower().strip() == "yes":
303
+ score = prob # If yes, use the model's confidence
304
+ elif answer.lower().strip() == "no":
305
+ score = 1 - prob # If no, invert the confidence
306
+ else:
307
+ score = 0.5 * prob # For unclear answers, reduce confidence
308
+ ranked_results.append((batch[idx][1], score))
309
+ # Check if we should stop early
310
+ try:
311
+ high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
312
+ can_stop_early = (
313
+ self.early_stop # Early stopping is enabled
314
+ and self.num_docs_to_keep # We have a target number of docs
315
+ and len(high_scoring_docs) >= self.num_docs_to_keep # Found enough good docs
316
+ and score >= self.early_stop_threshold # Current doc is good enough
317
+ )
318
+ if can_stop_early:
319
+ log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
320
+ return ranked_results
321
+ except Exception as e:
322
+ # Don't let early stopping errors stop the whole process
323
+ log.warning(f"Error in early stopping check: {str(e)}")
324
+ except Exception as e:
325
+ log.error(f"Batch processing error: {str(e)}")
326
+ continue
327
+ return ranked_results
328
+ # Use our no-events version
329
+ documents_and_scores = loop.run_until_complete(_rank_without_events(query_document_pairs))
330
+ else:
331
+ # Use the original _rank method
332
+ documents_and_scores = loop.run_until_complete(self._rank(query_document_pairs))
333
+ scores = [score for _, score in documents_and_scores]
334
+ return scores
@@ -296,6 +296,19 @@ class Project:
296
296
  ]
297
297
  return data
298
298
 
299
+ def get_knowledge_bases(self):
300
+ from mindsdb.api.executor.controllers.session_controller import SessionController
301
+ session = SessionController()
302
+
303
+ return {
304
+ kb['name']: {
305
+ 'type': 'knowledge_base',
306
+ 'id': kb['id'],
307
+ 'deletable': True
308
+ }
309
+ for kb in session.kb_controller.list(self.name)
310
+ }
311
+
299
312
  def get_views(self):
300
313
  records = (
301
314
  db.session.query(db.View).filter_by(
@@ -353,6 +366,8 @@ class Project:
353
366
  for agent in agents:
354
367
  data[agent['name']] = agent['metadata']
355
368
 
369
+ data.update(self.get_knowledge_bases())
370
+
356
371
  return data
357
372
 
358
373
  def get_columns(self, table_name: str):
@@ -4,6 +4,7 @@ from typing import Dict, List, Optional
4
4
 
5
5
  import pandas as pd
6
6
  import hashlib
7
+ import numpy as np
7
8
 
8
9
  from mindsdb_sql_parser.ast import (
9
10
  BinaryOperation,
@@ -37,9 +38,16 @@ from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
37
38
 
38
39
  from mindsdb.api.executor.command_executor import ExecuteCommands
39
40
  from mindsdb.utilities import log
41
+ from mindsdb.integrations.utilities.rag.rerankers.reranker_compressor import LLMReranker
40
42
 
41
43
  logger = log.getLogger(__name__)
42
44
 
45
+ KB_TO_VECTORDB_COLUMNS = {
46
+ 'id': 'original_row_id',
47
+ 'chunk_id': 'id',
48
+ 'chunk_content': 'content'
49
+ }
50
+
43
51
 
44
52
  class KnowledgeBaseTable:
45
53
  """
@@ -103,7 +111,9 @@ class KnowledgeBaseTable:
103
111
  db_handler = self.get_vector_db()
104
112
  logger.debug(f"Using vector db handler: {type(db_handler)}")
105
113
 
106
- df = db_handler.dispatch_select(query)
114
+ conditions = db_handler.extract_conditions(query.where)
115
+ self.addapt_conditions_columns(conditions)
116
+ df = db_handler.dispatch_select(query, conditions)
107
117
 
108
118
  if df is not None:
109
119
 
@@ -115,8 +125,72 @@ class KnowledgeBaseTable:
115
125
  else:
116
126
  logger.warning("Query returned no data")
117
127
 
128
+ rerank_model = self._kb.params.get("rerank_model")
129
+ if rerank_model and df is not None and not df.empty:
130
+ try:
131
+ logger.info(f"Using reranker model: {rerank_model}")
132
+ reranker = LLMReranker(model=rerank_model)
133
+ # convert response from a dataframe to a list of strings
134
+ content_column = df[TableField.CONTENT.value]
135
+ # convert to list
136
+ documents = content_column.tolist()
137
+ # Extract query text from WHERE clause if it exists
138
+ query_text = ""
139
+ if query.where:
140
+ def extract_content(node, **kwargs):
141
+ nonlocal query_text
142
+ is_binary_op = isinstance(node, BinaryOperation)
143
+ is_identifier = isinstance(node.args[0], Identifier)
144
+ is_content = node.args[0].parts[-1].lower() == 'content'
145
+ is_constant = isinstance(node.args[1], Constant)
146
+ if is_binary_op and is_identifier and is_content and is_constant:
147
+ query_text = node.args[1].value
148
+ query_traversal(query.where, extract_content)
149
+ logger.debug(f"Extracted query text: {query_text}")
150
+ # Get scores from reranker
151
+ scores = reranker.get_scores(query_text, documents)
152
+ # Add scores as a new column for filtering
153
+ scores_array = np.array(scores)
154
+ # Add temporary column for sorting
155
+ df['_relevance_score'] = scores
156
+ # Filter by score threshold using numpy array for element-wise comparison
157
+ df = df[scores_array > reranker.filtering_threshold]
158
+ # Sort by relevance (higher score = more relevant)
159
+ df = df.sort_values(by='_relevance_score', ascending=False)
160
+ # Remove temporary column
161
+ # df = df.drop(columns=['_relevance_score'])
162
+ # Apply original limit if it exists
163
+ if query.limit and len(df) > query.limit.value:
164
+ df = df.iloc[:query.limit.value]
165
+ logger.debug(f"Applied reranking with model {rerank_model}")
166
+ except Exception as e:
167
+ logger.error(f"Error during reranking: {str(e)}")
168
+
169
+ df = self.addapt_result_columns(df)
118
170
  return df
119
171
 
172
+ def addapt_conditions_columns(self, conditions):
173
+ if conditions is None:
174
+ return
175
+ for condition in conditions:
176
+ if condition.column in KB_TO_VECTORDB_COLUMNS:
177
+ condition.column = KB_TO_VECTORDB_COLUMNS[condition.column]
178
+
179
+ def addapt_result_columns(self, df):
180
+ col_update = {}
181
+ for kb_col, vec_col in KB_TO_VECTORDB_COLUMNS.items():
182
+ if vec_col in df.columns:
183
+ col_update[vec_col] = kb_col
184
+
185
+ df = df.rename(columns=col_update)
186
+
187
+ columns = list(df.columns)
188
+ # update id, get from metadata
189
+ df[TableField.ID.value] = df[TableField.METADATA.value].apply(lambda m: m.get('original_row_id'))
190
+
191
+ # id on first place
192
+ return df[[TableField.ID.value] + columns]
193
+
120
194
  def insert_files(self, file_names: List[str]):
121
195
  """Process and insert files"""
122
196
  if not self.document_loader:
@@ -217,7 +291,9 @@ class KnowledgeBaseTable:
217
291
 
218
292
  # send to vectordb
219
293
  db_handler = self.get_vector_db()
220
- db_handler.dispatch_delete(query)
294
+ conditions = db_handler.extract_conditions(query.where)
295
+ self.addapt_conditions_columns(conditions)
296
+ db_handler.dispatch_delete(query, conditions)
221
297
 
222
298
  def hybrid_search(
223
299
  self,
@@ -201,6 +201,14 @@ class Config:
201
201
  "host": api_host,
202
202
  "port": "55432",
203
203
  "database": "mindsdb"
204
+ },
205
+ "mcp": {
206
+ "host": api_host,
207
+ "port": "47337",
208
+ "enabled": True,
209
+ "restart_on_failure": True,
210
+ "max_restart_count": 1,
211
+ "max_restart_interval_seconds": 60
204
212
  }
205
213
  },
206
214
  "cache": {
@@ -27,6 +27,7 @@ types_map = {}
27
27
  for type_name in sa_type_names:
28
28
  types_map[type_name.upper()] = getattr(sa.types, type_name)
29
29
  types_map['BOOL'] = types_map['BOOLEAN']
30
+ types_map['DEC'] = types_map['DECIMAL']
30
31
 
31
32
 
32
33
  class RenderError(Exception):
@@ -43,6 +44,11 @@ class INTERVAL(ColumnElement):
43
44
  @compiles(INTERVAL)
44
45
  def _compile_interval(element, compiler, **kw):
45
46
  items = element.info.split(' ', maxsplit=1)
47
+ if compiler.dialect.name == 'oracle' and len(items) == 2:
48
+ # replace to singular names (remove leading S if exists)
49
+ if items[1].upper().endswith('S'):
50
+ items[1] = items[1][:-1]
51
+
46
52
  if compiler.dialect.driver in ['snowflake']:
47
53
  # quote all
48
54
  args = " ".join(map(str, items))
@@ -118,6 +124,8 @@ class SqlalchemyRender:
118
124
  self.dialect = dialect(paramstyle="named")
119
125
  self.dialect.div_is_floordiv = False
120
126
 
127
+ self.selects_stack = []
128
+
121
129
  if dialect_name == 'mssql':
122
130
  # update version to MS_2008_VERSION for supports_multivalues_insert
123
131
  self.dialect.server_version_info = (10,)
@@ -143,8 +151,10 @@ class SqlalchemyRender:
143
151
  part = self.dialect.identifier_preparer.quote(i)
144
152
 
145
153
  parts2.append(part)
146
-
147
- return sa.column('.'.join(parts2), is_literal=True)
154
+ text = '.'.join(parts2)
155
+ if identifier.is_outer and self.dialect.name == 'oracle':
156
+ text += '(+)'
157
+ return sa.column(text, is_literal=True)
148
158
 
149
159
  def get_alias(self, alias):
150
160
  if alias is None or len(alias.parts) == 0:
@@ -152,6 +162,9 @@ class SqlalchemyRender:
152
162
  if len(alias.parts) > 1:
153
163
  raise NotImplementedError(f'Multiple alias {alias.parts}')
154
164
 
165
+ if self.selects_stack:
166
+ self.selects_stack[-1]['aliases'].append(alias)
167
+
155
168
  is_quoted = get_is_quoted(alias)[0]
156
169
  return AttributedStr(alias.parts[0], is_quoted)
157
170
 
@@ -205,12 +218,18 @@ class SqlalchemyRender:
205
218
  alias = self.get_alias(t.alias)
206
219
  col = col.label(alias)
207
220
  elif isinstance(t, ast.Function):
208
- fnc = self.to_function(t)
221
+ col = self.to_function(t)
209
222
  if t.alias:
210
223
  alias = self.get_alias(t.alias)
224
+ col = col.label(alias)
211
225
  else:
212
226
  alias = str(t.op)
213
- col = fnc.label(alias)
227
+ if self.selects_stack:
228
+ aliases = self.selects_stack[-1]['aliases']
229
+ if alias not in aliases:
230
+ aliases.append(alias)
231
+ col = col.label(alias)
232
+
214
233
  elif isinstance(t, ast.BinaryOperation):
215
234
  ops = {
216
235
  "+": operators.add,
@@ -432,9 +451,9 @@ class SqlalchemyRender:
432
451
  return typename
433
452
 
434
453
  typename = typename.upper()
435
- if re.match(r'^INT[\d]*$', typename):
454
+ if re.match(r'^INT[\d]+$', typename):
436
455
  typename = 'BIGINT'
437
- if re.match(r'^FLOAT[\d]*$', typename):
456
+ if re.match(r'^FLOAT[\d]+$', typename):
438
457
  typename = 'FLOAT'
439
458
 
440
459
  return types_map[typename]
@@ -513,6 +532,9 @@ class SqlalchemyRender:
513
532
  return self.prepare_union(node)
514
533
 
515
534
  cols = []
535
+
536
+ self.selects_stack.append({'aliases': []})
537
+
516
538
  for t in node.targets:
517
539
  col = self.to_expression(t)
518
540
  cols.append(col)
@@ -647,6 +669,8 @@ class SqlalchemyRender:
647
669
  else:
648
670
  raise NotImplementedError(f'Select mode: {node.mode}')
649
671
 
672
+ self.selects_stack.pop()
673
+
650
674
  return query
651
675
 
652
676
  def prepare_union(self, from_table):
@@ -31,3 +31,10 @@ def start_ml_task_queue(*args, **kwargs):
31
31
  def start_scheduler(*args, **kwargs):
32
32
  from mindsdb.interfaces.jobs.scheduler import start
33
33
  start(*args, **kwargs)
34
+
35
+
36
+ def start_mcp(*args, **kwargs):
37
+ """Start the MCP server"""
38
+ from mindsdb.api.mcp.start import start
39
+
40
+ start(*args, **kwargs)