MindsDB 25.5.4.1__py3-none-any.whl → 25.6.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (70)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +28 -25
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/run_a2a.py +1 -1
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  8. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  9. mindsdb/api/executor/planner/plan_join.py +67 -77
  10. mindsdb/api/executor/planner/query_planner.py +176 -155
  11. mindsdb/api/executor/planner/steps.py +37 -12
  12. mindsdb/api/executor/sql_query/result_set.py +45 -64
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  15. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  16. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  17. mindsdb/api/executor/utilities/sql.py +42 -48
  18. mindsdb/api/http/namespaces/config.py +1 -1
  19. mindsdb/api/http/namespaces/file.py +14 -23
  20. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  21. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  23. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  24. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  25. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  26. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  27. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  28. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +26 -33
  29. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  30. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  31. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +53 -34
  32. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  33. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +334 -83
  34. mindsdb/integrations/libs/api_handler.py +261 -57
  35. mindsdb/integrations/libs/base.py +100 -29
  36. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  37. mindsdb/integrations/utilities/handler_utils.py +23 -8
  38. mindsdb/integrations/utilities/sql_utils.py +35 -40
  39. mindsdb/interfaces/agents/agents_controller.py +196 -192
  40. mindsdb/interfaces/agents/constants.py +7 -1
  41. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  42. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  43. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  44. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  45. mindsdb/interfaces/data_catalog/data_catalog_loader.py +359 -0
  46. mindsdb/interfaces/data_catalog/data_catalog_reader.py +34 -0
  47. mindsdb/interfaces/database/database.py +81 -57
  48. mindsdb/interfaces/database/integrations.py +220 -234
  49. mindsdb/interfaces/database/log.py +72 -104
  50. mindsdb/interfaces/database/projects.py +156 -193
  51. mindsdb/interfaces/file/file_controller.py +21 -65
  52. mindsdb/interfaces/knowledge_base/controller.py +63 -10
  53. mindsdb/interfaces/knowledge_base/evaluate.py +519 -0
  54. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  55. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  56. mindsdb/interfaces/skills/skills_controller.py +54 -36
  57. mindsdb/interfaces/skills/sql_agent.py +109 -86
  58. mindsdb/interfaces/storage/db.py +223 -79
  59. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  60. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  61. mindsdb/utilities/config.py +9 -2
  62. mindsdb/utilities/log.py +35 -26
  63. mindsdb/utilities/ml_task_queue/task.py +19 -22
  64. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  65. mindsdb/utilities/starters.py +49 -1
  66. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/METADATA +268 -268
  67. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/RECORD +70 -62
  68. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/WHEEL +0 -0
  69. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/licenses/LICENSE +0 -0
  70. {mindsdb-25.5.4.1.dist-info → mindsdb-25.6.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/file/file_controller.py +21 -65
@@ -26,17 +26,10 @@ class FileController:
26
26
 
27
27
  def get_files_names(self):
28
28
  """return list of files names"""
29
- return [
30
- x[0]
31
- for x in db.session.query(db.File.name).filter_by(company_id=ctx.company_id)
32
- ]
29
+ return [x[0] for x in db.session.query(db.File.name).filter_by(company_id=ctx.company_id)]
33
30
 
34
31
  def get_file_meta(self, name):
35
- file_record = (
36
- db.session.query(db.File)
37
- .filter_by(company_id=ctx.company_id, name=name)
38
- .first()
39
- )
32
+ file_record = db.session.query(db.File).filter_by(company_id=ctx.company_id, name=name).first()
40
33
  if file_record is None:
41
34
  return None
42
35
  columns = file_record.columns
@@ -54,9 +47,7 @@ class FileController:
54
47
  Returns:
55
48
  list[dict]: files metadata
56
49
  """
57
- file_records = (
58
- db.session.query(db.File).filter_by(company_id=ctx.company_id).all()
59
- )
50
+ file_records = db.session.query(db.File).filter_by(company_id=ctx.company_id).all()
60
51
  files_metadata = [
61
52
  {
62
53
  "name": record.name,
@@ -89,10 +80,7 @@ class FileController:
89
80
  try:
90
81
  pages_files, pages_index = self.get_file_pages(file_path)
91
82
 
92
- metadata = {
93
- 'is_feather': True,
94
- 'pages': pages_index
95
- }
83
+ metadata = {"is_feather": True, "pages": pages_index}
96
84
  df = pages_files[0]
97
85
  file_record = db.File(
98
86
  name=name,
@@ -101,7 +89,7 @@ class FileController:
101
89
  file_path="",
102
90
  row_count=len(df),
103
91
  columns=list(df.columns),
104
- metadata_=metadata
92
+ metadata_=metadata,
105
93
  )
106
94
  db.session.add(file_record)
107
95
  db.session.flush()
@@ -157,15 +145,11 @@ class FileController:
157
145
  """
158
146
 
159
147
  for num, df in pages_files.items():
160
- dest = dest_dir.joinpath(f'{num}.feather')
148
+ dest = dest_dir.joinpath(f"{num}.feather")
161
149
  df.to_feather(str(dest))
162
150
 
163
151
  def delete_file(self, name):
164
- file_record = (
165
- db.session.query(db.File)
166
- .filter_by(company_id=ctx.company_id, name=name)
167
- .first()
168
- )
152
+ file_record = db.session.query(db.File).filter_by(company_id=ctx.company_id, name=name).first()
169
153
  if file_record is None:
170
154
  return None
171
155
  file_id = file_record.id
@@ -175,20 +159,12 @@ class FileController:
175
159
  return True
176
160
 
177
161
  def get_file_path(self, name):
178
- file_record = (
179
- db.session.query(db.File)
180
- .filter_by(company_id=ctx.company_id, name=name)
181
- .first()
182
- )
162
+ file_record = db.session.query(db.File).filter_by(company_id=ctx.company_id, name=name).first()
183
163
  if file_record is None:
184
164
  raise Exception(f"File '{name}' does not exists")
185
165
  file_dir = f"file_{ctx.company_id}_{file_record.id}"
186
166
  self.fs_store.get(file_dir, base_dir=self.dir)
187
- return str(
188
- Path(self.dir)
189
- .joinpath(file_dir)
190
- .joinpath(Path(file_record.source_file_path).name)
191
- )
167
+ return str(Path(self.dir).joinpath(file_dir).joinpath(Path(file_record.source_file_path).name))
192
168
 
193
169
  def get_file_data(self, name: str, page_name: str = None) -> pd.DataFrame:
194
170
  """
@@ -198,11 +174,7 @@ class FileController:
198
174
  :param page_name: page name, optional
199
175
  :return: Page or file content
200
176
  """
201
- file_record = (
202
- db.session.query(db.File)
203
- .filter_by(company_id=ctx.company_id, name=name)
204
- .first()
205
- )
177
+ file_record = db.session.query(db.File).filter_by(company_id=ctx.company_id, name=name).first()
206
178
  if file_record is None:
207
179
  raise Exception(f"File '{name}' does not exists")
208
180
 
@@ -210,37 +182,29 @@ class FileController:
210
182
  self.fs_store.get(file_dir, base_dir=self.dir)
211
183
 
212
184
  metadata = file_record.metadata_ or {}
213
- if metadata.get('is_feather') is not True:
185
+ if metadata.get("is_feather") is not True:
214
186
  # migrate file
215
187
 
216
- file_path = (
217
- Path(self.dir)
218
- .joinpath(file_dir)
219
- .joinpath(Path(file_record.source_file_path).name)
220
- )
188
+ file_path = Path(self.dir).joinpath(file_dir).joinpath(Path(file_record.source_file_path).name)
221
189
 
222
190
  pages_files, pages_index = self.get_file_pages(str(file_path))
223
191
 
224
192
  self.store_pages_as_feather(file_path.parent, pages_files)
225
- metadata['is_feather'] = True
226
- metadata['pages'] = pages_index
193
+ metadata["is_feather"] = True
194
+ metadata["pages"] = pages_index
227
195
 
228
196
  file_record.metadata_ = metadata
229
- flag_modified(file_record, 'metadata_')
197
+ flag_modified(file_record, "metadata_")
230
198
  db.session.commit()
231
199
 
232
200
  if page_name is None:
233
201
  num = 0
234
202
  else:
235
- num = metadata.get('pages', {}).get(page_name)
203
+ num = metadata.get("pages", {}).get(page_name)
236
204
  if num is None:
237
- raise KeyError(f'Page not found: {page_name}')
205
+ raise KeyError(f"Page not found: {page_name}")
238
206
 
239
- path = (
240
- Path(self.dir)
241
- .joinpath(file_dir)
242
- .joinpath(f'{num}.feather')
243
- )
207
+ path = Path(self.dir).joinpath(file_dir).joinpath(f"{num}.feather")
244
208
  return pd.read_feather(path)
245
209
 
246
210
  def set_file_data(self, name: str, df: pd.DataFrame, page_name: str = None):
@@ -251,11 +215,7 @@ class FileController:
251
215
  :param page_name: name of page, optional
252
216
  """
253
217
 
254
- file_record = (
255
- db.session.query(db.File)
256
- .filter_by(company_id=ctx.company_id, name=name)
257
- .first()
258
- )
218
+ file_record = db.session.query(db.File).filter_by(company_id=ctx.company_id, name=name).first()
259
219
  if file_record is None:
260
220
  raise Exception(f"File '{name}' does not exists")
261
221
 
@@ -264,12 +224,8 @@ class FileController:
264
224
 
265
225
  num = 0
266
226
  if page_name is not None and file_record.metadata_ is not None:
267
- num = file_record.metadata_.get('pages', {}).get(page_name, 0)
227
+ num = file_record.metadata_.get("pages", {}).get(page_name, 0)
268
228
 
269
- path = (
270
- Path(self.dir)
271
- .joinpath(file_dir)
272
- .joinpath(f'{num}.feather')
273
- )
229
+ path = Path(self.dir).joinpath(file_dir).joinpath(f"{num}.feather")
274
230
  df.to_feather(path)
275
231
  self.fs_store.put(file_dir, base_dir=self.dir)
mindsdb/interfaces/knowledge_base/controller.py +63 -10
@@ -1,6 +1,8 @@
1
1
  import os
2
2
  import copy
3
3
  from typing import Dict, List, Optional
4
+ import json
5
+ import decimal
4
6
 
5
7
  import pandas as pd
6
8
  import numpy as np
@@ -29,6 +31,7 @@ from mindsdb.interfaces.database.projects import ProjectController
29
31
  from mindsdb.interfaces.variables.variables_controller import variables_controller
30
32
  from mindsdb.interfaces.knowledge_base.preprocessing.models import PreprocessingConfig, Document
31
33
  from mindsdb.interfaces.knowledge_base.preprocessing.document_preprocessor import PreprocessorFactory
34
+ from mindsdb.interfaces.knowledge_base.evaluate import EvaluateBase
32
35
  from mindsdb.interfaces.model.functions import PredictorRecordNotFound
33
36
  from mindsdb.utilities.exception import EntityExistsError, EntityNotExistsError
34
37
  from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
@@ -108,6 +111,15 @@ def safe_pandas_is_datetime(value: str) -> bool:
108
111
  return False
109
112
 
110
113
 
114
+ def to_json(obj):
115
+ if obj is None:
116
+ return None
117
+ try:
118
+ return json.dumps(obj)
119
+ except TypeError:
120
+ return obj
121
+
122
+
111
123
  class KnowledgeBaseTable:
112
124
  """
113
125
  Knowledge base table interface
@@ -175,10 +187,11 @@ class KnowledgeBaseTable:
175
187
  conditions = []
176
188
  query_text = None
177
189
  relevance_threshold = None
190
+ reranking_enabled_flag = True
178
191
  query_conditions = db_handler.extract_conditions(query.where)
179
192
  if query_conditions is not None:
180
193
  for item in query_conditions:
181
- if item.column == "relevance_threshold" and item.op.value == "=":
194
+ if item.column == "relevance" and item.op.value == FilterOperator.GREATER_THAN_OR_EQUAL.value:
182
195
  try:
183
196
  relevance_threshold = float(item.value)
184
197
  # Validate range: must be between 0 and 1
@@ -189,6 +202,15 @@ class KnowledgeBaseTable:
189
202
  error_msg = f"Invalid relevance_threshold value: {item.value}. {str(e)}"
190
203
  logger.error(error_msg)
191
204
  raise ValueError(error_msg)
205
+ elif item.column == "reranking":
206
+ reranking_enabled_flag = item.value
207
+ # cast to boolean
208
+ if isinstance(reranking_enabled_flag, str):
209
+ reranking_enabled_flag = reranking_enabled_flag.lower() not in ("false")
210
+ elif item.column == "relevance" and item.op.value != FilterOperator.GREATER_THAN_OR_EQUAL.value:
211
+ raise ValueError(
212
+ f"Invalid operator for relevance: {item.op.value}. Only GREATER_THAN_OR_EQUAL is allowed."
213
+ )
192
214
  elif item.column == TableField.CONTENT.value:
193
215
  query_text = item.value
194
216
 
@@ -222,8 +244,7 @@ class KnowledgeBaseTable:
222
244
  logger.debug(f"Query returned {len(df)} rows")
223
245
  logger.debug(f"Columns in response: {df.columns.tolist()}")
224
246
  # Check if we have a rerank_model configured in KB params
225
-
226
- df = self.add_relevance(df, query_text, relevance_threshold)
247
+ df = self.add_relevance(df, query_text, relevance_threshold, reranking_enabled_flag)
227
248
 
228
249
  if (
229
250
  query.group_by is not None
@@ -234,15 +255,22 @@ class KnowledgeBaseTable:
234
255
  or not isinstance(query.targets[0], Star)
235
256
  ):
236
257
  query_copy.where = None
258
+ if "metadata" in df.columns:
259
+ df["metadata"] = df["metadata"].apply(to_json)
260
+
237
261
  df = query_df(df, query_copy, session=self.session)
238
262
 
239
263
  return df
240
264
 
241
- def add_relevance(self, df, query_text, relevance_threshold=None):
265
+ def score_documents(self, query_text, documents, reranking_model_params):
266
+ reranker = get_reranking_model_from_params(reranking_model_params)
267
+ return reranker.get_scores(query_text, documents)
268
+
269
+ def add_relevance(self, df, query_text, relevance_threshold=None, reranking_enabled_flag=True):
242
270
  relevance_column = TableField.RELEVANCE.value
243
271
 
244
272
  reranking_model_params = get_model_params(self._kb.params.get("reranking_model"), "default_reranking_model")
245
- if reranking_model_params and query_text and len(df) > 0:
273
+ if reranking_model_params and query_text and len(df) > 0 and reranking_enabled_flag:
246
274
  # Use reranker for relevance score
247
275
  try:
248
276
  logger.info(f"Using knowledge reranking model from params: {reranking_model_params}")
@@ -601,14 +629,15 @@ class KnowledgeBaseTable:
601
629
  metadata = {}
602
630
  for col in metadata_columns:
603
631
  value = row[col]
632
+ value_type = type(value)
604
633
  # Convert numpy/pandas types to Python native types
605
634
  if safe_pandas_is_datetime(value) or isinstance(value, pd.Timestamp):
606
635
  value = str(value)
607
- elif pd.api.types.is_integer_dtype(value):
636
+ elif pd.api.types.is_integer_dtype(value_type):
608
637
  value = int(value)
609
- elif pd.api.types.is_float_dtype(value):
638
+ elif pd.api.types.is_float_dtype(value_type) or isinstance(value, decimal.Decimal):
610
639
  value = float(value)
611
- elif pd.api.types.is_bool_dtype(value):
640
+ elif pd.api.types.is_bool_dtype(value_type):
612
641
  value = bool(value)
613
642
  elif isinstance(value, dict):
614
643
  metadata.update(value)
@@ -935,7 +964,16 @@ class KnowledgeBaseController:
935
964
  model_record = db.Predictor.query.get(model["id"])
936
965
  embedding_model_id = model_record.id
937
966
 
938
- reranking_model_params = get_model_params(params.get("reranking_model", {}), "default_reranking_model")
967
+ # if params.get("reranking_model", {}) is bool and False we evaluate it to empty dictionary
968
+ reranking_model_params = params.get("reranking_model", {})
969
+
970
+ if isinstance(reranking_model_params, bool) and not reranking_model_params:
971
+ params["reranking_model"] = {}
972
+ # if params.get("reranking_model", {}) is string and false in any case we evaluate it to empty dictionary
973
+ if isinstance(reranking_model_params, str) and reranking_model_params.lower() == "false":
974
+ params["reranking_model"] = {}
975
+
976
+ reranking_model_params = get_model_params(reranking_model_params, "default_reranking_model")
939
977
  if reranking_model_params:
940
978
  # Get reranking model from params.
941
979
  # This is called here to check validaity of the parameters.
@@ -1028,7 +1066,7 @@ class KnowledgeBaseController:
1028
1066
  except PredictorRecordNotFound:
1029
1067
  pass
1030
1068
 
1031
- if params.get("provider", None) not in ("openai", "azure"):
1069
+ if params.get("provider", None) not in ("openai", "azure_openai"):
1032
1070
  # try use litellm
1033
1071
  KnowledgeBaseTable.call_litellm_embedding(self.session, params, ["test"])
1034
1072
  return
@@ -1192,3 +1230,18 @@ class KnowledgeBaseController:
1192
1230
  Update a knowledge base record
1193
1231
  """
1194
1232
  raise NotImplementedError()
1233
+
1234
+ def evaluate(self, table_name: str, project_name: str, params: dict = None) -> pd.DataFrame:
1235
+ """
1236
+ Run evaluate and/or create test data for evaluation
1237
+ :param table_name: name of KB
1238
+ :param project_name: project of KB
1239
+ :param params: evaluation parameters
1240
+ :return: evaluation results
1241
+ """
1242
+ project_id = self.session.database_controller.get_project(project_name).id
1243
+ kb_table = self.get_table(table_name, project_id)
1244
+
1245
+ scores = EvaluateBase.run(self.session, kb_table, params)
1246
+
1247
+ return scores