MindsDB 25.5.4.2__py3-none-any.whl → 25.6.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (76)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/a2a/agent.py +50 -26
  3. mindsdb/api/a2a/common/server/server.py +32 -26
  4. mindsdb/api/a2a/task_manager.py +68 -6
  5. mindsdb/api/executor/command_executor.py +69 -14
  6. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +49 -65
  7. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +91 -84
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +29 -48
  9. mindsdb/api/executor/datahub/datanodes/system_tables.py +35 -61
  10. mindsdb/api/executor/planner/plan_join.py +67 -77
  11. mindsdb/api/executor/planner/query_planner.py +176 -155
  12. mindsdb/api/executor/planner/steps.py +37 -12
  13. mindsdb/api/executor/sql_query/result_set.py +45 -64
  14. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +14 -18
  15. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +17 -18
  16. mindsdb/api/executor/sql_query/steps/insert_step.py +13 -33
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +43 -35
  18. mindsdb/api/executor/utilities/sql.py +42 -48
  19. mindsdb/api/http/namespaces/config.py +1 -1
  20. mindsdb/api/http/namespaces/file.py +14 -23
  21. mindsdb/api/http/namespaces/knowledge_bases.py +132 -154
  22. mindsdb/api/mysql/mysql_proxy/data_types/mysql_datum.py +12 -28
  23. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/binary_resultset_row_package.py +59 -50
  24. mindsdb/api/mysql/mysql_proxy/data_types/mysql_packets/resultset_row_package.py +9 -8
  25. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +449 -461
  26. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +87 -36
  27. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +219 -28
  28. mindsdb/integrations/handlers/file_handler/file_handler.py +15 -9
  29. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +43 -24
  30. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +10 -3
  31. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +1 -1
  32. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +29 -33
  33. mindsdb/integrations/handlers/openai_handler/openai_handler.py +277 -356
  34. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +74 -51
  35. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +305 -98
  36. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +145 -40
  37. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +136 -6
  38. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +352 -83
  39. mindsdb/integrations/libs/api_handler.py +279 -57
  40. mindsdb/integrations/libs/base.py +185 -30
  41. mindsdb/integrations/utilities/files/file_reader.py +99 -73
  42. mindsdb/integrations/utilities/handler_utils.py +23 -8
  43. mindsdb/integrations/utilities/sql_utils.py +35 -40
  44. mindsdb/interfaces/agents/agents_controller.py +226 -196
  45. mindsdb/interfaces/agents/constants.py +8 -1
  46. mindsdb/interfaces/agents/langchain_agent.py +42 -11
  47. mindsdb/interfaces/agents/mcp_client_agent.py +29 -21
  48. mindsdb/interfaces/agents/mindsdb_database_agent.py +23 -18
  49. mindsdb/interfaces/data_catalog/__init__.py +0 -0
  50. mindsdb/interfaces/data_catalog/base_data_catalog.py +54 -0
  51. mindsdb/interfaces/data_catalog/data_catalog_loader.py +375 -0
  52. mindsdb/interfaces/data_catalog/data_catalog_reader.py +38 -0
  53. mindsdb/interfaces/database/database.py +81 -57
  54. mindsdb/interfaces/database/integrations.py +222 -234
  55. mindsdb/interfaces/database/log.py +72 -104
  56. mindsdb/interfaces/database/projects.py +156 -193
  57. mindsdb/interfaces/file/file_controller.py +21 -65
  58. mindsdb/interfaces/knowledge_base/controller.py +66 -25
  59. mindsdb/interfaces/knowledge_base/evaluate.py +516 -0
  60. mindsdb/interfaces/knowledge_base/llm_client.py +75 -0
  61. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +83 -43
  62. mindsdb/interfaces/skills/skills_controller.py +31 -36
  63. mindsdb/interfaces/skills/sql_agent.py +113 -86
  64. mindsdb/interfaces/storage/db.py +242 -82
  65. mindsdb/migrations/versions/2025-05-28_a44643042fe8_added_data_catalog_tables.py +118 -0
  66. mindsdb/migrations/versions/2025-06-09_608e376c19a7_updated_data_catalog_data_types.py +58 -0
  67. mindsdb/utilities/config.py +13 -2
  68. mindsdb/utilities/log.py +35 -26
  69. mindsdb/utilities/ml_task_queue/task.py +19 -22
  70. mindsdb/utilities/render/sqlalchemy_render.py +129 -181
  71. mindsdb/utilities/starters.py +40 -0
  72. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/METADATA +257 -257
  73. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/RECORD +76 -68
  74. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/WHEEL +0 -0
  75. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/licenses/LICENSE +0 -0
  76. {mindsdb-25.5.4.2.dist-info → mindsdb-25.6.3.0.dist-info}/top_level.txt +0 -0
mindsdb/utilities/config.py CHANGED
@@ -217,6 +217,9 @@ class Config:
217
217
  "project_name": "mindsdb",
218
218
  "enabled": False,
219
219
  },
220
+ "data_catalog": {
221
+ "enabled": False,
222
+ },
220
223
  }
221
224
  # endregion
222
225
 
@@ -360,6 +363,8 @@ class Config:
360
363
  self._env_config["default_reranking_model"] = {
361
364
  "api_key": os.environ["MINDSDB_DEFAULT_RERANKING_MODEL_API_KEY"]
362
365
  }
366
+ if os.environ.get("MINDSDB_DATA_CATALOG_ENABLED", "").lower() in ("1", "true"):
367
+ self._env_config["data_catalog"] = {"enabled": True}
363
368
 
364
369
  # region vars: a2a configuration
365
370
  a2a_config = {}
@@ -395,11 +400,17 @@ class Config:
395
400
  bool: True if config was loaded or updated
396
401
  """
397
402
 
398
- if self.auto_config_mtime != self.auto_config_path.stat().st_mtime:
403
+ if (
404
+ self.auto_config_path.is_file()
405
+ and self.auto_config_path.read_text() != ""
406
+ and self.auto_config_mtime != self.auto_config_path.stat().st_mtime
407
+ ):
399
408
  try:
400
409
  self._auto_config = json.loads(self.auto_config_path.read_text())
401
410
  except json.JSONDecodeError as e:
402
- raise ValueError(f"The 'auto' configuration file ({self.auto_config_path}) contains invalid JSON: {e}")
411
+ raise ValueError(
412
+ f"The 'auto' configuration file ({self.auto_config_path}) contains invalid JSON: {e}\nFile content: {self.auto_config_path.read_text()}"
413
+ )
403
414
  self.auto_config_mtime = self.auto_config_path.stat().st_mtime
404
415
  return True
405
416
  return False
mindsdb/utilities/log.py CHANGED
@@ -12,11 +12,11 @@ class JsonFormatter(logging.Formatter):
12
12
  def format(self, record):
13
13
  record_message = super().format(record)
14
14
  log_record = {
15
- 'process_name': record.processName,
16
- 'name': record.name,
17
- 'message': record_message,
18
- 'level': record.levelname,
19
- 'time': record.created
15
+ "process_name": record.processName,
16
+ "name": record.name,
17
+ "message": record_message,
18
+ "level": record.levelname,
19
+ "time": record.created,
20
20
  }
21
21
  return json.dumps(log_record)
22
22
 
@@ -44,12 +44,12 @@ class ColorFormatter(logging.Formatter):
44
44
 
45
45
 
46
46
  def get_console_handler_config_level() -> int:
47
- console_handler_config = app_config['logging']['handlers']['console']
47
+ console_handler_config = app_config["logging"]["handlers"]["console"]
48
48
  return getattr(logging, console_handler_config["level"])
49
49
 
50
50
 
51
51
  def get_file_handler_config_level() -> int:
52
- file_handler_config = app_config['logging']['handlers']['file']
52
+ file_handler_config = app_config["logging"]["handlers"]["file"]
53
53
  return getattr(logging, file_handler_config["level"])
54
54
 
55
55
 
@@ -60,27 +60,34 @@ def get_mindsdb_log_level() -> int:
60
60
  return min(console_handler_config_level, file_handler_config_level)
61
61
 
62
62
 
63
- def configure_logging():
63
+ def configure_logging(process_name: str = None):
64
64
  handlers_config = {}
65
- console_handler_config = app_config['logging']['handlers']['console']
65
+ console_handler_config = app_config["logging"]["handlers"]["console"]
66
66
  console_handler_config_level = getattr(logging, console_handler_config["level"])
67
- if console_handler_config['enabled'] is True:
68
- handlers_config['console'] = {
67
+ if console_handler_config["enabled"] is True:
68
+ handlers_config["console"] = {
69
69
  "class": "logging.StreamHandler",
70
- "formatter": console_handler_config.get('formatter', 'default'),
71
- "level": console_handler_config_level
70
+ "formatter": console_handler_config.get("formatter", "default"),
71
+ "level": console_handler_config_level,
72
72
  }
73
73
 
74
- file_handler_config = app_config['logging']['handlers']['file']
74
+ file_handler_config = app_config["logging"]["handlers"]["file"]
75
75
  file_handler_config_level = getattr(logging, file_handler_config["level"])
76
- if file_handler_config['enabled'] is True:
77
- handlers_config['file'] = {
76
+ if file_handler_config["enabled"] is True:
77
+ file_name = file_handler_config["filename"]
78
+ if process_name is not None:
79
+ if "." in file_name:
80
+ parts = file_name.rpartition(".")
81
+ file_name = f"{parts[0]}_{process_name}.{parts[2]}"
82
+ else:
83
+ file_name = f"{file_name}_{process_name}"
84
+ handlers_config["file"] = {
78
85
  "class": "logging.handlers.RotatingFileHandler",
79
86
  "formatter": "file",
80
87
  "level": file_handler_config_level,
81
- "filename": app_config.paths["log"] / file_handler_config["filename"],
88
+ "filename": app_config.paths["log"] / file_name,
82
89
  "maxBytes": file_handler_config["maxBytes"], # 0.5 Mb
83
- "backupCount": file_handler_config["backupCount"]
90
+ "backupCount": file_handler_config["backupCount"],
84
91
  }
85
92
 
86
93
  mindsdb_log_level = get_mindsdb_log_level()
@@ -90,9 +97,7 @@ def configure_logging():
90
97
  formatters={
91
98
  "default": {"()": ColorFormatter},
92
99
  "json": {"()": JsonFormatter},
93
- "file": {
94
- "format": "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s"
95
- }
100
+ "file": {"format": "%(asctime)s %(processName)15s %(levelname)-8s %(name)s: %(message)s"},
96
101
  },
97
102
  handlers=handlers_config,
98
103
  loggers={
@@ -115,6 +120,14 @@ def configure_logging():
115
120
  dictConfig(logging_config)
116
121
 
117
122
 
123
+ def initialize_logging(process_name: str = None) -> None:
124
+ """Initialyze logging"""
125
+ global logging_initialized
126
+ if not logging_initialized:
127
+ configure_logging(process_name)
128
+ logging_initialized = True
129
+
130
+
118
131
  # I would prefer to leave code to use logging.getLogger(), but there are a lot of complicated situations
119
132
  # in MindsDB with processes being spawned that require logging to be configured again in a lot of cases.
120
133
  # Using a custom logger-getter like this lets us do that logic here, once.
@@ -122,9 +135,5 @@ def getLogger(name=None):
122
135
  """
123
136
  Get a new logger, configuring logging first if it hasn't been done yet.
124
137
  """
125
- global logging_initialized
126
- if not logging_initialized:
127
- configure_logging()
128
- logging_initialized = True
129
-
138
+ initialize_logging()
130
139
  return logging.getLogger(name)
mindsdb/utilities/ml_task_queue/task.py CHANGED
@@ -8,14 +8,14 @@ from mindsdb.utilities.ml_task_queue.const import ML_TASK_STATUS
8
8
 
9
9
 
10
10
  class Task:
11
- """ Abstraction for ML task. Should have interface similat to concurrent.futures.Future
11
+ """Abstraction for ML task. Should have interface similat to concurrent.futures.Future
12
12
 
13
- Attributes:
14
- db (Redis): database object
15
- redis_key (RedisKey): redis keys associated with task
16
- dataframe (DataFrame): task result
17
- exception (Exception): task exeuton runtime exception
18
- _timeout (int): max time without status updating
13
+ Attributes:
14
+ db (Redis): database object
15
+ redis_key (RedisKey): redis keys associated with task
16
+ dataframe (DataFrame): task result
17
+ exception (Exception): task exeuton runtime exception
18
+ _timeout (int): max time without status updating
19
19
  """
20
20
 
21
21
  def __init__(self, connection: redis.Redis, redis_key: RedisKey) -> None:
@@ -23,18 +23,17 @@ class Task:
23
23
  self.redis_key = redis_key
24
24
  self.dataframe = None
25
25
  self.exception = None
26
- self._timeout = 30
26
+ self._timeout = 60
27
27
 
28
28
  def subscribe(self) -> ML_TASK_STATUS:
29
- """ return tasks status untill it is not done or failed
30
- """
29
+ """return tasks status untill it is not done or failed"""
31
30
  pubsub = self.db.pubsub()
32
31
  cache = self.db.cache()
33
32
  pubsub.subscribe(self.redis_key.status)
34
- while (msg := pubsub.get_message(timeout=self._timeout)):
35
- if msg['type'] not in pubsub.PUBLISH_MESSAGE_TYPES:
33
+ while msg := pubsub.get_message(timeout=self._timeout):
34
+ if msg["type"] not in pubsub.PUBLISH_MESSAGE_TYPES:
36
35
  continue
37
- ml_task_status = ML_TASK_STATUS(msg['data'])
36
+ ml_task_status = ML_TASK_STATUS(msg["data"])
38
37
  if ml_task_status == ML_TASK_STATUS.COMPLETE:
39
38
  dataframe_bytes = cache.get(self.redis_key.dataframe)
40
39
  if dataframe_bytes is not None:
@@ -51,8 +50,7 @@ class Task:
51
50
  yield ml_task_status
52
51
 
53
52
  def wait(self, status: ML_TASK_STATUS = ML_TASK_STATUS.COMPLETE) -> None:
54
- """ block threasd untill task is not done or failed
55
- """
53
+ """block threasd untill task is not done or failed"""
56
54
  for status in self.subscribe():
57
55
  if status in (ML_TASK_STATUS.WAITING, ML_TASK_STATUS.PROCESSING):
58
56
  continue
@@ -60,23 +58,22 @@ class Task:
60
58
  if self.exception is not None:
61
59
  raise self.exception
62
60
  else:
63
- raise Exception('Unknown error during ML task execution')
61
+ raise Exception("Unknown error during ML task execution")
64
62
  if status == ML_TASK_STATUS.TIMEOUT:
65
63
  raise Exception(f"Can't get answer in {self._timeout} seconds")
66
64
  if status == ML_TASK_STATUS.COMPLETE:
67
65
  return
68
- raise KeyError('Unknown task status')
66
+ raise KeyError("Unknown task status")
69
67
 
70
68
  def result(self) -> DataFrame:
71
- """ wait task is done and return result
69
+ """wait task is done and return result
72
70
 
73
- Returns:
74
- DataFrame: task result
71
+ Returns:
72
+ DataFrame: task result
75
73
  """
76
74
  self.wait()
77
75
  return self.dataframe
78
76
 
79
77
  def add_done_callback(self, fn: Callable) -> None:
80
- """ need for compatability with concurrent.futures.Future interface
81
- """
78
+ """need for compatability with concurrent.futures.Future interface"""
82
79
  pass