MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB has been flagged as possibly problematic.

Files changed (61):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +53 -94
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +8 -6
  8. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  9. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  10. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  11. mindsdb/api/executor/planner/query_prepare.py +68 -87
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  13. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  14. mindsdb/api/executor/utilities/sql.py +97 -21
  15. mindsdb/api/http/namespaces/agents.py +126 -201
  16. mindsdb/api/http/namespaces/config.py +12 -1
  17. mindsdb/api/http/namespaces/file.py +49 -24
  18. mindsdb/api/mcp/start.py +45 -31
  19. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  20. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  21. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  22. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  23. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  24. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  25. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  27. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
  28. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  29. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
  31. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  32. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  33. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  34. mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
  35. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  36. mindsdb/integrations/utilities/sql_utils.py +11 -0
  37. mindsdb/interfaces/agents/agents_controller.py +29 -9
  38. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  39. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  40. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  41. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  42. mindsdb/interfaces/database/projects.py +1 -3
  43. mindsdb/interfaces/functions/controller.py +54 -64
  44. mindsdb/interfaces/functions/to_markdown.py +47 -14
  45. mindsdb/interfaces/knowledge_base/controller.py +228 -110
  46. mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
  47. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  48. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  49. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  50. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  51. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  52. mindsdb/interfaces/skills/sql_agent.py +181 -130
  53. mindsdb/interfaces/storage/db.py +9 -7
  54. mindsdb/utilities/config.py +58 -40
  55. mindsdb/utilities/exception.py +58 -7
  56. mindsdb/utilities/security.py +54 -11
  57. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
  58. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
  59. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
  60. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
  61. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0
@@ -28,6 +28,13 @@ def _merge_configs(original_config: dict, override_config: dict) -> dict:
28
28
  return original_config
29
29
 
30
30
 
31
+ def _overwrite_configs(original_config: dict, override_config: dict) -> dict:
32
+ """Overwrite original config with override config."""
33
+ for key in list(override_config.keys()):
34
+ original_config[key] = override_config[key]
35
+ return original_config
36
+
37
+
31
38
  def create_data_dir(path: Path) -> None:
32
39
  """Create a directory and checks that it is writable.
33
40
 
@@ -196,10 +203,20 @@ class Config:
196
203
  "host": "0.0.0.0", # API server binds to all interfaces by default
197
204
  "port": "8000",
198
205
  },
206
+ "a2a": {
207
+ "host": api_host,
208
+ "port": 47338,
209
+ "mindsdb_host": "localhost",
210
+ "mindsdb_port": 47334,
211
+ "agent_name": "my_agent",
212
+ "project_name": "mindsdb",
213
+ "enabled": False,
214
+ },
199
215
  },
200
216
  "cache": {"type": "local"},
201
217
  "ml_task_queue": {"type": "local"},
202
- "file_upload_domains": [],
218
+ "url_file_upload": {"enabled": True, "allowed_origins": [], "disallowed_origins": []},
219
+ "file_upload_domains": [], # deprecated, use config[url_file_upload][allowed_origins] instead
203
220
  "web_crawling_allowed_sites": [],
204
221
  "cloud": False,
205
222
  "jobs": {"disable": False},
@@ -208,15 +225,6 @@ class Config:
208
225
  "default_llm": {},
209
226
  "default_embedding_model": {},
210
227
  "default_reranking_model": {},
211
- "a2a": {
212
- "host": "localhost",
213
- "port": 47338,
214
- "mindsdb_host": "localhost",
215
- "mindsdb_port": 47334,
216
- "agent_name": "my_agent",
217
- "project_name": "mindsdb",
218
- "enabled": False,
219
- },
220
228
  "data_catalog": {
221
229
  "enabled": False,
222
230
  },
@@ -242,12 +250,11 @@ class Config:
242
250
  """Collect config values from env vars to self._env_config"""
243
251
  self._env_config = {
244
252
  "logging": {"handlers": {"console": {}, "file": {}}},
245
- "api": {"http": {"server": {}}},
253
+ "api": {"http": {"server": {}}, "a2a": {}},
246
254
  "auth": {},
247
255
  "paths": {},
248
256
  "permanent_storage": {},
249
257
  "ml_task_queue": {},
250
- "a2a": {},
251
258
  }
252
259
 
253
260
  # region storage root path
@@ -389,7 +396,7 @@ class Config:
389
396
  )
390
397
 
391
398
  if a2a_config:
392
- self._env_config["a2a"] = a2a_config
399
+ self._env_config["api"]["a2a"] = a2a_config
393
400
  # endregion
394
401
 
395
402
  def fetch_auto_config(self) -> bool:
@@ -456,47 +463,36 @@ class Config:
456
463
  _merge_configs(new_config, self._env_config)
457
464
 
458
465
  # Apply command-line arguments for A2A
459
- cmd_args_config = {}
466
+ a2a_config = {}
460
467
 
461
468
  # Check for A2A command-line arguments
462
469
  if hasattr(self.cmd_args, "a2a_host") and self.cmd_args.a2a_host is not None:
463
- if "a2a" not in cmd_args_config:
464
- cmd_args_config["a2a"] = {}
465
- cmd_args_config["a2a"]["host"] = self.cmd_args.a2a_host
470
+ a2a_config["host"] = self.cmd_args.a2a_host
466
471
 
467
472
  if hasattr(self.cmd_args, "a2a_port") and self.cmd_args.a2a_port is not None:
468
- if "a2a" not in cmd_args_config:
469
- cmd_args_config["a2a"] = {}
470
- cmd_args_config["a2a"]["port"] = self.cmd_args.a2a_port
473
+ a2a_config["port"] = self.cmd_args.a2a_port
471
474
 
472
475
  if hasattr(self.cmd_args, "mindsdb_host") and self.cmd_args.mindsdb_host is not None:
473
- if "a2a" not in cmd_args_config:
474
- cmd_args_config["a2a"] = {}
475
- cmd_args_config["a2a"]["mindsdb_host"] = self.cmd_args.mindsdb_host
476
+ a2a_config["mindsdb_host"] = self.cmd_args.mindsdb_host
476
477
 
477
478
  if hasattr(self.cmd_args, "mindsdb_port") and self.cmd_args.mindsdb_port is not None:
478
- if "a2a" not in cmd_args_config:
479
- cmd_args_config["a2a"] = {}
480
- cmd_args_config["a2a"]["mindsdb_port"] = self.cmd_args.mindsdb_port
479
+ a2a_config["mindsdb_port"] = self.cmd_args.mindsdb_port
481
480
 
482
481
  if hasattr(self.cmd_args, "agent_name") and self.cmd_args.agent_name is not None:
483
- if "a2a" not in cmd_args_config:
484
- cmd_args_config["a2a"] = {}
485
- cmd_args_config["a2a"]["agent_name"] = self.cmd_args.agent_name
482
+ a2a_config["agent_name"] = self.cmd_args.agent_name
486
483
 
487
484
  if hasattr(self.cmd_args, "project_name") and self.cmd_args.project_name is not None:
488
- if "a2a" not in cmd_args_config:
489
- cmd_args_config["a2a"] = {}
490
- cmd_args_config["a2a"]["project_name"] = self.cmd_args.project_name
485
+ a2a_config["project_name"] = self.cmd_args.project_name
491
486
 
492
487
  # Merge command-line args config with highest priority
493
- if cmd_args_config:
494
- _merge_configs(new_config, cmd_args_config)
488
+ if a2a_config:
489
+ _merge_configs(new_config, {"api": {"a2a": a2a_config}})
495
490
 
496
491
  # Ensure A2A port is never 0, which would prevent the A2A API from starting
497
- if "a2a" in new_config and isinstance(new_config["a2a"], dict):
498
- if "port" in new_config["a2a"] and (new_config["a2a"]["port"] == 0 or new_config["a2a"]["port"] is None):
499
- new_config["a2a"]["port"] = 47338 # Use the default port value
492
+ a2a_config = new_config["api"].get("a2a")
493
+ if a2a_config is not None and isinstance(a2a_config, dict):
494
+ if "port" in a2a_config and (a2a_config["port"] == 0 or a2a_config["port"] is None):
495
+ a2a_config["port"] = 47338 # Use the default port value
500
496
 
501
497
  # region create dirs
502
498
  for key, value in new_config["paths"].items():
@@ -521,11 +517,23 @@ class Config:
521
517
  self.ensure_auto_config_is_relevant()
522
518
  return self._config
523
519
 
524
- def update(self, data: dict) -> None:
525
- """Update calues in `auto` config"""
520
+ def update(self, data: dict, overwrite: bool = False) -> None:
521
+ """
522
+ Update values in `auto` config.
523
+ Args:
524
+ data (dict): data to update in `auto` config.
525
+ overwrite (bool): if True, overwrite existing keys, otherwise merge them.
526
+ - False (default): Merge recursively. Existing nested dictionaries are preserved
527
+ and only the specified keys in `data` are updated.
528
+ - True: Overwrite completely. Existing keys are replaced entirely with values
529
+ from `data`, discarding any nested structure not present in `data`.
530
+ """
526
531
  self.ensure_auto_config_is_relevant()
527
532
 
528
- _merge_configs(self._auto_config, data)
533
+ if overwrite:
534
+ _overwrite_configs(self._auto_config, data)
535
+ else:
536
+ _merge_configs(self._auto_config, data)
529
537
 
530
538
  self.auto_config_path.write_text(json.dumps(self._auto_config, indent=4))
531
539
 
@@ -548,6 +556,16 @@ class Config:
548
556
  "Use 'MINDSDB_HTTP_SERVER_TYPE' instead."
549
557
  )
550
558
 
559
+ file_upload_domains = self._config.get("file_upload_domains")
560
+ if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0:
561
+ allowed_origins = self._config["url_file_upload"]["allowed_origins"]
562
+ if isinstance(allowed_origins, list) and len(allowed_origins) == 0:
563
+ self._config["url_file_upload"]["allowed_origins"] = file_upload_domains
564
+ logger.warning(
565
+ 'Config option "file_upload_domains" is deprecated, '
566
+ 'use config["url_file_upload"]["allowed_origins"] instead.'
567
+ )
568
+
551
569
  for env_name in ("MINDSDB_HTTP_SERVER_TYPE", "MINDSDB_DEFAULT_SERVER"):
552
570
  env_value = os.environ.get(env_name, "")
553
571
  if env_value.lower() not in ("waitress", "flask", "gunicorn", ""):
@@ -1,29 +1,80 @@
1
+ from textwrap import indent
2
+
3
+
1
4
  class BaseEntityException(Exception):
2
5
  """Base exception for entitys errors
3
6
 
4
- Attributes:
5
- message (str): error message
6
- entity_name (str): entity name
7
+ Attributes:
8
+ message (str): error message
9
+ entity_name (str): entity name
7
10
  """
11
+
8
12
  def __init__(self, message: str, entity_name: str = None) -> None:
9
13
  self.message = message
10
- self.entity_name = entity_name or 'unknown'
14
+ self.entity_name = entity_name or "unknown"
11
15
 
12
16
  def __str__(self) -> str:
13
- return f'{self.message}: {self.entity_name}'
17
+ return f"{self.message}: {self.entity_name}"
14
18
 
15
19
 
class EntityExistsError(BaseEntityException):
    """Raise when an entity exists, but should not."""

    def __init__(self, message: str | None = None, entity_name: str | None = None) -> None:
        # Use the generic message when the caller does not supply one.
        if message is None:
            message = "Entity exists error"
        super().__init__(message, entity_name)
22
27
 
23
28
 
class EntityNotExistsError(BaseEntityException):
    """Raise when an entity does not exist, but should."""

    def __init__(self, message: str | None = None, entity_name: str | None = None) -> None:
        # Use the generic message when the caller does not supply one.
        if message is None:
            message = "Entity does not exists error"
        super().__init__(message, entity_name)
36
+
37
+
38
+ def format_db_error_message(
39
+ db_name: str | None = None,
40
+ db_type: str | None = None,
41
+ db_error_msg: str | None = None,
42
+ failed_query: str | None = None,
43
+ is_external: bool = True,
44
+ ) -> str:
45
+ """Format the error message for the database query.
46
+
47
+ Args:
48
+ db_name (str | None): The name of the database.
49
+ db_type (str | None): The type of the database.
50
+ db_error_msg (str | None): The error message.
51
+ failed_query (str | None): The failed query.
52
+ is_external (bool): True if error appeared in external database, False if in internal duckdb
53
+
54
+ Returns:
55
+ str: The formatted error message.
56
+ """
57
+ error_message = "Failed to execute external database query during query processing."
58
+ if is_external:
59
+ error_message = (
60
+ "An error occurred while executing a derived query on the external "
61
+ "database during processing of your original SQL query."
62
+ )
63
+ else:
64
+ error_message = (
65
+ "An error occurred while processing an internally generated query derived from your original SQL statement."
66
+ )
67
+ if db_name is not None or db_type is not None:
68
+ error_message += "\n\nDatabase Details:"
69
+ if db_name is not None:
70
+ error_message += f"\n- Name: {db_name}"
71
+ if db_type is not None:
72
+ error_message += f"\n- Type: {db_type}"
73
+
74
+ if db_error_msg is not None:
75
+ error_message += f"\n\nError:\n{indent(db_error_msg, ' ')}"
76
+
77
+ if failed_query is not None:
78
+ error_message += f"\n\nFailed Query:\n{indent(failed_query, ' ')}"
79
+
80
+ return error_message
@@ -27,28 +27,71 @@ def clear_filename(filename: str) -> str:
27
27
 
28
28
  if not filename:
29
29
  return filename
30
- badchars = '\\/:*?\"<>|'
30
+ badchars = '\\/:*?"<>|'
31
31
  for c in badchars:
32
- filename = filename.replace(c, '')
32
+ filename = filename.replace(c, "")
33
33
  return filename
34
34
 
35
35
 
36
- def validate_urls(urls, allowed_urls):
36
+ def _split_url(url: str) -> tuple[str, str]:
37
+ """
38
+ Splits the URL into scheme and netloc.
39
+
40
+ Args:
41
+ url (str): The URL to split.
42
+
43
+ Returns:
44
+ tuple[str, str]: The scheme and netloc of the URL.
45
+
46
+ Raises:
47
+ ValueError: If the URL does not include protocol and host name.
48
+ """
49
+ parsed_url = urlparse(url)
50
+ if not (parsed_url.scheme and parsed_url.netloc):
51
+ raise ValueError(f"URL must include protocol and host name: {url}")
52
+ return parsed_url.scheme.lower(), parsed_url.netloc.lower()
53
+
54
+
55
def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
    """
    Checks if the provided URL(s) is/are from an allowed origin.

    This function parses the URL(s) and checks the origin (scheme + netloc)
    against the allowed and disallowed origins. An empty ``allowed_urls`` list
    applies no allow-list restriction; only the disallowed list is then checked.

    Examples:
        validate_urls("http://site.com/file", ["site.com"]) -> raises ValueError
        validate_urls("https://site.com/file", ["https://site.com"]) -> True
        validate_urls("http://site.com/file", ["https://site.com"]) -> False
        validate_urls("https://site.com/file", ["https://example.com"]) -> False
        validate_urls("site.com/file", ["https://site.com"]) -> raises ValueError

    Args:
        urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
        allowed_urls (list[str]): The list of allowed URLs.
        disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
            will return False if the URL is in the disallowed list.

    Returns:
        bool: True if the URL(s) is/are from an allowed origin and not in the disallowed list, False otherwise.

    Raises:
        ValueError: If a URL that gets parsed does not include protocol and host name.
    """
    # Sets give constant-time origin lookups and drop duplicate entries.
    allowed_origins = {_split_url(url) for url in allowed_urls}
    disallowed_origins = {_split_url(url) for url in disallowed_urls or ()}

    if isinstance(urls, str):
        urls = [urls]

    # Allow-list pass first, stopping at the first URL outside the list.
    if allowed_origins and not all(_split_url(url) in allowed_origins for url in urls):
        return False

    # Deny-list pass second, stopping at the first URL found in the list.
    if disallowed_origins and any(_split_url(url) in disallowed_origins for url in urls):
        return False

    return True
97
+ return True