MindsDB 25.6.4.0 (py3-none-any.whl) → 25.7.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (46)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/api/executor/command_executor.py +8 -6
  3. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  6. mindsdb/api/executor/planner/query_prepare.py +68 -87
  7. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  8. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  9. mindsdb/api/http/namespaces/file.py +49 -24
  10. mindsdb/api/mcp/start.py +45 -31
  11. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  12. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  13. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  14. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  15. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  16. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  17. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  18. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  19. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
  20. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  21. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  22. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  23. mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
  24. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  25. mindsdb/interfaces/agents/agents_controller.py +29 -9
  26. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  27. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  28. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  29. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  30. mindsdb/interfaces/knowledge_base/controller.py +115 -89
  31. mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
  32. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  33. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  34. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  35. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  36. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  37. mindsdb/interfaces/skills/sql_agent.py +181 -130
  38. mindsdb/interfaces/storage/db.py +9 -7
  39. mindsdb/utilities/config.py +12 -1
  40. mindsdb/utilities/exception.py +47 -7
  41. mindsdb/utilities/security.py +54 -11
  42. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
  43. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
  44. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
  45. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
  46. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/utilities/config.py +12 -1
@@ -199,7 +199,8 @@ class Config:
199
199
  },
200
200
  "cache": {"type": "local"},
201
201
  "ml_task_queue": {"type": "local"},
202
- "file_upload_domains": [],
202
+ "url_file_upload": {"enabled": True, "allowed_origins": [], "disallowed_origins": []},
203
+ "file_upload_domains": [], # deprecated, use config[url_file_upload][allowed_origins] instead
203
204
  "web_crawling_allowed_sites": [],
204
205
  "cloud": False,
205
206
  "jobs": {"disable": False},
@@ -548,6 +549,16 @@ class Config:
548
549
  "Use 'MINDSDB_HTTP_SERVER_TYPE' instead."
549
550
  )
550
551
 
552
+ file_upload_domains = self._config.get("file_upload_domains")
553
+ if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0:
554
+ allowed_origins = self._config["url_file_upload"]["allowed_origins"]
555
+ if isinstance(allowed_origins, list) and len(allowed_origins) == 0:
556
+ self._config["url_file_upload"]["allowed_origins"] = file_upload_domains
557
+ logger.warning(
558
+ 'Config option "file_upload_domains" is deprecated, '
559
+ 'use config["url_file_upload"]["allowed_origins"] instead.'
560
+ )
561
+
551
562
  for env_name in ("MINDSDB_HTTP_SERVER_TYPE", "MINDSDB_DEFAULT_SERVER"):
552
563
  env_value = os.environ.get(env_name, "")
553
564
  if env_value.lower() not in ("waitress", "flask", "gunicorn", ""):
mindsdb/utilities/exception.py +47 -7
@@ -1,29 +1,69 @@
1
+ from textwrap import indent
2
+
3
+
1
4
  class BaseEntityException(Exception):
2
5
  """Base exception for entitys errors
3
6
 
4
- Attributes:
5
- message (str): error message
6
- entity_name (str): entity name
7
+ Attributes:
8
+ message (str): error message
9
+ entity_name (str): entity name
7
10
  """
11
+
8
12
  def __init__(self, message: str, entity_name: str = None) -> None:
9
13
  self.message = message
10
- self.entity_name = entity_name or 'unknown'
14
+ self.entity_name = entity_name or "unknown"
11
15
 
12
16
  def __str__(self) -> str:
13
- return f'{self.message}: {self.entity_name}'
17
+ return f"{self.message}: {self.entity_name}"
14
18
 
15
19
 
16
20
  class EntityExistsError(BaseEntityException):
17
21
  """Raise when entity exists, but should not"""
22
+
18
23
  def __init__(self, message: str = None, entity_name: str = None) -> None:
19
24
  if message is None:
20
- message = 'Entity exists error'
25
+ message = "Entity exists error"
21
26
  super().__init__(message, entity_name)
22
27
 
23
28
 
24
29
  class EntityNotExistsError(BaseEntityException):
25
30
  """Raise when entity not exists, but should"""
31
+
26
32
  def __init__(self, message: str = None, entity_name: str = None) -> None:
27
33
  if message is None:
28
- message = 'Entity does not exists error'
34
+ message = "Entity does not exists error"
29
35
  super().__init__(message, entity_name)
36
+
37
+
38
+ def format_db_error_message(
39
+ db_name: str | None = None,
40
+ db_type: str | None = None,
41
+ db_error_msg: str | None = None,
42
+ failed_query: str | None = None,
43
+ ) -> str:
44
+ """Format the error message for the database query.
45
+
46
+ Args:
47
+ db_name (str | None): The name of the database.
48
+ db_type (str | None): The type of the database.
49
+ db_error_msg (str | None): The error message.
50
+ failed_query (str | None): The failed query.
51
+
52
+ Returns:
53
+ str: The formatted error message.
54
+ """
55
+ error_message = "Failed to execute external database query during query processing."
56
+ if db_name is not None or db_type is not None:
57
+ error_message += "\n\nDatabase Details:"
58
+ if db_name is not None:
59
+ error_message += f"\n- Name: {db_name}"
60
+ if db_type is not None:
61
+ error_message += f"\n- Type: {db_type}"
62
+
63
+ if db_error_msg is not None:
64
+ error_message += f"\n\nError:\n{indent(db_error_msg, ' ')}"
65
+
66
+ if failed_query is not None:
67
+ error_message += f"\n\nFailed Query:\n{indent(failed_query, ' ')}"
68
+
69
+ return error_message
mindsdb/utilities/security.py +54 -11
@@ -27,28 +27,71 @@ def clear_filename(filename: str) -> str:
27
27
 
28
28
  if not filename:
29
29
  return filename
30
- badchars = '\\/:*?\"<>|'
30
+ badchars = '\\/:*?"<>|'
31
31
  for c in badchars:
32
- filename = filename.replace(c, '')
32
+ filename = filename.replace(c, "")
33
33
  return filename
34
34
 
35
35
 
36
- def validate_urls(urls, allowed_urls):
36
+ def _split_url(url: str) -> tuple[str, str]:
37
+ """
38
+ Splits the URL into scheme and netloc.
39
+
40
+ Args:
41
+ url (str): The URL to split.
42
+
43
+ Returns:
44
+ tuple[str, str]: The scheme and netloc of the URL.
45
+
46
+ Raises:
47
+ ValueError: If the URL does not include protocol and host name.
48
+ """
49
+ parsed_url = urlparse(url)
50
+ if not (parsed_url.scheme and parsed_url.netloc):
51
+ raise ValueError(f"URL must include protocol and host name: {url}")
52
+ return parsed_url.scheme.lower(), parsed_url.netloc.lower()
53
+
54
+
55
+ def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
37
56
  """
38
57
  Checks if the provided URL(s) is/are from an allowed host.
39
58
 
40
- This function parses the URL(s) and checks the network location part (netloc)
59
+ This function parses the URL(s) and checks the origin (scheme + netloc)
41
60
  against a list of allowed hosts.
42
61
 
43
- :param urls: The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
44
- :param allowed_urls: The list of allowed URLs.
45
- :return bool: True if the URL(s) is/are from an allowed host, False otherwise.
62
+ Examples:
63
+ validate_urls("http://site.com/file", ["site.com"]) -> Exception
64
+ validate_urls("https://site.com/file", ["https://site.com"]) -> True
65
+ validate_urls("http://site.com/file", ["https://site.com"]) -> False
66
+ validate_urls("https://site.com/file", ["https://example.com"]) -> False
67
+ validate_urls("site.com/file", ["https://site.com"]) -> Exception
68
+
69
+ Args:
70
+ urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
71
+ allowed_urls (list[str]): The list of allowed URLs.
72
+ disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
73
+ will return False if the URL is in the disallowed list.
74
+
75
+ Returns:
76
+ bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise.
46
77
  """
47
- allowed_netlocs = [urlparse(allowed_url).netloc for allowed_url in allowed_urls]
78
+ if disallowed_urls is None:
79
+ disallowed_urls = []
80
+
81
+ allowed_origins = [_split_url(url) for url in allowed_urls]
82
+ disallowed_origins = [_split_url(url) for url in disallowed_urls]
48
83
 
49
84
  if isinstance(urls, str):
50
85
  urls = [urls]
51
86
 
52
- # Check if all provided URLs are from the allowed sites
53
- valid = all(urlparse(url).netloc in allowed_netlocs for url in urls)
54
- return valid
87
+ if allowed_origins:
88
+ for url in urls:
89
+ if _split_url(url) not in allowed_origins:
90
+ return False
91
+
92
+ if disallowed_origins:
93
+ for url in urls:
94
+ if _split_url(url) in disallowed_origins:
95
+ return False
96
+
97
+ return True