MindsDB 25.6.4.0__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/langchain_agent.py +7 -5
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
- mindsdb/interfaces/knowledge_base/controller.py +115 -89
- mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/utilities/config.py
CHANGED
|
@@ -199,7 +199,8 @@ class Config:
|
|
|
199
199
|
},
|
|
200
200
|
"cache": {"type": "local"},
|
|
201
201
|
"ml_task_queue": {"type": "local"},
|
|
202
|
-
"
|
|
202
|
+
"url_file_upload": {"enabled": True, "allowed_origins": [], "disallowed_origins": []},
|
|
203
|
+
"file_upload_domains": [], # deprecated, use config[url_file_upload][allowed_origins] instead
|
|
203
204
|
"web_crawling_allowed_sites": [],
|
|
204
205
|
"cloud": False,
|
|
205
206
|
"jobs": {"disable": False},
|
|
@@ -548,6 +549,16 @@ class Config:
|
|
|
548
549
|
"Use 'MINDSDB_HTTP_SERVER_TYPE' instead."
|
|
549
550
|
)
|
|
550
551
|
|
|
552
|
+
file_upload_domains = self._config.get("file_upload_domains")
|
|
553
|
+
if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0:
|
|
554
|
+
allowed_origins = self._config["url_file_upload"]["allowed_origins"]
|
|
555
|
+
if isinstance(allowed_origins, list) and len(allowed_origins) == 0:
|
|
556
|
+
self._config["url_file_upload"]["allowed_origins"] = file_upload_domains
|
|
557
|
+
logger.warning(
|
|
558
|
+
'Config option "file_upload_domains" is deprecated, '
|
|
559
|
+
'use config["url_file_upload"]["allowed_origins"] instead.'
|
|
560
|
+
)
|
|
561
|
+
|
|
551
562
|
for env_name in ("MINDSDB_HTTP_SERVER_TYPE", "MINDSDB_DEFAULT_SERVER"):
|
|
552
563
|
env_value = os.environ.get(env_name, "")
|
|
553
564
|
if env_value.lower() not in ("waitress", "flask", "gunicorn", ""):
|
mindsdb/utilities/exception.py
CHANGED
|
@@ -1,29 +1,69 @@
|
|
|
1
|
+
from textwrap import indent
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class BaseEntityException(Exception):
|
|
2
5
|
"""Base exception for entity errors
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
+
Attributes:
|
|
8
|
+
message (str): error message
|
|
9
|
+
entity_name (str): entity name
|
|
7
10
|
"""
|
|
11
|
+
|
|
8
12
|
def __init__(self, message: str, entity_name: str = None) -> None:
|
|
9
13
|
self.message = message
|
|
10
|
-
self.entity_name = entity_name or
|
|
14
|
+
self.entity_name = entity_name or "unknown"
|
|
11
15
|
|
|
12
16
|
def __str__(self) -> str:
|
|
13
|
-
return f
|
|
17
|
+
return f"{self.message}: {self.entity_name}"
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
class EntityExistsError(BaseEntityException):
|
|
17
21
|
"""Raise when entity exists, but should not"""
|
|
22
|
+
|
|
18
23
|
def __init__(self, message: str = None, entity_name: str = None) -> None:
|
|
19
24
|
if message is None:
|
|
20
|
-
message =
|
|
25
|
+
message = "Entity exists error"
|
|
21
26
|
super().__init__(message, entity_name)
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
class EntityNotExistsError(BaseEntityException):
|
|
25
30
|
"""Raise when entity does not exist, but should"""
|
|
31
|
+
|
|
26
32
|
def __init__(self, message: str = None, entity_name: str = None) -> None:
|
|
27
33
|
if message is None:
|
|
28
|
-
message =
|
|
34
|
+
message = "Entity does not exists error"
|
|
29
35
|
super().__init__(message, entity_name)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def format_db_error_message(
|
|
39
|
+
db_name: str | None = None,
|
|
40
|
+
db_type: str | None = None,
|
|
41
|
+
db_error_msg: str | None = None,
|
|
42
|
+
failed_query: str | None = None,
|
|
43
|
+
) -> str:
|
|
44
|
+
"""Format the error message for the database query.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
db_name (str | None): The name of the database.
|
|
48
|
+
db_type (str | None): The type of the database.
|
|
49
|
+
db_error_msg (str | None): The error message.
|
|
50
|
+
failed_query (str | None): The failed query.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
str: The formatted error message.
|
|
54
|
+
"""
|
|
55
|
+
error_message = "Failed to execute external database query during query processing."
|
|
56
|
+
if db_name is not None or db_type is not None:
|
|
57
|
+
error_message += "\n\nDatabase Details:"
|
|
58
|
+
if db_name is not None:
|
|
59
|
+
error_message += f"\n- Name: {db_name}"
|
|
60
|
+
if db_type is not None:
|
|
61
|
+
error_message += f"\n- Type: {db_type}"
|
|
62
|
+
|
|
63
|
+
if db_error_msg is not None:
|
|
64
|
+
error_message += f"\n\nError:\n{indent(db_error_msg, ' ')}"
|
|
65
|
+
|
|
66
|
+
if failed_query is not None:
|
|
67
|
+
error_message += f"\n\nFailed Query:\n{indent(failed_query, ' ')}"
|
|
68
|
+
|
|
69
|
+
return error_message
|
mindsdb/utilities/security.py
CHANGED
|
@@ -27,28 +27,71 @@ def clear_filename(filename: str) -> str:
|
|
|
27
27
|
|
|
28
28
|
if not filename:
|
|
29
29
|
return filename
|
|
30
|
-
badchars = '
|
|
30
|
+
badchars = '\\/:*?"<>|'
|
|
31
31
|
for c in badchars:
|
|
32
|
-
filename = filename.replace(c,
|
|
32
|
+
filename = filename.replace(c, "")
|
|
33
33
|
return filename
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def
|
|
36
|
+
def _split_url(url: str) -> tuple[str, str]:
|
|
37
|
+
"""
|
|
38
|
+
Splits the URL into scheme and netloc.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
url (str): The URL to split.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
tuple[str, str]: The scheme and netloc of the URL.
|
|
45
|
+
|
|
46
|
+
Raises:
|
|
47
|
+
ValueError: If the URL does not include protocol and host name.
|
|
48
|
+
"""
|
|
49
|
+
parsed_url = urlparse(url)
|
|
50
|
+
if not (parsed_url.scheme and parsed_url.netloc):
|
|
51
|
+
raise ValueError(f"URL must include protocol and host name: {url}")
|
|
52
|
+
return parsed_url.scheme.lower(), parsed_url.netloc.lower()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
|
|
37
56
|
"""
|
|
38
57
|
Checks if the provided URL(s) is/are from an allowed host.
|
|
39
58
|
|
|
40
|
-
This function parses the URL(s) and checks the
|
|
59
|
+
This function parses the URL(s) and checks the origin (scheme + netloc)
|
|
41
60
|
against a list of allowed origins.
|
|
42
61
|
|
|
43
|
-
:
|
|
44
|
-
|
|
45
|
-
|
|
62
|
+
Examples:
|
|
63
|
+
validate_urls("http://site.com/file", ["site.com"]) -> Exception
|
|
64
|
+
validate_urls("https://site.com/file", ["https://site.com"]) -> True
|
|
65
|
+
validate_urls("http://site.com/file", ["https://site.com"]) -> False
|
|
66
|
+
validate_urls("https://site.com/file", ["https://example.com"]) -> False
|
|
67
|
+
validate_urls("site.com/file", ["https://site.com"]) -> Exception
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
|
|
71
|
+
allowed_urls (list[str]): The list of allowed URLs.
|
|
72
|
+
disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
|
|
73
|
+
will return False if the URL is in the disallowed list.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise.
|
|
46
77
|
"""
|
|
47
|
-
|
|
78
|
+
if disallowed_urls is None:
|
|
79
|
+
disallowed_urls = []
|
|
80
|
+
|
|
81
|
+
allowed_origins = [_split_url(url) for url in allowed_urls]
|
|
82
|
+
disallowed_origins = [_split_url(url) for url in disallowed_urls]
|
|
48
83
|
|
|
49
84
|
if isinstance(urls, str):
|
|
50
85
|
urls = [urls]
|
|
51
86
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
87
|
+
if allowed_origins:
|
|
88
|
+
for url in urls:
|
|
89
|
+
if _split_url(url) not in allowed_origins:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
if disallowed_origins:
|
|
93
|
+
for url in urls:
|
|
94
|
+
if _split_url(url) in disallowed_origins:
|
|
95
|
+
return False
|
|
96
|
+
|
|
97
|
+
return True
|