MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +53 -94
- mindsdb/api/a2a/agent.py +30 -206
- mindsdb/api/a2a/common/server/server.py +26 -27
- mindsdb/api/a2a/task_manager.py +93 -227
- mindsdb/api/a2a/utils.py +21 -0
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/executor/utilities/sql.py +97 -21
- mindsdb/api/http/namespaces/agents.py +126 -201
- mindsdb/api/http/namespaces/config.py +12 -1
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/keyword_search_base.py +41 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/integrations/utilities/sql_utils.py +11 -0
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/langchain_agent.py +7 -5
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
- mindsdb/interfaces/database/projects.py +1 -3
- mindsdb/interfaces/functions/controller.py +54 -64
- mindsdb/interfaces/functions/to_markdown.py +47 -14
- mindsdb/interfaces/knowledge_base/controller.py +228 -110
- mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +58 -40
- mindsdb/utilities/exception.py +58 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0
mindsdb/utilities/config.py
CHANGED
|
@@ -28,6 +28,13 @@ def _merge_configs(original_config: dict, override_config: dict) -> dict:
|
|
|
28
28
|
return original_config
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
def _overwrite_configs(original_config: dict, override_config: dict) -> dict:
|
|
32
|
+
"""Overwrite original config with override config."""
|
|
33
|
+
for key in list(override_config.keys()):
|
|
34
|
+
original_config[key] = override_config[key]
|
|
35
|
+
return original_config
|
|
36
|
+
|
|
37
|
+
|
|
31
38
|
def create_data_dir(path: Path) -> None:
|
|
32
39
|
"""Create a directory and checks that it is writable.
|
|
33
40
|
|
|
@@ -196,10 +203,20 @@ class Config:
|
|
|
196
203
|
"host": "0.0.0.0", # API server binds to all interfaces by default
|
|
197
204
|
"port": "8000",
|
|
198
205
|
},
|
|
206
|
+
"a2a": {
|
|
207
|
+
"host": api_host,
|
|
208
|
+
"port": 47338,
|
|
209
|
+
"mindsdb_host": "localhost",
|
|
210
|
+
"mindsdb_port": 47334,
|
|
211
|
+
"agent_name": "my_agent",
|
|
212
|
+
"project_name": "mindsdb",
|
|
213
|
+
"enabled": False,
|
|
214
|
+
},
|
|
199
215
|
},
|
|
200
216
|
"cache": {"type": "local"},
|
|
201
217
|
"ml_task_queue": {"type": "local"},
|
|
202
|
-
"
|
|
218
|
+
"url_file_upload": {"enabled": True, "allowed_origins": [], "disallowed_origins": []},
|
|
219
|
+
"file_upload_domains": [], # deprecated, use config[url_file_upload][allowed_origins] instead
|
|
203
220
|
"web_crawling_allowed_sites": [],
|
|
204
221
|
"cloud": False,
|
|
205
222
|
"jobs": {"disable": False},
|
|
@@ -208,15 +225,6 @@ class Config:
|
|
|
208
225
|
"default_llm": {},
|
|
209
226
|
"default_embedding_model": {},
|
|
210
227
|
"default_reranking_model": {},
|
|
211
|
-
"a2a": {
|
|
212
|
-
"host": "localhost",
|
|
213
|
-
"port": 47338,
|
|
214
|
-
"mindsdb_host": "localhost",
|
|
215
|
-
"mindsdb_port": 47334,
|
|
216
|
-
"agent_name": "my_agent",
|
|
217
|
-
"project_name": "mindsdb",
|
|
218
|
-
"enabled": False,
|
|
219
|
-
},
|
|
220
228
|
"data_catalog": {
|
|
221
229
|
"enabled": False,
|
|
222
230
|
},
|
|
@@ -242,12 +250,11 @@ class Config:
|
|
|
242
250
|
"""Collect config values from env vars to self._env_config"""
|
|
243
251
|
self._env_config = {
|
|
244
252
|
"logging": {"handlers": {"console": {}, "file": {}}},
|
|
245
|
-
"api": {"http": {"server": {}}},
|
|
253
|
+
"api": {"http": {"server": {}}, "a2a": {}},
|
|
246
254
|
"auth": {},
|
|
247
255
|
"paths": {},
|
|
248
256
|
"permanent_storage": {},
|
|
249
257
|
"ml_task_queue": {},
|
|
250
|
-
"a2a": {},
|
|
251
258
|
}
|
|
252
259
|
|
|
253
260
|
# region storage root path
|
|
@@ -389,7 +396,7 @@ class Config:
|
|
|
389
396
|
)
|
|
390
397
|
|
|
391
398
|
if a2a_config:
|
|
392
|
-
self._env_config["a2a"] = a2a_config
|
|
399
|
+
self._env_config["api"]["a2a"] = a2a_config
|
|
393
400
|
# endregion
|
|
394
401
|
|
|
395
402
|
def fetch_auto_config(self) -> bool:
|
|
@@ -456,47 +463,36 @@ class Config:
|
|
|
456
463
|
_merge_configs(new_config, self._env_config)
|
|
457
464
|
|
|
458
465
|
# Apply command-line arguments for A2A
|
|
459
|
-
|
|
466
|
+
a2a_config = {}
|
|
460
467
|
|
|
461
468
|
# Check for A2A command-line arguments
|
|
462
469
|
if hasattr(self.cmd_args, "a2a_host") and self.cmd_args.a2a_host is not None:
|
|
463
|
-
|
|
464
|
-
cmd_args_config["a2a"] = {}
|
|
465
|
-
cmd_args_config["a2a"]["host"] = self.cmd_args.a2a_host
|
|
470
|
+
a2a_config["host"] = self.cmd_args.a2a_host
|
|
466
471
|
|
|
467
472
|
if hasattr(self.cmd_args, "a2a_port") and self.cmd_args.a2a_port is not None:
|
|
468
|
-
|
|
469
|
-
cmd_args_config["a2a"] = {}
|
|
470
|
-
cmd_args_config["a2a"]["port"] = self.cmd_args.a2a_port
|
|
473
|
+
a2a_config["port"] = self.cmd_args.a2a_port
|
|
471
474
|
|
|
472
475
|
if hasattr(self.cmd_args, "mindsdb_host") and self.cmd_args.mindsdb_host is not None:
|
|
473
|
-
|
|
474
|
-
cmd_args_config["a2a"] = {}
|
|
475
|
-
cmd_args_config["a2a"]["mindsdb_host"] = self.cmd_args.mindsdb_host
|
|
476
|
+
a2a_config["mindsdb_host"] = self.cmd_args.mindsdb_host
|
|
476
477
|
|
|
477
478
|
if hasattr(self.cmd_args, "mindsdb_port") and self.cmd_args.mindsdb_port is not None:
|
|
478
|
-
|
|
479
|
-
cmd_args_config["a2a"] = {}
|
|
480
|
-
cmd_args_config["a2a"]["mindsdb_port"] = self.cmd_args.mindsdb_port
|
|
479
|
+
a2a_config["mindsdb_port"] = self.cmd_args.mindsdb_port
|
|
481
480
|
|
|
482
481
|
if hasattr(self.cmd_args, "agent_name") and self.cmd_args.agent_name is not None:
|
|
483
|
-
|
|
484
|
-
cmd_args_config["a2a"] = {}
|
|
485
|
-
cmd_args_config["a2a"]["agent_name"] = self.cmd_args.agent_name
|
|
482
|
+
a2a_config["agent_name"] = self.cmd_args.agent_name
|
|
486
483
|
|
|
487
484
|
if hasattr(self.cmd_args, "project_name") and self.cmd_args.project_name is not None:
|
|
488
|
-
|
|
489
|
-
cmd_args_config["a2a"] = {}
|
|
490
|
-
cmd_args_config["a2a"]["project_name"] = self.cmd_args.project_name
|
|
485
|
+
a2a_config["project_name"] = self.cmd_args.project_name
|
|
491
486
|
|
|
492
487
|
# Merge command-line args config with highest priority
|
|
493
|
-
if
|
|
494
|
-
_merge_configs(new_config,
|
|
488
|
+
if a2a_config:
|
|
489
|
+
_merge_configs(new_config, {"api": {"a2a": a2a_config}})
|
|
495
490
|
|
|
496
491
|
# Ensure A2A port is never 0, which would prevent the A2A API from starting
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
492
|
+
a2a_config = new_config["api"].get("a2a")
|
|
493
|
+
if a2a_config is not None and isinstance(a2a_config, dict):
|
|
494
|
+
if "port" in a2a_config and (a2a_config["port"] == 0 or a2a_config["port"] is None):
|
|
495
|
+
a2a_config["port"] = 47338 # Use the default port value
|
|
500
496
|
|
|
501
497
|
# region create dirs
|
|
502
498
|
for key, value in new_config["paths"].items():
|
|
@@ -521,11 +517,23 @@ class Config:
|
|
|
521
517
|
self.ensure_auto_config_is_relevant()
|
|
522
518
|
return self._config
|
|
523
519
|
|
|
524
|
-
def update(self, data: dict) -> None:
|
|
525
|
-
"""
|
|
520
|
+
def update(self, data: dict, overwrite: bool = False) -> None:
|
|
521
|
+
"""
|
|
522
|
+
Update values in `auto` config.
|
|
523
|
+
Args:
|
|
524
|
+
data (dict): data to update in `auto` config.
|
|
525
|
+
overwrite (bool): if True, overwrite existing keys, otherwise merge them.
|
|
526
|
+
- False (default): Merge recursively. Existing nested dictionaries are preserved
|
|
527
|
+
and only the specified keys in `data` are updated.
|
|
528
|
+
- True: Overwrite completely. Existing keys are replaced entirely with values
|
|
529
|
+
from `data`, discarding any nested structure not present in `data`.
|
|
530
|
+
"""
|
|
526
531
|
self.ensure_auto_config_is_relevant()
|
|
527
532
|
|
|
528
|
-
|
|
533
|
+
if overwrite:
|
|
534
|
+
_overwrite_configs(self._auto_config, data)
|
|
535
|
+
else:
|
|
536
|
+
_merge_configs(self._auto_config, data)
|
|
529
537
|
|
|
530
538
|
self.auto_config_path.write_text(json.dumps(self._auto_config, indent=4))
|
|
531
539
|
|
|
@@ -548,6 +556,16 @@ class Config:
|
|
|
548
556
|
"Use 'MINDSDB_HTTP_SERVER_TYPE' instead."
|
|
549
557
|
)
|
|
550
558
|
|
|
559
|
+
file_upload_domains = self._config.get("file_upload_domains")
|
|
560
|
+
if isinstance(file_upload_domains, list) and len(file_upload_domains) > 0:
|
|
561
|
+
allowed_origins = self._config["url_file_upload"]["allowed_origins"]
|
|
562
|
+
if isinstance(allowed_origins, list) and len(allowed_origins) == 0:
|
|
563
|
+
self._config["url_file_upload"]["allowed_origins"] = file_upload_domains
|
|
564
|
+
logger.warning(
|
|
565
|
+
'Config option "file_upload_domains" is deprecated, '
|
|
566
|
+
'use config["url_file_upload"]["allowed_origins"] instead.'
|
|
567
|
+
)
|
|
568
|
+
|
|
551
569
|
for env_name in ("MINDSDB_HTTP_SERVER_TYPE", "MINDSDB_DEFAULT_SERVER"):
|
|
552
570
|
env_value = os.environ.get(env_name, "")
|
|
553
571
|
if env_value.lower() not in ("waitress", "flask", "gunicorn", ""):
|
mindsdb/utilities/exception.py
CHANGED
|
@@ -1,29 +1,80 @@
|
|
|
1
|
+
from textwrap import indent
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class BaseEntityException(Exception):
|
|
2
5
|
"""Base exception for entitys errors
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
+
Attributes:
|
|
8
|
+
message (str): error message
|
|
9
|
+
entity_name (str): entity name
|
|
7
10
|
"""
|
|
11
|
+
|
|
8
12
|
def __init__(self, message: str, entity_name: str = None) -> None:
|
|
9
13
|
self.message = message
|
|
10
|
-
self.entity_name = entity_name or
|
|
14
|
+
self.entity_name = entity_name or "unknown"
|
|
11
15
|
|
|
12
16
|
def __str__(self) -> str:
|
|
13
|
-
return f
|
|
17
|
+
return f"{self.message}: {self.entity_name}"
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
class EntityExistsError(BaseEntityException):
|
|
17
21
|
"""Raise when entity exists, but should not"""
|
|
22
|
+
|
|
18
23
|
def __init__(self, message: str = None, entity_name: str = None) -> None:
|
|
19
24
|
if message is None:
|
|
20
|
-
message =
|
|
25
|
+
message = "Entity exists error"
|
|
21
26
|
super().__init__(message, entity_name)
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
class EntityNotExistsError(BaseEntityException):
|
|
25
30
|
"""Raise when entity not exists, but should"""
|
|
31
|
+
|
|
26
32
|
def __init__(self, message: str = None, entity_name: str = None) -> None:
|
|
27
33
|
if message is None:
|
|
28
|
-
message =
|
|
34
|
+
message = "Entity does not exists error"
|
|
29
35
|
super().__init__(message, entity_name)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def format_db_error_message(
|
|
39
|
+
db_name: str | None = None,
|
|
40
|
+
db_type: str | None = None,
|
|
41
|
+
db_error_msg: str | None = None,
|
|
42
|
+
failed_query: str | None = None,
|
|
43
|
+
is_external: bool = True,
|
|
44
|
+
) -> str:
|
|
45
|
+
"""Format the error message for the database query.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
db_name (str | None): The name of the database.
|
|
49
|
+
db_type (str | None): The type of the database.
|
|
50
|
+
db_error_msg (str | None): The error message.
|
|
51
|
+
failed_query (str | None): The failed query.
|
|
52
|
+
is_external (bool): True if error appeared in external database, False if in internal duckdb
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
str: The formatted error message.
|
|
56
|
+
"""
|
|
57
|
+
error_message = "Failed to execute external database query during query processing."
|
|
58
|
+
if is_external:
|
|
59
|
+
error_message = (
|
|
60
|
+
"An error occurred while executing a derived query on the external "
|
|
61
|
+
"database during processing of your original SQL query."
|
|
62
|
+
)
|
|
63
|
+
else:
|
|
64
|
+
error_message = (
|
|
65
|
+
"An error occurred while processing an internally generated query derived from your original SQL statement."
|
|
66
|
+
)
|
|
67
|
+
if db_name is not None or db_type is not None:
|
|
68
|
+
error_message += "\n\nDatabase Details:"
|
|
69
|
+
if db_name is not None:
|
|
70
|
+
error_message += f"\n- Name: {db_name}"
|
|
71
|
+
if db_type is not None:
|
|
72
|
+
error_message += f"\n- Type: {db_type}"
|
|
73
|
+
|
|
74
|
+
if db_error_msg is not None:
|
|
75
|
+
error_message += f"\n\nError:\n{indent(db_error_msg, ' ')}"
|
|
76
|
+
|
|
77
|
+
if failed_query is not None:
|
|
78
|
+
error_message += f"\n\nFailed Query:\n{indent(failed_query, ' ')}"
|
|
79
|
+
|
|
80
|
+
return error_message
|
mindsdb/utilities/security.py
CHANGED
|
@@ -27,28 +27,71 @@ def clear_filename(filename: str) -> str:
|
|
|
27
27
|
|
|
28
28
|
if not filename:
|
|
29
29
|
return filename
|
|
30
|
-
badchars = '
|
|
30
|
+
badchars = '\\/:*?"<>|'
|
|
31
31
|
for c in badchars:
|
|
32
|
-
filename = filename.replace(c,
|
|
32
|
+
filename = filename.replace(c, "")
|
|
33
33
|
return filename
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def
|
|
36
|
+
def _split_url(url: str) -> tuple[str, str]:
|
|
37
|
+
"""
|
|
38
|
+
Splits the URL into scheme and netloc.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
url (str): The URL to split.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
tuple[str, str]: The scheme and netloc of the URL.
|
|
45
|
+
|
|
46
|
+
Raises:
|
|
47
|
+
ValueError: If the URL does not include protocol and host name.
|
|
48
|
+
"""
|
|
49
|
+
parsed_url = urlparse(url)
|
|
50
|
+
if not (parsed_url.scheme and parsed_url.netloc):
|
|
51
|
+
raise ValueError(f"URL must include protocol and host name: {url}")
|
|
52
|
+
return parsed_url.scheme.lower(), parsed_url.netloc.lower()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def validate_urls(urls: str | list[str], allowed_urls: list[str], disallowed_urls: list[str] | None = None) -> bool:
|
|
37
56
|
"""
|
|
38
57
|
Checks if the provided URL(s) is/are from an allowed host.
|
|
39
58
|
|
|
40
|
-
This function parses the URL(s) and checks the
|
|
59
|
+
This function parses the URL(s) and checks the origin (scheme + netloc)
|
|
41
60
|
against a list of allowed hosts.
|
|
42
61
|
|
|
43
|
-
:
|
|
44
|
-
|
|
45
|
-
|
|
62
|
+
Examples:
|
|
63
|
+
validate_urls("http://site.com/file", ["site.com"]) -> Exception
|
|
64
|
+
validate_urls("https://site.com/file", ["https://site.com"]) -> True
|
|
65
|
+
validate_urls("http://site.com/file", ["https://site.com"]) -> False
|
|
66
|
+
validate_urls("https://site.com/file", ["https://example.com"]) -> False
|
|
67
|
+
validate_urls("site.com/file", ["https://site.com"]) -> Exception
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
urls (str | list[str]): The URL(s) to check. Can be a single URL (str) or a list of URLs (list).
|
|
71
|
+
allowed_urls (list[str]): The list of allowed URLs.
|
|
72
|
+
disallowed_urls (list[str]): The list of disallowed URLs. If provided, the function
|
|
73
|
+
will return False if the URL is in the disallowed list.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
bool: True if the URL(s) is/are from an allowed host and not in the disallowed list, False otherwise.
|
|
46
77
|
"""
|
|
47
|
-
|
|
78
|
+
if disallowed_urls is None:
|
|
79
|
+
disallowed_urls = []
|
|
80
|
+
|
|
81
|
+
allowed_origins = [_split_url(url) for url in allowed_urls]
|
|
82
|
+
disallowed_origins = [_split_url(url) for url in disallowed_urls]
|
|
48
83
|
|
|
49
84
|
if isinstance(urls, str):
|
|
50
85
|
urls = [urls]
|
|
51
86
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
87
|
+
if allowed_origins:
|
|
88
|
+
for url in urls:
|
|
89
|
+
if _split_url(url) not in allowed_origins:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
if disallowed_origins:
|
|
93
|
+
for url in urls:
|
|
94
|
+
if _split_url(url) in disallowed_origins:
|
|
95
|
+
return False
|
|
96
|
+
|
|
97
|
+
return True
|