PyPI - MindsDB - Versions diffs - 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl - Mend

MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (61) hide show

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +53 -94
mindsdb/api/a2a/agent.py +30 -206
mindsdb/api/a2a/common/server/server.py +26 -27
mindsdb/api/a2a/task_manager.py +93 -227
mindsdb/api/a2a/utils.py +21 -0
mindsdb/api/executor/command_executor.py +8 -6
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
mindsdb/api/executor/planner/query_prepare.py +68 -87
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
mindsdb/api/executor/utilities/sql.py +97 -21
mindsdb/api/http/namespaces/agents.py +126 -201
mindsdb/api/http/namespaces/config.py +12 -1
mindsdb/api/http/namespaces/file.py +49 -24
mindsdb/api/mcp/start.py +45 -31
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
mindsdb/integrations/libs/keyword_search_base.py +41 -0
mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
mindsdb/integrations/utilities/sql_utils.py +11 -0
mindsdb/interfaces/agents/agents_controller.py +29 -9
mindsdb/interfaces/agents/langchain_agent.py +7 -5
mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
mindsdb/interfaces/database/projects.py +1 -3
mindsdb/interfaces/functions/controller.py +54 -64
mindsdb/interfaces/functions/to_markdown.py +47 -14
mindsdb/interfaces/knowledge_base/controller.py +228 -110
mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
mindsdb/interfaces/knowledge_base/executor.py +346 -0
mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
mindsdb/interfaces/skills/sql_agent.py +181 -130
mindsdb/interfaces/storage/db.py +9 -7
mindsdb/utilities/config.py +58 -40
mindsdb/utilities/exception.py +58 -7
mindsdb/utilities/security.py +54 -11
{mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
{mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
{mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
{mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/agents/agents_controller.py CHANGED Viewed

@@ -160,7 +160,7 @@ class AgentsController:
         Parameters:
             name (str): The name of the new agent
             project_name (str): The containing project
-            model_name (str): The name of the existing ML model the agent will use
+            model_name (str | dict): The name of the existing ML model the agent will use
             skills (List[Union[str, dict]]): List of existing skill names to add to the new agent, or list of dicts
                  with one of keys is "name", and other is additional parameters for relationship agent<>skill
             provider (str): The provider of the model
@@ -172,6 +172,9 @@ class AgentsController:
                 include_knowledge_bases: List of knowledge bases to include for text2sql skills
                 ignore_knowledge_bases: List of knowledge bases to ignore for text2sql skills
                 <provider>_api_key: API key for the provider (e.g., openai_api_key)
+                data: Dict, data sources for an agent, keys:
+                  - knowledge_bases: List of KBs to use (alternative to `include_knowledge_bases`)
+                  - tables: list of tables to use (alternative to `include_tables`)
         Returns:
             agent (db.Agents): The created agent
@@ -188,12 +191,17 @@ class AgentsController:
         if agent is not None:
             raise ValueError(f"Agent with name already exists: {name}")
-        if model_name is not None:
-            _, provider = self.check_model_provider(model_name, provider)
         # No need to copy params since we're not preserving the original reference
         params = params or {}
+        if isinstance(model_name, dict):
+            # move into params
+            params["model"] = model_name
+            model_name = None
+        if model_name is not None:
+            _, provider = self.check_model_provider(model_name, provider)
         if model_name is None:
             logger.warning("'model_name' param is not provided. Using default global llm model at runtime.")
@@ -230,6 +238,12 @@ class AgentsController:
         if "database" in params or need_params:
             params["database"] = database
+        if "data" in params:
+            if include_knowledge_bases is None:
+                include_knowledge_bases = params["data"].get("knowledge_bases")
+            if include_tables is None:
+                include_tables = params["data"].get("tables")
         if "knowledge_base_database" in params or include_knowledge_bases or ignore_knowledge_bases:
             params["knowledge_base_database"] = knowledge_base_database
@@ -549,13 +563,19 @@ class AgentsController:
         agent.deleted_at = datetime.datetime.now()
         db.session.commit()
-    def get_agent_llm_params(self, model_params: dict):
+    def get_agent_llm_params(self, agent_params: dict):
         """
         Get agent LLM parameters by combining default config with user provided parameters.
         Similar to how knowledge bases handle default parameters.
         """
         combined_model_params = copy.deepcopy(config.get("default_llm", {}))
+        if "model" in agent_params:
+            model_params = agent_params["model"]
+        else:
+            # params for LLM can be arbitrary
+            model_params = agent_params
         if model_params:
             combined_model_params.update(model_params)
@@ -596,9 +616,9 @@ class AgentsController:
             db.session.commit()
         # Get agent parameters and combine with default LLM parameters at runtime
-        agent_params = self.get_agent_llm_params(agent.params)
+        llm_params = self.get_agent_llm_params(agent.params)
-        lang_agent = LangchainAgent(agent, model, params=agent_params)
+        lang_agent = LangchainAgent(agent, model, llm_params=llm_params)
         return lang_agent.get_completion(messages)
     def _get_completion_stream(
@@ -636,7 +656,7 @@ class AgentsController:
             db.session.commit()
         # Get agent parameters and combine with default LLM parameters at runtime
-        agent_params = self.get_agent_llm_params(agent.params)
+        llm_params = self.get_agent_llm_params(agent.params)
-        lang_agent = LangchainAgent(agent, model=model, params=agent_params)
+        lang_agent = LangchainAgent(agent, model=model, llm_params=llm_params)
         return lang_agent.get_completion(messages, stream=True)

mindsdb/interfaces/agents/langchain_agent.py CHANGED Viewed

@@ -228,7 +228,7 @@ def process_chunk(chunk):
 class LangchainAgent:
-    def __init__(self, agent: db.Agents, model: dict = None, params: dict = None):
+    def __init__(self, agent: db.Agents, model: dict = None, llm_params: dict = None):
         self.agent = agent
         self.model = model
@@ -241,12 +241,12 @@ class LangchainAgent:
         self.mdb_langfuse_callback_handler: Optional[object] = None  # custom (see langfuse_callback_handler.py)
         self.langfuse_client_wrapper = LangfuseClientWrapper()
-        self.args = self._initialize_args(params)
+        self.args = self._initialize_args(llm_params)
         # Back compatibility for old models
         self.provider = self.args.get("provider", get_llm_provider(self.args))
-    def _initialize_args(self, params: dict = None) -> dict:
+    def _initialize_args(self, llm_params: dict = None) -> dict:
         """
         Initialize the arguments for agent execution.
@@ -254,14 +254,16 @@ class LangchainAgent:
         The params are already merged with defaults by AgentsController.get_agent_llm_params.
         Args:
-            params: Parameters for agent execution (already merged with defaults)
+            llm_params: Parameters for agent execution (already merged with defaults)
         Returns:
             dict: Final parameters for agent execution
         """
         # Use the parameters passed to the method (already merged with defaults by AgentsController)
         # No fallback needed as AgentsController.get_agent_llm_params already handles this
-        args = params.copy() if params else {}
+        args = self.agent.params.copy()
+        if llm_params:
+            args.update(llm_params)
         # Set model name and provider if given in create agent otherwise use global llm defaults
         # AgentsController.get_agent_llm_params

mindsdb/interfaces/agents/mcp_client_agent.py CHANGED Viewed

@@ -71,11 +71,11 @@ class MCPLangchainAgent(LangchainAgent):
         self,
         agent: db.Agents,
         model: dict = None,
-        params: dict = None,
+        llm_params: dict = None,
         mcp_host: str = "127.0.0.1",
         mcp_port: int = 47337,
     ):
-        super().__init__(agent, model, params)
+        super().__init__(agent, model, llm_params)
         self.mcp_host = mcp_host
         self.mcp_port = mcp_port
         self.exit_stack = AsyncExitStack()
@@ -251,10 +251,10 @@ def create_mcp_agent(
         raise ValueError(f"Agent {agent_name} not found in project {project_name}")
     # Get merged parameters (defaults + agent params)
-    merged_params = agent_controller.get_agent_llm_params(agent_db.params)
+    llm_params = agent_controller.get_agent_llm_params(agent_db.params)
     # Create MCP agent with merged parameters
-    mcp_agent = MCPLangchainAgent(agent_db, params=merged_params, mcp_host=mcp_host, mcp_port=mcp_port)
+    mcp_agent = MCPLangchainAgent(agent_db, llm_params=llm_params, mcp_host=mcp_host, mcp_port=mcp_port)
     # Wrap for LiteLLM compatibility
     return LiteLLMAgentWrapper(mcp_agent)

mindsdb/interfaces/agents/mindsdb_database_agent.py CHANGED Viewed

@@ -96,27 +96,7 @@ class MindsDBSQL(SQLDatabase):
             # Log the query for debugging
             logger.info(f"Executing SQL query: {command}")
-            # Removing backticks causes in query execution.
-            # remove backticks
-            # command = command.replace('`', '')
-            # Parse the SQL string to an AST object first
-            from mindsdb_sql_parser import parse_sql
-            ast_query = parse_sql(command)
-            # Now execute the parsed query
-            result = self._sql_agent.skill_tool.get_command_executor().execute_command(
-                ast_query, database_name="mindsdb"
-            )
-            # Convert ExecuteAnswer to a DataFrame for easier manipulation
-            if result.data is not None:
-                df = result.data.to_df()
-                return df.to_string(index=False)
-            else:
-                return "Query executed successfully, but returned no data."
+            return self._sql_agent.query(command)
         except Exception as e:
             logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")
@@ -127,28 +107,6 @@ class MindsDBSQL(SQLDatabase):
                 return f"Error executing knowledge base query: {str(e)}. Please check that the knowledge base exists and your query syntax is correct."
             return f"Error: {str(e)}"
-    # def run_no_throw(self, command: str, fetch: str = "all") -> str:
-    #     """Execute a SQL command and return the result as a string.
-    #
-    #     This method catches any exceptions and returns an error message instead of raising an exception.
-    #
-    #     Args:
-    #         command: The SQL command to execute
-    #         fetch: Whether to fetch 'all' results or just 'one'
-    #
-    #     Returns:
-    #         A string representation of the result or an error message
-    #     """
-    #     command = extract_essential(command)
-    #     try:
-    #         return self._sql_agent.query_safe(command)
-    #     except Exception as e:
-    #         logger.error(f"Error executing SQL command: {str(e)}")
-    #         # If this is a knowledge base query, provide a more helpful error message
-    #         if "knowledge_base" in command.lower() or any(kb in command for kb in self._sql_agent.get_usable_knowledge_base_names()):
-    #             return f"Error executing knowledge base query: {str(e)}. Please check that the knowledge base exists and your query syntax is correct."
-    #         return f"Error: {str(e)}"
     def get_usable_knowledge_base_names(self) -> List[str]:
         """Get a list of usable knowledge base names.
@@ -160,3 +118,12 @@ class MindsDBSQL(SQLDatabase):
         except Exception as e:
             logger.error(f"Error getting usable knowledge base names: {str(e)}")
             return []
+    def check_knowledge_base_permission(self, name):
+        """Check the permission for a knowledge base by name.
+        Delegates to the SQL agent's `check_knowledge_base_permission` and
+        returns whatever that call returns.
+        """
+        return self._sql_agent.check_knowledge_base_permission(name)

mindsdb/interfaces/data_catalog/data_catalog_reader.py CHANGED Viewed

@@ -18,7 +18,9 @@ class DataCatalogReader(BaseDataCatalog):
         metadata_str = "Data Catalog: \n"
         if hasattr(self.data_handler, "meta_get_handler_info"):
-            metadata_str += self.data_handler.meta_get_handler_info() + "\n\n"
+            info = self.data_handler.meta_get_handler_info()
+            if info:
+                metadata_str += info + "\n\n"
         for table in tables:
             metadata_str += table.as_string() + "\n\n"

mindsdb/interfaces/database/projects.py CHANGED Viewed

@@ -362,9 +362,7 @@ class Project:
                     columns = [ASSISTANT_COLUMN, USER_COLUMN]
             case "KNOWLEDGE_BASE":
-                from mindsdb.interfaces.knowledge_base.controller import KB_TO_VECTORDB_COLUMNS
-                columns = list(KB_TO_VECTORDB_COLUMNS.keys()) + ["metadata", "relevance", "distance"]
+                columns = ["id", "chunk_id", "chunk_content", "metadata", "relevance", "distance"]
             case "TABLE":
                 # like 'mindsdb.models'
                 pass

mindsdb/interfaces/functions/controller.py CHANGED Viewed

@@ -7,15 +7,15 @@ from mindsdb.utilities.config import config
 def python_to_duckdb_type(py_type):
-    if py_type == 'int':
+    if py_type == "int":
         return BIGINT
-    elif py_type == 'float':
+    elif py_type == "float":
         return DOUBLE
-    elif py_type == 'str':
+    elif py_type == "str":
         return VARCHAR
-    elif py_type == 'bool':
+    elif py_type == "bool":
         return BOOLEAN
-    elif py_type == 'bytes':
+    elif py_type == "bytes":
         return BLOB
     else:
         # Unknown
@@ -53,8 +53,8 @@ class BYOMFunctionsController:
             # first run
             self.byom_engines = []
             for name, info in self.session.integration_controller.get_all().items():
-                if info['type'] == 'ml' and info['engine'] == 'byom':
-                    if info['connection_data'].get('mode') == 'custom_function':
+                if info["type"] == "ml" and info["engine"] == "byom":
+                    if info["connection_data"].get("mode") == "custom_function":
                         self.byom_engines.append(name)
         return self.byom_engines
@@ -63,7 +63,7 @@ class BYOMFunctionsController:
             ml_handler = self.session.integration_controller.get_ml_handler(engine)
             storage = HandlerStorage(ml_handler.integration_id)
-            methods = storage.json_get('methods')
+            methods = storage.json_get("methods")
             self.byom_methods[engine] = methods
             self.byom_handlers[engine] = ml_handler
@@ -81,7 +81,7 @@ class BYOMFunctionsController:
             # do nothing
             return
-        new_name = f'{node.namespace}_{fnc_name}'
+        new_name = f"{node.namespace}_{fnc_name}"
         node.op = new_name
         if new_name in self.callbacks:
@@ -91,16 +91,13 @@ class BYOMFunctionsController:
         def callback(*args):
             return self.method_call(engine, fnc_name, args)
-        input_types = [
-            param['type']
-            for param in methods[fnc_name]['input_params']
-        ]
+        input_types = [param["type"] for param in methods[fnc_name]["input_params"]]
         meta = {
-            'name': new_name,
-            'callback': callback,
-            'input_types': input_types,
-            'output_type': methods[fnc_name]['output_type']
+            "name": new_name,
+            "callback": callback,
+            "input_types": input_types,
+            "output_type": methods[fnc_name]["output_type"],
         }
         self.callbacks[new_name] = meta
@@ -114,7 +111,6 @@ class BYOMFunctionsController:
 class FunctionController(BYOMFunctionsController):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -124,10 +120,10 @@ class FunctionController(BYOMFunctionsController):
             return meta
         # builtin functions
-        if node.op.lower() == 'llm':
+        if node.op.lower() == "llm":
             return self.llm_call_function(node)
-        elif node.op.lower() == 'to_markdown':
+        elif node.op.lower() == "to_markdown":
             return self.to_markdown_call_function(node)
     def llm_call_function(self, node):
@@ -141,70 +137,74 @@ class FunctionController(BYOMFunctionsController):
         try:
             from langchain_core.messages import HumanMessage
             from mindsdb.interfaces.agents.langchain_agent import create_chat_model
             llm = create_chat_model(chat_model_params)
         except Exception as e:
-            raise RuntimeError(f'Unable to use LLM function, check ENV variables: {e}')
+            raise RuntimeError(f"Unable to use LLM function, check ENV variables: {e}")
         def callback(question):
             resp = llm([HumanMessage(question)])
             return resp.content
-        meta = {
-            'name': name,
-            'callback': callback,
-            'input_types': ['str'],
-            'output_type': 'str'
-        }
+        meta = {"name": name, "callback": callback, "input_types": ["str"], "output_type": "str"}
         self.callbacks[name] = meta
         return meta
     def to_markdown_call_function(self, node):
         # load on-demand because lib is heavy
         from mindsdb.interfaces.functions.to_markdown import ToMarkdown
         name = node.op.lower()
         if name in self.callbacks:
             return self.callbacks[name]
-        def callback(file_path_or_url):
-            chat_model_params = self._parse_chat_model_params('TO_MARKDOWN_FUNCTION_')
+        def prepare_chat_model_params(chat_model_params: dict) -> dict:
+            """
+            Prepares the chat model parameters for the ToMarkdown function.
+            """
             params_copy = copy.deepcopy(chat_model_params)
-            params_copy['model'] = params_copy.pop('model_name')
-            params_copy.pop('api_keys')
-            params_copy.pop('provider')
+            params_copy["model"] = params_copy.pop("model_name")
+            # Set the base_url for the Google provider.
+            if params_copy["provider"] == "google" and "base_url" not in params_copy:
+                params_copy["base_url"] = "https://generativelanguage.googleapis.com/v1beta/"
+            params_copy.pop("api_keys")
+            params_copy.pop("provider")
+            return params_copy
+        def callback(file_path_or_url):
+            chat_model_params = self._parse_chat_model_params("TO_MARKDOWN_FUNCTION_")
+            chat_model_params = prepare_chat_model_params(chat_model_params)
             to_markdown = ToMarkdown()
-            return to_markdown.call(file_path_or_url, **params_copy)
+            return to_markdown.call(file_path_or_url, **chat_model_params)
-        meta = {
-            'name': name,
-            'callback': callback,
-            'input_types': ['str'],
-            'output_type': 'str'
-        }
+        meta = {"name": name, "callback": callback, "input_types": ["str"], "output_type": "str"}
         self.callbacks[name] = meta
         return meta
-    def _parse_chat_model_params(self, param_prefix: str = 'LLM_FUNCTION_'):
+    def _parse_chat_model_params(self, param_prefix: str = "LLM_FUNCTION_"):
         """
         Parses the environment variables for chat model parameters.
         """
         chat_model_params = config.get("default_llm") or {}
         for k, v in os.environ.items():
             if k.startswith(param_prefix):
-                param_name = k[len(param_prefix):]
-                if param_name == 'MODEL':
-                    chat_model_params['model_name'] = v
+                param_name = k[len(param_prefix) :]
+                if param_name == "MODEL":
+                    chat_model_params["model_name"] = v
                 else:
                     chat_model_params[param_name.lower()] = v
-        if 'provider' not in chat_model_params:
-            chat_model_params['provider'] = 'openai'
+        if "provider" not in chat_model_params:
+            chat_model_params["provider"] = "openai"
-        if 'api_key' in chat_model_params:
+        if "api_key" in chat_model_params:
             # move to api_keys dict
-            chat_model_params["api_keys"] = {chat_model_params['provider']: chat_model_params['api_key']}
+            chat_model_params["api_keys"] = {chat_model_params["provider"]: chat_model_params["api_key"]}
         return chat_model_params
@@ -215,33 +215,23 @@ class DuckDBFunctions:
         self.functions = {}
     def check_function(self, node):
         meta = self.controller.check_function(node)
         if meta is None:
             return
-        name = meta['name']
+        name = meta["name"]
         if name in self.functions:
             return
-        input_types = [
-            python_to_duckdb_type(param)
-            for param in meta['input_types']
-        ]
+        input_types = [python_to_duckdb_type(param) for param in meta["input_types"]]
         self.functions[name] = {
-            'callback': function_maker(len(input_types), meta['callback']),
-            'input': input_types,
-            'output': python_to_duckdb_type(meta['output_type'])
+            "callback": function_maker(len(input_types), meta["callback"]),
+            "input": input_types,
+            "output": python_to_duckdb_type(meta["output_type"]),
         }
     def register(self, connection):
         for name, info in self.functions.items():
-            connection.create_function(
-                name,
-                info['callback'],
-                info['input'],
-                info['output'],
-                null_handling="special"
-            )
+            connection.create_function(name, info["callback"], info["input"], info["output"], null_handling="special")

mindsdb/interfaces/functions/to_markdown.py CHANGED Viewed

@@ -2,6 +2,7 @@ from io import BytesIO
 import os
 from typing import Union
 from urllib.parse import urlparse
+import xml.etree.ElementTree as ET
 from aipdf import ocr
 import mimetypes
@@ -12,6 +13,7 @@ class ToMarkdown:
     """
     Extracts the content of documents of various formats in markdown format.
     """
     def __init__(self):
         """
         Initializes the ToMarkdown class.
@@ -24,24 +26,28 @@ class ToMarkdown:
         file_extension = self._get_file_extension(file_path_or_url)
         file_content = self._get_file_content(file_path_or_url)
-        if file_extension == '.pdf':
+        if file_extension == ".pdf":
             return self._pdf_to_markdown(file_content, **kwargs)
+        elif file_extension in (".xml", ".nessus"):
+            return self._xml_to_markdown(file_content, **kwargs)
         else:
             raise ValueError(f"Unsupported file type: {file_extension}.")
-    def _get_file_content(self, file_path_or_url: str) -> str:
+    def _get_file_content(self, file_path_or_url: str) -> BytesIO:
         """
         Retrieves the content of a file.
         """
         parsed_url = urlparse(file_path_or_url)
-        if parsed_url.scheme in ('http', 'https'):
+        if parsed_url.scheme in ("http", "https"):
             response = requests.get(file_path_or_url)
             if response.status_code == 200:
-                return response
+                return BytesIO(response.content)
             else:
-                raise RuntimeError(f'Unable to retrieve file from URL: {file_path_or_url}')
+                raise RuntimeError(f"Unable to retrieve file from URL: {file_path_or_url}")
         else:
-            with open(file_path_or_url, 'rb') as file:
+            with open(file_path_or_url, "rb") as file:
                 return BytesIO(file.read())
     def _get_file_extension(self, file_path_or_url: str) -> str:
@@ -49,13 +55,13 @@ class ToMarkdown:
         Retrieves the file extension from a file path or URL.
         """
         parsed_url = urlparse(file_path_or_url)
-        if parsed_url.scheme in ('http', 'https'):
+        if parsed_url.scheme in ("http", "https"):
             try:
                 # Make a HEAD request to get headers without downloading the file.
                 response = requests.head(file_path_or_url, allow_redirects=True)
-                content_type = response.headers.get('Content-Type', '')
+                content_type = response.headers.get("Content-Type", "")
                 if content_type:
-                    ext = mimetypes.guess_extension(content_type.split(';')[0].strip())
+                    ext = mimetypes.guess_extension(content_type.split(";")[0].strip())
                     if ext:
                         return ext
@@ -64,16 +70,43 @@ class ToMarkdown:
                 if ext:
                     return ext
             except requests.RequestException:
-                raise RuntimeError(f'Unable to retrieve file extension from URL: {file_path_or_url}')
+                raise RuntimeError(f"Unable to retrieve file extension from URL: {file_path_or_url}")
         else:
             return os.path.splitext(file_path_or_url)[1]
-    def _pdf_to_markdown(self, file_content: Union[requests.Response, bytes], **kwargs) -> str:
+    def _pdf_to_markdown(self, file_content: Union[requests.Response, BytesIO], **kwargs) -> str:
         """
         Converts a PDF file to markdown.
         """
-        if isinstance(file_content, requests.Response):
-            file_content = BytesIO(file_content.content)
         markdown_pages = ocr(file_content, **kwargs)
         return "\n\n---\n\n".join(markdown_pages)
+    def _xml_to_markdown(self, file_content: Union[requests.Response, BytesIO], **kwargs) -> str:
+        """
+        Converts an XML (or Nessus) file to markdown.
+        """
+        def parse_element(element: ET.Element, depth: int = 0) -> str:
+            """
+            Recursively parses an XML element and converts it to markdown.
+            """
+            markdown = []
+            heading = "#" * (depth + 1)
+            markdown.append(f"{heading} {element.tag}")
+            for key, val in element.attrib.items():
+                markdown.append(f"- **{key}**: {val}")
+            text = (element.text or "").strip()
+            if text:
+                markdown.append(f"\n{text}\n")
+            for child in element:
+                markdown.append(parse_element(child, depth + 1))
+            return "\n".join(markdown)
+        root = ET.fromstring(file_content.read().decode("utf-8"))
+        markdown_content = parse_element(root)
+        return markdown_content

MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl