PyPI - MindsDB - Versions diffs - 25.7.1.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl - Mend

MindsDB 25.7.1.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (27)

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +53 -94
mindsdb/api/a2a/agent.py +30 -206
mindsdb/api/a2a/common/server/server.py +26 -27
mindsdb/api/a2a/task_manager.py +93 -227
mindsdb/api/a2a/utils.py +21 -0
mindsdb/api/executor/utilities/sql.py +97 -21
mindsdb/api/http/namespaces/agents.py +126 -201
mindsdb/api/http/namespaces/config.py +12 -1
mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +94 -1
mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
mindsdb/integrations/libs/keyword_search_base.py +41 -0
mindsdb/integrations/libs/vectordatabase_handler.py +35 -14
mindsdb/integrations/utilities/sql_utils.py +11 -0
mindsdb/interfaces/database/projects.py +1 -3
mindsdb/interfaces/functions/controller.py +54 -64
mindsdb/interfaces/functions/to_markdown.py +47 -14
mindsdb/interfaces/knowledge_base/controller.py +127 -35
mindsdb/interfaces/knowledge_base/evaluate.py +2 -2
mindsdb/utilities/config.py +46 -39
mindsdb/utilities/exception.py +11 -0
{mindsdb-25.7.1.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +244 -244
{mindsdb-25.7.1.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +27 -25
{mindsdb-25.7.1.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
{mindsdb-25.7.1.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.7.1.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0

mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py CHANGED Viewed

@@ -18,6 +18,8 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
     DistanceFunction,
     TableField,
 )
+from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
+from mindsdb.integrations.utilities.sql_utils import KeywordSearchArgs
 from mindsdb.utilities import log
 from mindsdb.utilities.profiler import profiler
 from mindsdb.utilities.context import context as ctx
@@ -26,7 +28,7 @@ logger = log.getLogger(__name__)
 # todo Issue #7316 add support for different indexes and search algorithms e.g. cosine similarity or L2 norm
-class PgVectorHandler(PostgresHandler, VectorStoreHandler):
+class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
     """This handler handles connection and execution of the PostgreSQL with pgvector extension statements."""
     name = "pgvector"
@@ -228,6 +230,40 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
         else:
             return ""
+    @staticmethod
+    def _construct_where_clause_with_keywords(filter_conditions=None, keyword_query=None, content_column_name=None):
+        if not keyword_query or not content_column_name:
+            return PgVectorHandler._construct_where_clause(filter_conditions)
+        keyword_query_condition = (
+            f"""to_tsvector('english', {content_column_name}) @@ websearch_to_tsquery('english', '{keyword_query}')"""
+        )
+        if filter_conditions is None:
+            return ""
+        where_clauses = []
+        for item in filter_conditions:
+            key = item["name"]
+            if item["op"].lower() in ("in", "not in"):
+                values = list(repr(i) for i in item["value"])
+                item["value"] = "({})".format(", ".join(values))
+            else:
+                if item["value"] is None:
+                    item["value"] = "null"
+                else:
+                    item["value"] = repr(item["value"])
+            where_clauses.append(f"{key} {item['op']} {item['value']}")
+        where_clauses.append(keyword_query_condition)
+        if len(where_clauses) > 1:
+            return f"WHERE {' AND '.join(where_clauses)}"
+        elif len(where_clauses) == 1:
+            return f"WHERE {where_clauses[0]}"
+        else:
+            return ""
     @staticmethod
     def _construct_full_after_from_clause(
         where_clause: str,
@@ -236,6 +272,36 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
     ) -> str:
         return f"{where_clause} {offset_clause} {limit_clause}"
+    def _build_keyword_bm25_query(
+        self,
+        table_name: str,
+        query: str,
+        columns: List[str] = None,
+        content_column_name: str = "content",
+        conditions: List[FilterCondition] = None,
+        limit: int = None,
+        offset: int = None,
+    ):
+        if columns is None:
+            columns = ["id", "content", "metadata"]
+        filter_conditions, _ = self._translate_conditions(conditions)
+        # given filter conditions, construct where clause
+        where_clause = self._construct_where_clause_with_keywords(filter_conditions, query, content_column_name)
+        query = f"""
+            SELECT
+                {", ".join(columns)},
+                ts_rank_cd(to_tsvector('english', {content_column_name}), websearch_to_tsquery('english', '{query}')) as distance
+            FROM
+                {table_name}
+            {where_clause if where_clause else ""}
+            {f"LIMIT {limit}" if limit else ""}
+            {f"OFFSET {offset}" if offset else ""};"""
+        return query
     def _build_select_query(
         self,
         table_name: str,
@@ -320,6 +386,33 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler):
             columns = ["id", "content", "embeddings", "metadata"]
         query = self._build_select_query(table_name, columns, conditions, limit, offset)
+        result = self.raw_query(query)
+        # ensure embeddings are returned as string so they can be parsed by mindsdb
+        if "embeddings" in columns:
+            result["embeddings"] = result["embeddings"].astype(str)
+        return result
+    def keyword_select(
+        self,
+        table_name: str,
+        columns: List[str] = None,
+        conditions: List[FilterCondition] = None,
+        offset: int = None,
+        limit: int = None,
+        keyword_search_args: KeywordSearchArgs = None,
+    ) -> pd.DataFrame:
+        table_name = self._check_table(table_name)
+        if columns is None:
+            columns = ["id", "content", "embeddings", "metadata"]
+        content_column_name = keyword_search_args.column
+        query = self._build_keyword_bm25_query(
+            table_name, keyword_search_args.query, columns, content_column_name, conditions, limit, offset
+        )
         result = self.raw_query(query)
         # ensure embeddings are returned as string so they can be parsed by mindsdb

mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py CHANGED Viewed

@@ -271,10 +271,11 @@ class SalesforceHandler(MetaAPIHandler):
         # Retrieve the metadata for all Salesforce resources.
         main_metadata = connection.sobjects.describe()
         if table_names:
             # Filter the metadata for the specified tables.
-            main_metadata = [resource for resource in main_metadata["sobjects"] if resource["name"] in table_names]
+            main_metadata = [
+                resource for resource in main_metadata["sobjects"] if resource["name"].lower() in table_names
+            ]
         else:
             main_metadata = main_metadata["sobjects"]

mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py CHANGED Viewed

@@ -165,7 +165,7 @@ def create_table_class(resource_name: Text) -> MetaAPIResource:
             client = self.handler.connect()
             resource_metadata = next(
-                (resource for resource in main_metadata if resource["name"] == resource_name),
+                (resource for resource in main_metadata if resource["name"].lower() == resource_name),
             )
             # Get row count if Id column is aggregatable.

mindsdb/integrations/libs/keyword_search_base.py ADDED Viewed

@@ -0,0 +1,41 @@
+from mindsdb_sql_parser.ast import Select
+from typing import List
+import pandas as pd
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, KeywordSearchArgs
+class KeywordSearchBase:
+    """
+    Base class for keyword search integrations.
+    This class provides a common interface for keyword search functionality.
+    """
+    def __init__(self, *args, **kwargs):
+        pass
+    def dispatch_keyword_select(
+        self, query: Select, conditions: List[FilterCondition] = None, keyword_search_args: KeywordSearchArgs = None
+    ):
+        """Dispatches a keyword search select query to the appropriate method."""
+        raise NotImplementedError()
+    def keyword_select(
+        self,
+        table_name: str,
+        columns: List[str] = None,
+        conditions: List[FilterCondition] = None,
+        offset: int = None,
+        limit: int = None,
+    ) -> pd.DataFrame:
+        """Select data from table
+        Args:
+            table_name (str): table name
+            columns (List[str]): columns to select
+            conditions (List[FilterCondition]): conditions to select
+        Returns:
+            HandlerResponse
+        """
+        raise NotImplementedError()

mindsdb/integrations/libs/vectordatabase_handler.py CHANGED Viewed

@@ -21,7 +21,7 @@ from mindsdb_sql_parser.ast.base import ASTNode
 from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
 from mindsdb.utilities import log
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from .base import BaseHandler
@@ -372,44 +372,65 @@ class VectorStoreHandler(BaseHandler):
         return self.delete(table_name, conditions=conditions)
     def dispatch_select(
-        self, query: Select, conditions: List[FilterCondition] = None, allowed_metadata_columns: List[str] = None
+        self,
+        query: Select,
+        conditions: Optional[List[FilterCondition]] = None,
+        allowed_metadata_columns: List[str] = None,
+        keyword_search_args: Optional[KeywordSearchArgs] = None,
     ):
         """
-        Dispatch select query to the appropriate method.
+        Dispatches a select query to the appropriate method, handling both
+        standard selections and keyword searches based on the provided arguments.
         """
-        # parse key arguments
+        # 1. Parse common query arguments
         table_name = query.from_table.parts[-1]
-        # if targets are star, select all columns
+        # If targets are a star (*), select all schema columns
         if isinstance(query.targets[0], Star):
             columns = [col["name"] for col in self.SCHEMA]
         else:
             columns = [col.parts[-1] for col in query.targets]
+        # 2. Validate columns
         if not self._is_columns_allowed(columns):
-            raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}")
+            allowed_cols = [col["name"] for col in self.SCHEMA]
+            raise Exception(f"Columns {columns} not allowed. Allowed columns are {allowed_cols}")
-        # check if columns are allowed
+        # 3. Extract and process conditions
         if conditions is None:
             where_statement = query.where
             conditions = self.extract_conditions(where_statement)
         self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns)
-        # get offset and limit
+        # 4. Get offset and limit
         offset = query.offset.value if query.offset is not None else None
         limit = query.limit.value if query.limit is not None else None
-        # dispatch select
-        try:
-            return self.select(
+        # 5. Conditionally dispatch to the correct select method
+        if keyword_search_args:
+            # It's a keyword search
+            return self.keyword_select(
                 table_name,
                 columns=columns,
                 conditions=conditions,
                 offset=offset,
                 limit=limit,
+                keyword_search_args=keyword_search_args,
             )
-        except Exception as e:
-            handler_engine = self.__class__.name
-            raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
+        else:
+            # It's a standard select
+            try:
+                return self.select(
+                    table_name,
+                    columns=columns,
+                    conditions=conditions,
+                    offset=offset,
+                    limit=limit,
+                )
+            except Exception as e:
+                handler_engine = self.__class__.name
+                raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
     def _dispatch(self, query: ASTNode) -> HandlerResponse:
         """

mindsdb/integrations/utilities/sql_utils.py CHANGED Viewed

@@ -60,6 +60,17 @@ class FilterCondition:
         """
+class KeywordSearchArgs:
+    def __init__(self, column: str, query: str):
+        """
+        Args:
+            column: The column to search in.
+            query: The search query string.
+        """
+        self.column = column
+        self.query = query
 class SortColumn:
     def __init__(self, column: str, ascending: bool = True):
         self.column = column

mindsdb/interfaces/database/projects.py CHANGED Viewed

@@ -362,9 +362,7 @@ class Project:
                     columns = [ASSISTANT_COLUMN, USER_COLUMN]
             case "KNOWLEDGE_BASE":
-                from mindsdb.interfaces.knowledge_base.controller import KB_TO_VECTORDB_COLUMNS
-                columns = list(KB_TO_VECTORDB_COLUMNS.keys()) + ["metadata", "relevance", "distance"]
+                columns = ["id", "chunk_id", "chunk_content", "metadata", "relevance", "distance"]
             case "TABLE":
                 # like 'mindsdb.models'
                 pass

mindsdb/interfaces/functions/controller.py CHANGED Viewed

@@ -7,15 +7,15 @@ from mindsdb.utilities.config import config
 def python_to_duckdb_type(py_type):
-    if py_type == 'int':
+    if py_type == "int":
         return BIGINT
-    elif py_type == 'float':
+    elif py_type == "float":
         return DOUBLE
-    elif py_type == 'str':
+    elif py_type == "str":
         return VARCHAR
-    elif py_type == 'bool':
+    elif py_type == "bool":
         return BOOLEAN
-    elif py_type == 'bytes':
+    elif py_type == "bytes":
         return BLOB
     else:
         # Unknown
@@ -53,8 +53,8 @@ class BYOMFunctionsController:
             # first run
             self.byom_engines = []
             for name, info in self.session.integration_controller.get_all().items():
-                if info['type'] == 'ml' and info['engine'] == 'byom':
-                    if info['connection_data'].get('mode') == 'custom_function':
+                if info["type"] == "ml" and info["engine"] == "byom":
+                    if info["connection_data"].get("mode") == "custom_function":
                         self.byom_engines.append(name)
         return self.byom_engines
@@ -63,7 +63,7 @@ class BYOMFunctionsController:
             ml_handler = self.session.integration_controller.get_ml_handler(engine)
             storage = HandlerStorage(ml_handler.integration_id)
-            methods = storage.json_get('methods')
+            methods = storage.json_get("methods")
             self.byom_methods[engine] = methods
             self.byom_handlers[engine] = ml_handler
@@ -81,7 +81,7 @@ class BYOMFunctionsController:
             # do nothing
             return
-        new_name = f'{node.namespace}_{fnc_name}'
+        new_name = f"{node.namespace}_{fnc_name}"
         node.op = new_name
         if new_name in self.callbacks:
@@ -91,16 +91,13 @@ class BYOMFunctionsController:
         def callback(*args):
             return self.method_call(engine, fnc_name, args)
-        input_types = [
-            param['type']
-            for param in methods[fnc_name]['input_params']
-        ]
+        input_types = [param["type"] for param in methods[fnc_name]["input_params"]]
         meta = {
-            'name': new_name,
-            'callback': callback,
-            'input_types': input_types,
-            'output_type': methods[fnc_name]['output_type']
+            "name": new_name,
+            "callback": callback,
+            "input_types": input_types,
+            "output_type": methods[fnc_name]["output_type"],
         }
         self.callbacks[new_name] = meta
@@ -114,7 +111,6 @@ class BYOMFunctionsController:
 class FunctionController(BYOMFunctionsController):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -124,10 +120,10 @@ class FunctionController(BYOMFunctionsController):
             return meta
         # builtin functions
-        if node.op.lower() == 'llm':
+        if node.op.lower() == "llm":
             return self.llm_call_function(node)
-        elif node.op.lower() == 'to_markdown':
+        elif node.op.lower() == "to_markdown":
             return self.to_markdown_call_function(node)
     def llm_call_function(self, node):
@@ -141,70 +137,74 @@ class FunctionController(BYOMFunctionsController):
         try:
             from langchain_core.messages import HumanMessage
             from mindsdb.interfaces.agents.langchain_agent import create_chat_model
             llm = create_chat_model(chat_model_params)
         except Exception as e:
-            raise RuntimeError(f'Unable to use LLM function, check ENV variables: {e}')
+            raise RuntimeError(f"Unable to use LLM function, check ENV variables: {e}")
         def callback(question):
             resp = llm([HumanMessage(question)])
             return resp.content
-        meta = {
-            'name': name,
-            'callback': callback,
-            'input_types': ['str'],
-            'output_type': 'str'
-        }
+        meta = {"name": name, "callback": callback, "input_types": ["str"], "output_type": "str"}
         self.callbacks[name] = meta
         return meta
     def to_markdown_call_function(self, node):
         # load on-demand because lib is heavy
         from mindsdb.interfaces.functions.to_markdown import ToMarkdown
         name = node.op.lower()
         if name in self.callbacks:
             return self.callbacks[name]
-        def callback(file_path_or_url):
-            chat_model_params = self._parse_chat_model_params('TO_MARKDOWN_FUNCTION_')
+        def prepare_chat_model_params(chat_model_params: dict) -> dict:
+            """
+            Parepares the chat model parameters for the ToMarkdown function.
+            """
             params_copy = copy.deepcopy(chat_model_params)
-            params_copy['model'] = params_copy.pop('model_name')
-            params_copy.pop('api_keys')
-            params_copy.pop('provider')
+            params_copy["model"] = params_copy.pop("model_name")
+            # Set the base_url for the Google provider.
+            if params_copy["provider"] == "google" and "base_url" not in params_copy:
+                params_copy["base_url"] = "https://generativelanguage.googleapis.com/v1beta/"
+            params_copy.pop("api_keys")
+            params_copy.pop("provider")
+            return params_copy
+        def callback(file_path_or_url):
+            chat_model_params = self._parse_chat_model_params("TO_MARKDOWN_FUNCTION_")
+            chat_model_params = prepare_chat_model_params(chat_model_params)
             to_markdown = ToMarkdown()
-            return to_markdown.call(file_path_or_url, **params_copy)
+            return to_markdown.call(file_path_or_url, **chat_model_params)
-        meta = {
-            'name': name,
-            'callback': callback,
-            'input_types': ['str'],
-            'output_type': 'str'
-        }
+        meta = {"name": name, "callback": callback, "input_types": ["str"], "output_type": "str"}
         self.callbacks[name] = meta
         return meta
-    def _parse_chat_model_params(self, param_prefix: str = 'LLM_FUNCTION_'):
+    def _parse_chat_model_params(self, param_prefix: str = "LLM_FUNCTION_"):
         """
         Parses the environment variables for chat model parameters.
         """
         chat_model_params = config.get("default_llm") or {}
         for k, v in os.environ.items():
             if k.startswith(param_prefix):
-                param_name = k[len(param_prefix):]
-                if param_name == 'MODEL':
-                    chat_model_params['model_name'] = v
+                param_name = k[len(param_prefix) :]
+                if param_name == "MODEL":
+                    chat_model_params["model_name"] = v
                 else:
                     chat_model_params[param_name.lower()] = v
-        if 'provider' not in chat_model_params:
-            chat_model_params['provider'] = 'openai'
+        if "provider" not in chat_model_params:
+            chat_model_params["provider"] = "openai"
-        if 'api_key' in chat_model_params:
+        if "api_key" in chat_model_params:
             # move to api_keys dict
-            chat_model_params["api_keys"] = {chat_model_params['provider']: chat_model_params['api_key']}
+            chat_model_params["api_keys"] = {chat_model_params["provider"]: chat_model_params["api_key"]}
         return chat_model_params
@@ -215,33 +215,23 @@ class DuckDBFunctions:
         self.functions = {}
     def check_function(self, node):
         meta = self.controller.check_function(node)
         if meta is None:
             return
-        name = meta['name']
+        name = meta["name"]
         if name in self.functions:
             return
-        input_types = [
-            python_to_duckdb_type(param)
-            for param in meta['input_types']
-        ]
+        input_types = [python_to_duckdb_type(param) for param in meta["input_types"]]
         self.functions[name] = {
-            'callback': function_maker(len(input_types), meta['callback']),
-            'input': input_types,
-            'output': python_to_duckdb_type(meta['output_type'])
+            "callback": function_maker(len(input_types), meta["callback"]),
+            "input": input_types,
+            "output": python_to_duckdb_type(meta["output_type"]),
         }
     def register(self, connection):
         for name, info in self.functions.items():
-            connection.create_function(
-                name,
-                info['callback'],
-                info['input'],
-                info['output'],
-                null_handling="special"
-            )
+            connection.create_function(name, info["callback"], info["input"], info["output"], null_handling="special")

mindsdb/interfaces/functions/to_markdown.py CHANGED Viewed

@@ -2,6 +2,7 @@ from io import BytesIO
 import os
 from typing import Union
 from urllib.parse import urlparse
+import xml.etree.ElementTree as ET
 from aipdf import ocr
 import mimetypes
@@ -12,6 +13,7 @@ class ToMarkdown:
     """
     Extracts the content of documents of various formats in markdown format.
     """
     def __init__(self):
         """
         Initializes the ToMarkdown class.
@@ -24,24 +26,28 @@ class ToMarkdown:
         file_extension = self._get_file_extension(file_path_or_url)
         file_content = self._get_file_content(file_path_or_url)
-        if file_extension == '.pdf':
+        if file_extension == ".pdf":
             return self._pdf_to_markdown(file_content, **kwargs)
+        elif file_extension in (".xml", ".nessus"):
+            return self._xml_to_markdown(file_content, **kwargs)
         else:
             raise ValueError(f"Unsupported file type: {file_extension}.")
-    def _get_file_content(self, file_path_or_url: str) -> str:
+    def _get_file_content(self, file_path_or_url: str) -> BytesIO:
         """
         Retrieves the content of a file.
         """
         parsed_url = urlparse(file_path_or_url)
-        if parsed_url.scheme in ('http', 'https'):
+        if parsed_url.scheme in ("http", "https"):
             response = requests.get(file_path_or_url)
             if response.status_code == 200:
-                return response
+                return BytesIO(response.content)
             else:
-                raise RuntimeError(f'Unable to retrieve file from URL: {file_path_or_url}')
+                raise RuntimeError(f"Unable to retrieve file from URL: {file_path_or_url}")
         else:
-            with open(file_path_or_url, 'rb') as file:
+            with open(file_path_or_url, "rb") as file:
                 return BytesIO(file.read())
     def _get_file_extension(self, file_path_or_url: str) -> str:
@@ -49,13 +55,13 @@ class ToMarkdown:
         Retrieves the file extension from a file path or URL.
         """
         parsed_url = urlparse(file_path_or_url)
-        if parsed_url.scheme in ('http', 'https'):
+        if parsed_url.scheme in ("http", "https"):
             try:
                 # Make a HEAD request to get headers without downloading the file.
                 response = requests.head(file_path_or_url, allow_redirects=True)
-                content_type = response.headers.get('Content-Type', '')
+                content_type = response.headers.get("Content-Type", "")
                 if content_type:
-                    ext = mimetypes.guess_extension(content_type.split(';')[0].strip())
+                    ext = mimetypes.guess_extension(content_type.split(";")[0].strip())
                     if ext:
                         return ext
@@ -64,16 +70,43 @@ class ToMarkdown:
                 if ext:
                     return ext
             except requests.RequestException:
-                raise RuntimeError(f'Unable to retrieve file extension from URL: {file_path_or_url}')
+                raise RuntimeError(f"Unable to retrieve file extension from URL: {file_path_or_url}")
         else:
             return os.path.splitext(file_path_or_url)[1]
-    def _pdf_to_markdown(self, file_content: Union[requests.Response, bytes], **kwargs) -> str:
+    def _pdf_to_markdown(self, file_content: Union[requests.Response, BytesIO], **kwargs) -> str:
         """
         Converts a PDF file to markdown.
         """
-        if isinstance(file_content, requests.Response):
-            file_content = BytesIO(file_content.content)
         markdown_pages = ocr(file_content, **kwargs)
         return "\n\n---\n\n".join(markdown_pages)
+    def _xml_to_markdown(self, file_content: Union[requests.Response, BytesIO], **kwargs) -> str:
+        """
+        Converts an XML (or Nessus) file to markdown.
+        """
+        def parse_element(element: ET.Element, depth: int = 0) -> str:
+            """
+            Recursively parses an XML element and converts it to markdown.
+            """
+            markdown = []
+            heading = "#" * (depth + 1)
+            markdown.append(f"{heading} {element.tag}")
+            for key, val in element.attrib.items():
+                markdown.append(f"- **{key}**: {val}")
+            text = (element.text or "").strip()
+            if text:
+                markdown.append(f"\n{text}\n")
+            for child in element:
+                markdown.append(parse_element(child, depth + 1))
+            return "\n".join(markdown)
+        root = ET.fromstring(file_content.read().decode("utf-8"))
+        markdown_content = parse_element(root)
+        return markdown_content

MindsDB 25.7.1.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.7.1.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl